[libdrm] amdgpu: add amdgpu_bo_handle_type_kms_noimport

Submitted by Marek Olšák on July 12, 2018, 12:47 a.m.

Details

Message ID 20180712004750.2024-1-maraeo@gmail.com
State New
Headers show
Series "amdgpu: add amdgpu_bo_handle_type_kms_noimport" ( rev: 1 ) in AMD X.Org drivers

Not browsing as part of any series.

Commit Message

Marek Olšák July 12, 2018, 12:47 a.m.
From: Marek Olšák <marek.olsak@amd.com>

---
 amdgpu/amdgpu.h    | 7 ++++++-
 amdgpu/amdgpu_bo.c | 4 ++++
 2 files changed, 10 insertions(+), 1 deletion(-)

Patch hide | download patch | download mbox

diff --git a/amdgpu/amdgpu.h b/amdgpu/amdgpu.h
index 36f91058..be83b457 100644
--- a/amdgpu/amdgpu.h
+++ b/amdgpu/amdgpu.h
@@ -77,21 +77,26 @@  struct drm_amdgpu_info_hw_ip;
  *
 */
 enum amdgpu_bo_handle_type {
 	/** GEM flink name (needs DRM authentication, used by DRI2) */
 	amdgpu_bo_handle_type_gem_flink_name = 0,
 
 	/** KMS handle which is used by all driver ioctls */
 	amdgpu_bo_handle_type_kms = 1,
 
 	/** DMA-buf fd handle */
-	amdgpu_bo_handle_type_dma_buf_fd = 2
+	amdgpu_bo_handle_type_dma_buf_fd = 2,
+
+	/** KMS handle, but re-importing as a DMABUF handle through
+	 *  drmPrimeHandleToFD is forbidden. (Glamor does that)
+	 */
+	amdgpu_bo_handle_type_kms_noimport = 3,
 };
 
 /** Define known types of GPU VM VA ranges */
 enum amdgpu_gpu_va_range
 {
 	/** Allocate from "normal"/general range */
 	amdgpu_gpu_va_range_general = 0
 };
 
 enum amdgpu_sw_info {
diff --git a/amdgpu/amdgpu_bo.c b/amdgpu/amdgpu_bo.c
index 9e37b149..d29be244 100644
--- a/amdgpu/amdgpu_bo.c
+++ b/amdgpu/amdgpu_bo.c
@@ -234,20 +234,22 @@  int amdgpu_bo_export(amdgpu_bo_handle bo,
 	case amdgpu_bo_handle_type_gem_flink_name:
 		r = amdgpu_bo_export_flink(bo);
 		if (r)
 			return r;
 
 		*shared_handle = bo->flink_name;
 		return 0;
 
 	case amdgpu_bo_handle_type_kms:
 		amdgpu_add_handle_to_table(bo);
+		/* fall through */
+	case amdgpu_bo_handle_type_kms_noimport:
 		*shared_handle = bo->handle;
 		return 0;
 
 	case amdgpu_bo_handle_type_dma_buf_fd:
 		amdgpu_add_handle_to_table(bo);
 		return drmPrimeHandleToFD(bo->dev->fd, bo->handle,
 					  DRM_CLOEXEC | DRM_RDWR,
 					  (int*)shared_handle);
 	}
 	return -EINVAL;
@@ -299,20 +301,21 @@  int amdgpu_bo_import(amdgpu_device_handle dev,
 		bo = util_hash_table_get(dev->bo_flink_names,
 					 (void*)(uintptr_t)shared_handle);
 		break;
 
 	case amdgpu_bo_handle_type_dma_buf_fd:
 		bo = util_hash_table_get(dev->bo_handles,
 					 (void*)(uintptr_t)shared_handle);
 		break;
 
 	case amdgpu_bo_handle_type_kms:
+	case amdgpu_bo_handle_type_kms_noimport:
 		/* Importing a KMS handle in not allowed. */
 		pthread_mutex_unlock(&dev->bo_table_mutex);
 		return -EPERM;
 
 	default:
 		pthread_mutex_unlock(&dev->bo_table_mutex);
 		return -EINVAL;
 	}
 
 	if (bo) {
@@ -368,20 +371,21 @@  int amdgpu_bo_import(amdgpu_device_handle dev,
 		util_hash_table_set(dev->bo_flink_names,
 				    (void*)(uintptr_t)bo->flink_name, bo);
 		break;
 
 	case amdgpu_bo_handle_type_dma_buf_fd:
 		bo->handle = shared_handle;
 		bo->alloc_size = dma_buf_size;
 		break;
 
 	case amdgpu_bo_handle_type_kms:
+	case amdgpu_bo_handle_type_kms_noimport:
 		assert(0); /* unreachable */
 	}
 
 	/* Initialize it. */
 	atomic_set(&bo->refcount, 1);
 	bo->dev = dev;
 	pthread_mutex_init(&bo->cpu_access_mutex, NULL);
 
 	util_hash_table_set(dev->bo_handles, (void*)(uintptr_t)bo->handle, bo);
 	pthread_mutex_unlock(&dev->bo_table_mutex);

Comments

On 07/12/2018 08:47 AM, Marek Olšák wrote:
> From: Marek Olšák <marek.olsak@amd.com>
>
> ---
>   amdgpu/amdgpu.h    | 7 ++++++-
>   amdgpu/amdgpu_bo.c | 4 ++++
>   2 files changed, 10 insertions(+), 1 deletion(-)
>
> diff --git a/amdgpu/amdgpu.h b/amdgpu/amdgpu.h
> index 36f91058..be83b457 100644
> --- a/amdgpu/amdgpu.h
> +++ b/amdgpu/amdgpu.h
> @@ -77,21 +77,26 @@ struct drm_amdgpu_info_hw_ip;
>    *
>   */
>   enum amdgpu_bo_handle_type {
>   	/** GEM flink name (needs DRM authentication, used by DRI2) */
>   	amdgpu_bo_handle_type_gem_flink_name = 0,
>
>   	/** KMS handle which is used by all driver ioctls */
>   	amdgpu_bo_handle_type_kms = 1,
>
>   	/** DMA-buf fd handle */
> -	amdgpu_bo_handle_type_dma_buf_fd = 2
> +	amdgpu_bo_handle_type_dma_buf_fd = 2,
> +
> +	/** KMS handle, but re-importing as a DMABUF handle through
> +	 *  drmPrimeHandleToFD is forbidden. (Glamor does that)
> +	 */
> +	amdgpu_bo_handle_type_kms_noimport = 3,
>   };
>
>   /** Define known types of GPU VM VA ranges */
>   enum amdgpu_gpu_va_range
>   {
>   	/** Allocate from "normal"/general range */
>   	amdgpu_gpu_va_range_general = 0
>   };
>
>   enum amdgpu_sw_info {
> diff --git a/amdgpu/amdgpu_bo.c b/amdgpu/amdgpu_bo.c
> index 9e37b149..d29be244 100644
> --- a/amdgpu/amdgpu_bo.c
> +++ b/amdgpu/amdgpu_bo.c
> @@ -234,20 +234,22 @@ int amdgpu_bo_export(amdgpu_bo_handle bo,
>   	case amdgpu_bo_handle_type_gem_flink_name:
>   		r = amdgpu_bo_export_flink(bo);
>   		if (r)
>   			return r;
>
>   		*shared_handle = bo->flink_name;
>   		return 0;
>
>   	case amdgpu_bo_handle_type_kms:
>   		amdgpu_add_handle_to_table(bo);

We may reserve below code for type_kms, which is already used by others.
	*shared_handle = bo->handle;
	return 0;

otherwise, it may break some things.

BTW, it's good to introduce a new type for compatibility.
That reminds me to update gbm accordingly.

Jerry

> +		/* fall through */
> +	case amdgpu_bo_handle_type_kms_noimport:
>   		*shared_handle = bo->handle;
>   		return 0;
>
>   	case amdgpu_bo_handle_type_dma_buf_fd:
>   		amdgpu_add_handle_to_table(bo);
>   		return drmPrimeHandleToFD(bo->dev->fd, bo->handle,
>   					  DRM_CLOEXEC | DRM_RDWR,
>   					  (int*)shared_handle);
>   	}
>   	return -EINVAL;
> @@ -299,20 +301,21 @@ int amdgpu_bo_import(amdgpu_device_handle dev,
>   		bo = util_hash_table_get(dev->bo_flink_names,
>   					 (void*)(uintptr_t)shared_handle);
>   		break;
>
>   	case amdgpu_bo_handle_type_dma_buf_fd:
>   		bo = util_hash_table_get(dev->bo_handles,
>   					 (void*)(uintptr_t)shared_handle);
>   		break;
>
>   	case amdgpu_bo_handle_type_kms:
> +	case amdgpu_bo_handle_type_kms_noimport:
>   		/* Importing a KMS handle in not allowed. */
>   		pthread_mutex_unlock(&dev->bo_table_mutex);
>   		return -EPERM;
>
>   	default:
>   		pthread_mutex_unlock(&dev->bo_table_mutex);
>   		return -EINVAL;
>   	}
>
>   	if (bo) {
> @@ -368,20 +371,21 @@ int amdgpu_bo_import(amdgpu_device_handle dev,
>   		util_hash_table_set(dev->bo_flink_names,
>   				    (void*)(uintptr_t)bo->flink_name, bo);
>   		break;
>
>   	case amdgpu_bo_handle_type_dma_buf_fd:
>   		bo->handle = shared_handle;
>   		bo->alloc_size = dma_buf_size;
>   		break;
>
>   	case amdgpu_bo_handle_type_kms:
> +	case amdgpu_bo_handle_type_kms_noimport:
>   		assert(0); /* unreachable */
>   	}
>
>   	/* Initialize it. */
>   	atomic_set(&bo->refcount, 1);
>   	bo->dev = dev;
>   	pthread_mutex_init(&bo->cpu_access_mutex, NULL);
>
>   	util_hash_table_set(dev->bo_handles, (void*)(uintptr_t)bo->handle, bo);
>   	pthread_mutex_unlock(&dev->bo_table_mutex);
>
On Wed, Jul 11, 2018 at 9:14 PM, Zhang, Jerry (Junwei)
<Jerry.Zhang@amd.com> wrote:
> On 07/12/2018 08:47 AM, Marek Olšák wrote:
>>
>> From: Marek Olšák <marek.olsak@amd.com>
>>
>> ---
>>   amdgpu/amdgpu.h    | 7 ++++++-
>>   amdgpu/amdgpu_bo.c | 4 ++++
>>   2 files changed, 10 insertions(+), 1 deletion(-)
>>
>> diff --git a/amdgpu/amdgpu.h b/amdgpu/amdgpu.h
>> index 36f91058..be83b457 100644
>> --- a/amdgpu/amdgpu.h
>> +++ b/amdgpu/amdgpu.h
>> @@ -77,21 +77,26 @@ struct drm_amdgpu_info_hw_ip;
>>    *
>>   */
>>   enum amdgpu_bo_handle_type {
>>         /** GEM flink name (needs DRM authentication, used by DRI2) */
>>         amdgpu_bo_handle_type_gem_flink_name = 0,
>>
>>         /** KMS handle which is used by all driver ioctls */
>>         amdgpu_bo_handle_type_kms = 1,
>>
>>         /** DMA-buf fd handle */
>> -       amdgpu_bo_handle_type_dma_buf_fd = 2
>> +       amdgpu_bo_handle_type_dma_buf_fd = 2,
>> +
>> +       /** KMS handle, but re-importing as a DMABUF handle through
>> +        *  drmPrimeHandleToFD is forbidden. (Glamor does that)
>> +        */
>> +       amdgpu_bo_handle_type_kms_noimport = 3,
>>   };
>>
>>   /** Define known types of GPU VM VA ranges */
>>   enum amdgpu_gpu_va_range
>>   {
>>         /** Allocate from "normal"/general range */
>>         amdgpu_gpu_va_range_general = 0
>>   };
>>
>>   enum amdgpu_sw_info {
>> diff --git a/amdgpu/amdgpu_bo.c b/amdgpu/amdgpu_bo.c
>> index 9e37b149..d29be244 100644
>> --- a/amdgpu/amdgpu_bo.c
>> +++ b/amdgpu/amdgpu_bo.c
>> @@ -234,20 +234,22 @@ int amdgpu_bo_export(amdgpu_bo_handle bo,
>>         case amdgpu_bo_handle_type_gem_flink_name:
>>                 r = amdgpu_bo_export_flink(bo);
>>                 if (r)
>>                         return r;
>>
>>                 *shared_handle = bo->flink_name;
>>                 return 0;
>>
>>         case amdgpu_bo_handle_type_kms:
>>                 amdgpu_add_handle_to_table(bo);
>
>
> We may reserve below code for type_kms, which is already used by others.
>         *shared_handle = bo->handle;
>         return 0;
>
> otherwise, it may break somethings.

What do you mean?

>
> BTW, it's good to introduce a new type for compatibility.

Thanks.

Marek
On 07/12/2018 09:14 AM, Zhang, Jerry (Junwei) wrote:
> On 07/12/2018 08:47 AM, Marek Olšák wrote:
>> From: Marek Olšák <marek.olsak@amd.com>
>>
>> ---
>>   amdgpu/amdgpu.h    | 7 ++++++-
>>   amdgpu/amdgpu_bo.c | 4 ++++
>>   2 files changed, 10 insertions(+), 1 deletion(-)
>>
>> diff --git a/amdgpu/amdgpu.h b/amdgpu/amdgpu.h
>> index 36f91058..be83b457 100644
>> --- a/amdgpu/amdgpu.h
>> +++ b/amdgpu/amdgpu.h
>> @@ -77,21 +77,26 @@ struct drm_amdgpu_info_hw_ip;
>>    *
>>   */
>>   enum amdgpu_bo_handle_type {
>>       /** GEM flink name (needs DRM authentication, used by DRI2) */
>>       amdgpu_bo_handle_type_gem_flink_name = 0,
>>
>>       /** KMS handle which is used by all driver ioctls */
>>       amdgpu_bo_handle_type_kms = 1,
>>
>>       /** DMA-buf fd handle */
>> -    amdgpu_bo_handle_type_dma_buf_fd = 2
>> +    amdgpu_bo_handle_type_dma_buf_fd = 2,
>> +
>> +    /** KMS handle, but re-importing as a DMABUF handle through
>> +     *  drmPrimeHandleToFD is forbidden. (Glamor does that)
>> +     */
>> +    amdgpu_bo_handle_type_kms_noimport = 3,
>>   };
>>
>>   /** Define known types of GPU VM VA ranges */
>>   enum amdgpu_gpu_va_range
>>   {
>>       /** Allocate from "normal"/general range */
>>       amdgpu_gpu_va_range_general = 0
>>   };
>>
>>   enum amdgpu_sw_info {
>> diff --git a/amdgpu/amdgpu_bo.c b/amdgpu/amdgpu_bo.c
>> index 9e37b149..d29be244 100644
>> --- a/amdgpu/amdgpu_bo.c
>> +++ b/amdgpu/amdgpu_bo.c
>> @@ -234,20 +234,22 @@ int amdgpu_bo_export(amdgpu_bo_handle bo,
>>       case amdgpu_bo_handle_type_gem_flink_name:
>>           r = amdgpu_bo_export_flink(bo);
>>           if (r)
>>               return r;
>>
>>           *shared_handle = bo->flink_name;
>>           return 0;
>>
>>       case amdgpu_bo_handle_type_kms:
>>           amdgpu_add_handle_to_table(bo);
>
> We may reserve below code for type_kms, which is already used by others.
>      *shared_handle = bo->handle;
>      return 0;
>
> otherwise, it may break somethings.

Please ignore above comment, that will be passed through for type_kms as well.

Reviewed-by: Junwei Zhang <Jerry.Zhang@amd.com>

Jerry

>
> BTW, it's good to introduce a new type for compatibility.
> That reminders me to update gbm accordingly.
>
> Jerry
>
>> +        /* fall through */
>> +    case amdgpu_bo_handle_type_kms_noimport:
>>           *shared_handle = bo->handle;
>>           return 0;
>>
>>       case amdgpu_bo_handle_type_dma_buf_fd:
>>           amdgpu_add_handle_to_table(bo);
>>           return drmPrimeHandleToFD(bo->dev->fd, bo->handle,
>>                         DRM_CLOEXEC | DRM_RDWR,
>>                         (int*)shared_handle);
>>       }
>>       return -EINVAL;
>> @@ -299,20 +301,21 @@ int amdgpu_bo_import(amdgpu_device_handle dev,
>>           bo = util_hash_table_get(dev->bo_flink_names,
>>                        (void*)(uintptr_t)shared_handle);
>>           break;
>>
>>       case amdgpu_bo_handle_type_dma_buf_fd:
>>           bo = util_hash_table_get(dev->bo_handles,
>>                        (void*)(uintptr_t)shared_handle);
>>           break;
>>
>>       case amdgpu_bo_handle_type_kms:
>> +    case amdgpu_bo_handle_type_kms_noimport:
>>           /* Importing a KMS handle in not allowed. */
>>           pthread_mutex_unlock(&dev->bo_table_mutex);
>>           return -EPERM;
>>
>>       default:
>>           pthread_mutex_unlock(&dev->bo_table_mutex);
>>           return -EINVAL;
>>       }
>>
>>       if (bo) {
>> @@ -368,20 +371,21 @@ int amdgpu_bo_import(amdgpu_device_handle dev,
>>           util_hash_table_set(dev->bo_flink_names,
>>                       (void*)(uintptr_t)bo->flink_name, bo);
>>           break;
>>
>>       case amdgpu_bo_handle_type_dma_buf_fd:
>>           bo->handle = shared_handle;
>>           bo->alloc_size = dma_buf_size;
>>           break;
>>
>>       case amdgpu_bo_handle_type_kms:
>> +    case amdgpu_bo_handle_type_kms_noimport:
>>           assert(0); /* unreachable */
>>       }
>>
>>       /* Initialize it. */
>>       atomic_set(&bo->refcount, 1);
>>       bo->dev = dev;
>>       pthread_mutex_init(&bo->cpu_access_mutex, NULL);
>>
>>       util_hash_table_set(dev->bo_handles, (void*)(uintptr_t)bo->handle, bo);
>>       pthread_mutex_unlock(&dev->bo_table_mutex);
>>
On 2018年07月12日 08:47, Marek Olšák wrote:
> From: Marek Olšák <marek.olsak@amd.com>
less patch comment to describe why amdgpu_bo_handle_type_kms doesn't 
meet the requirement and what the patch does.
less Signed-off-by.

>
> ---
>   amdgpu/amdgpu.h    | 7 ++++++-
>   amdgpu/amdgpu_bo.c | 4 ++++
>   2 files changed, 10 insertions(+), 1 deletion(-)
>
> diff --git a/amdgpu/amdgpu.h b/amdgpu/amdgpu.h
> index 36f91058..be83b457 100644
> --- a/amdgpu/amdgpu.h
> +++ b/amdgpu/amdgpu.h
> @@ -77,21 +77,26 @@ struct drm_amdgpu_info_hw_ip;
>    *
>   */
>   enum amdgpu_bo_handle_type {
>   	/** GEM flink name (needs DRM authentication, used by DRI2) */
>   	amdgpu_bo_handle_type_gem_flink_name = 0,
>   
>   	/** KMS handle which is used by all driver ioctls */
>   	amdgpu_bo_handle_type_kms = 1,
>   
>   	/** DMA-buf fd handle */
> -	amdgpu_bo_handle_type_dma_buf_fd = 2
> +	amdgpu_bo_handle_type_dma_buf_fd = 2,
> +
> +	/** KMS handle, but re-importing as a DMABUF handle through
> +	 *  drmPrimeHandleToFD is forbidden. (Glamor does that)
> +	 */
> +	amdgpu_bo_handle_type_kms_noimport = 3,
I'm always curious that these enum members are lowercase, could we 
change them to uppercase by this time?

Thanks,
David Zhou
>   };
>   
>   /** Define known types of GPU VM VA ranges */
>   enum amdgpu_gpu_va_range
>   {
>   	/** Allocate from "normal"/general range */
>   	amdgpu_gpu_va_range_general = 0
>   };
>   
>   enum amdgpu_sw_info {
> diff --git a/amdgpu/amdgpu_bo.c b/amdgpu/amdgpu_bo.c
> index 9e37b149..d29be244 100644
> --- a/amdgpu/amdgpu_bo.c
> +++ b/amdgpu/amdgpu_bo.c
> @@ -234,20 +234,22 @@ int amdgpu_bo_export(amdgpu_bo_handle bo,
>   	case amdgpu_bo_handle_type_gem_flink_name:
>   		r = amdgpu_bo_export_flink(bo);
>   		if (r)
>   			return r;
>   
>   		*shared_handle = bo->flink_name;
>   		return 0;
>   
>   	case amdgpu_bo_handle_type_kms:
>   		amdgpu_add_handle_to_table(bo);
> +		/* fall through */
> +	case amdgpu_bo_handle_type_kms_noimport:
>   		*shared_handle = bo->handle;
>   		return 0;
>   
>   	case amdgpu_bo_handle_type_dma_buf_fd:
>   		amdgpu_add_handle_to_table(bo);
>   		return drmPrimeHandleToFD(bo->dev->fd, bo->handle,
>   					  DRM_CLOEXEC | DRM_RDWR,
>   					  (int*)shared_handle);
>   	}
>   	return -EINVAL;
> @@ -299,20 +301,21 @@ int amdgpu_bo_import(amdgpu_device_handle dev,
>   		bo = util_hash_table_get(dev->bo_flink_names,
>   					 (void*)(uintptr_t)shared_handle);
>   		break;
>   
>   	case amdgpu_bo_handle_type_dma_buf_fd:
>   		bo = util_hash_table_get(dev->bo_handles,
>   					 (void*)(uintptr_t)shared_handle);
>   		break;
>   
>   	case amdgpu_bo_handle_type_kms:
> +	case amdgpu_bo_handle_type_kms_noimport:
>   		/* Importing a KMS handle in not allowed. */
>   		pthread_mutex_unlock(&dev->bo_table_mutex);
>   		return -EPERM;
>   
>   	default:
>   		pthread_mutex_unlock(&dev->bo_table_mutex);
>   		return -EINVAL;
>   	}
>   
>   	if (bo) {
> @@ -368,20 +371,21 @@ int amdgpu_bo_import(amdgpu_device_handle dev,
>   		util_hash_table_set(dev->bo_flink_names,
>   				    (void*)(uintptr_t)bo->flink_name, bo);
>   		break;
>   
>   	case amdgpu_bo_handle_type_dma_buf_fd:
>   		bo->handle = shared_handle;
>   		bo->alloc_size = dma_buf_size;
>   		break;
>   
>   	case amdgpu_bo_handle_type_kms:
> +	case amdgpu_bo_handle_type_kms_noimport:
>   		assert(0); /* unreachable */
>   	}
>   
>   	/* Initialize it. */
>   	atomic_set(&bo->refcount, 1);
>   	bo->dev = dev;
>   	pthread_mutex_init(&bo->cpu_access_mutex, NULL);
>   
>   	util_hash_table_set(dev->bo_handles, (void*)(uintptr_t)bo->handle, bo);
>   	pthread_mutex_unlock(&dev->bo_table_mutex);
On Wed, Jul 11, 2018 at 10:09 PM, zhoucm1 <zhoucm1@amd.com> wrote:
>
>
> On 2018年07月12日 08:47, Marek Olšák wrote:
>>
>> From: Marek Olšák <marek.olsak@amd.com>
>
> less patch comment to describe why amdgpu_bo_handle_type_kms doesn't meet
> requriement and what patch does.

The comment in amdgpu.h describes it well. (at least I hope so)

> less Signed-off-by.

Not needed for libdrm as far as I know, but some people add it anyway.

>
>>
>> ---
>>   amdgpu/amdgpu.h    | 7 ++++++-
>>   amdgpu/amdgpu_bo.c | 4 ++++
>>   2 files changed, 10 insertions(+), 1 deletion(-)
>>
>> diff --git a/amdgpu/amdgpu.h b/amdgpu/amdgpu.h
>> index 36f91058..be83b457 100644
>> --- a/amdgpu/amdgpu.h
>> +++ b/amdgpu/amdgpu.h
>> @@ -77,21 +77,26 @@ struct drm_amdgpu_info_hw_ip;
>>    *
>>   */
>>   enum amdgpu_bo_handle_type {
>>         /** GEM flink name (needs DRM authentication, used by DRI2) */
>>         amdgpu_bo_handle_type_gem_flink_name = 0,
>>         /** KMS handle which is used by all driver ioctls */
>>         amdgpu_bo_handle_type_kms = 1,
>>         /** DMA-buf fd handle */
>> -       amdgpu_bo_handle_type_dma_buf_fd = 2
>> +       amdgpu_bo_handle_type_dma_buf_fd = 2,
>> +
>> +       /** KMS handle, but re-importing as a DMABUF handle through
>> +        *  drmPrimeHandleToFD is forbidden. (Glamor does that)
>> +        */
>> +       amdgpu_bo_handle_type_kms_noimport = 3,
>
> I'm always curious that these enum members are lowercase, could we change
> them to uppercase by this time?

Existing open source projects #including amdgpu.h shouldn't fail
compilation with any newer libdrm, which limits us as to what we can
change in amdgpu.h.

Marek
On 2018-07-12 02:47 AM, Marek Olšák wrote:
> From: Marek Olšák <marek.olsak@amd.com>
> 
> ---
>  amdgpu/amdgpu.h    | 7 ++++++-
>  amdgpu/amdgpu_bo.c | 4 ++++
>  2 files changed, 10 insertions(+), 1 deletion(-)
> 
> diff --git a/amdgpu/amdgpu.h b/amdgpu/amdgpu.h
> index 36f91058..be83b457 100644
> --- a/amdgpu/amdgpu.h
> +++ b/amdgpu/amdgpu.h
> @@ -77,21 +77,26 @@ struct drm_amdgpu_info_hw_ip;
>   *
>  */
>  enum amdgpu_bo_handle_type {
>  	/** GEM flink name (needs DRM authentication, used by DRI2) */
>  	amdgpu_bo_handle_type_gem_flink_name = 0,
>  
>  	/** KMS handle which is used by all driver ioctls */
>  	amdgpu_bo_handle_type_kms = 1,
>  
>  	/** DMA-buf fd handle */
> -	amdgpu_bo_handle_type_dma_buf_fd = 2
> +	amdgpu_bo_handle_type_dma_buf_fd = 2,
> +
> +	/** KMS handle, but re-importing as a DMABUF handle through
> +	 *  drmPrimeHandleToFD is forbidden. (Glamor does that)
> +	 */
> +	amdgpu_bo_handle_type_kms_noimport = 3,
>  };
>  
>  /** Define known types of GPU VM VA ranges */
>  enum amdgpu_gpu_va_range
>  {
>  	/** Allocate from "normal"/general range */
>  	amdgpu_gpu_va_range_general = 0
>  };
>  
>  enum amdgpu_sw_info {
> diff --git a/amdgpu/amdgpu_bo.c b/amdgpu/amdgpu_bo.c
> index 9e37b149..d29be244 100644
> --- a/amdgpu/amdgpu_bo.c
> +++ b/amdgpu/amdgpu_bo.c
> @@ -234,20 +234,22 @@ int amdgpu_bo_export(amdgpu_bo_handle bo,
>  	case amdgpu_bo_handle_type_gem_flink_name:
>  		r = amdgpu_bo_export_flink(bo);
>  		if (r)
>  			return r;
>  
>  		*shared_handle = bo->flink_name;
>  		return 0;
>  
>  	case amdgpu_bo_handle_type_kms:
>  		amdgpu_add_handle_to_table(bo);
> +		/* fall through */
> +	case amdgpu_bo_handle_type_kms_noimport:
>  		*shared_handle = bo->handle;
>  		return 0;

What is the rationale for this? I.e. why do you want to not store some
handles in the hash table? And how can code using
amdgpu_bo_handle_type_kms_noimport be sure that the BO will never be
re-imported via dma-buf?

The experience with the previous patch has shown that it's hard to keep
track of all possible ways in which BOs are imported, and that if we
miss one, this breaks pretty spectacularly.
On Thu, Jul 12, 2018, 3:31 AM Michel Dänzer <michel@daenzer.net> wrote:

> On 2018-07-12 02:47 AM, Marek Olšák wrote:
> > From: Marek Olšák <marek.olsak@amd.com>
> >
> > ---
> >  amdgpu/amdgpu.h    | 7 ++++++-
> >  amdgpu/amdgpu_bo.c | 4 ++++
> >  2 files changed, 10 insertions(+), 1 deletion(-)
> >
> > diff --git a/amdgpu/amdgpu.h b/amdgpu/amdgpu.h
> > index 36f91058..be83b457 100644
> > --- a/amdgpu/amdgpu.h
> > +++ b/amdgpu/amdgpu.h
> > @@ -77,21 +77,26 @@ struct drm_amdgpu_info_hw_ip;
> >   *
> >  */
> >  enum amdgpu_bo_handle_type {
> >       /** GEM flink name (needs DRM authentication, used by DRI2) */
> >       amdgpu_bo_handle_type_gem_flink_name = 0,
> >
> >       /** KMS handle which is used by all driver ioctls */
> >       amdgpu_bo_handle_type_kms = 1,
> >
> >       /** DMA-buf fd handle */
> > -     amdgpu_bo_handle_type_dma_buf_fd = 2
> > +     amdgpu_bo_handle_type_dma_buf_fd = 2,
> > +
> > +     /** KMS handle, but re-importing as a DMABUF handle through
> > +      *  drmPrimeHandleToFD is forbidden. (Glamor does that)
> > +      */
> > +     amdgpu_bo_handle_type_kms_noimport = 3,
> >  };
> >
> >  /** Define known types of GPU VM VA ranges */
> >  enum amdgpu_gpu_va_range
> >  {
> >       /** Allocate from "normal"/general range */
> >       amdgpu_gpu_va_range_general = 0
> >  };
> >
> >  enum amdgpu_sw_info {
> > diff --git a/amdgpu/amdgpu_bo.c b/amdgpu/amdgpu_bo.c
> > index 9e37b149..d29be244 100644
> > --- a/amdgpu/amdgpu_bo.c
> > +++ b/amdgpu/amdgpu_bo.c
> > @@ -234,20 +234,22 @@ int amdgpu_bo_export(amdgpu_bo_handle bo,
> >       case amdgpu_bo_handle_type_gem_flink_name:
> >               r = amdgpu_bo_export_flink(bo);
> >               if (r)
> >                       return r;
> >
> >               *shared_handle = bo->flink_name;
> >               return 0;
> >
> >       case amdgpu_bo_handle_type_kms:
> >               amdgpu_add_handle_to_table(bo);
> > +             /* fall through */
> > +     case amdgpu_bo_handle_type_kms_noimport:
> >               *shared_handle = bo->handle;
> >               return 0;
>
> What is the rationale for this? I.e. why do you want to not store some
> handles in the hash table?


Because I have the option.

And how can code using
> amdgpu_bo_handle_type_kms_noimport be sure that the BO will never be
> re-imported via dma-buf?
>

That's for the user to decide and prove when it's safe.


> The experience with the previous patch has shown that it's hard to keep
> track of all possible ways in which BOs are imported, and that if we
> miss one, this breaks pretty spectacularly.
>

You are assuming that it will be used incorrectly based on your previous
bad experience. All I need to do is not to hand the handle to components
that would misuse it.

Marek


>
> --
> Earthling Michel Dänzer               |               http://www.amd.com
> Libre software enthusiast             |             Mesa and X developer
>
On 2018-07-12 07:03 PM, Marek Olšák wrote:
> On Thu, Jul 12, 2018, 3:31 AM Michel Dänzer <michel@daenzer.net> wrote:
>> On 2018-07-12 02:47 AM, Marek Olšák wrote:
>>> From: Marek Olšák <marek.olsak@amd.com>
>>>
>>> diff --git a/amdgpu/amdgpu_bo.c b/amdgpu/amdgpu_bo.c
>>> index 9e37b149..d29be244 100644
>>> --- a/amdgpu/amdgpu_bo.c
>>> +++ b/amdgpu/amdgpu_bo.c
>>> @@ -234,20 +234,22 @@ int amdgpu_bo_export(amdgpu_bo_handle bo,
>>>       case amdgpu_bo_handle_type_gem_flink_name:
>>>               r = amdgpu_bo_export_flink(bo);
>>>               if (r)
>>>                       return r;
>>>
>>>               *shared_handle = bo->flink_name;
>>>               return 0;
>>>
>>>       case amdgpu_bo_handle_type_kms:
>>>               amdgpu_add_handle_to_table(bo);
>>> +             /* fall through */
>>> +     case amdgpu_bo_handle_type_kms_noimport:
>>>               *shared_handle = bo->handle;
>>>               return 0;
>>
>> What is the rationale for this? I.e. why do you want to not store some
>> handles in the hash table?
> 
> 
> Because I have the option.

Seems like you're expecting this patch to be accepted without providing
any real justification for it (here or in the corresponding Mesa patch).
NAK from me if so.


>> And how can code using amdgpu_bo_handle_type_kms_noimport be sure that
>> the BO will never be re-imported via dma-buf?
> 
> That's for the user to decide and prove when it's safe.

We shouldn't even have to think about this, let's use the mental
capacity for more useful things. :)

I'd rather add the handle to the hash table in amdgpu_bo_alloc,
amdgpu_create_bo_from_user_mem and amdgpu_bo_import instead of in
amdgpu_bo_export, making amdgpu_bo_export(bo, amdgpu_bo_handle_type_kms,
...) essentially free. In the unlikely (since allocating a BO from the
kernel is expensive) case that the hash table shows up on profiles, we
can optimize it.
On Fri, Jul 13, 2018 at 4:28 AM, Michel Dänzer <michel@daenzer.net> wrote:
> On 2018-07-12 07:03 PM, Marek Olšák wrote:
>> On Thu, Jul 12, 2018, 3:31 AM Michel Dänzer <michel@daenzer.net> wrote:
>>> On 2018-07-12 02:47 AM, Marek Olšák wrote:
>>>> From: Marek Olšák <marek.olsak@amd.com>
>>>>
>>>> diff --git a/amdgpu/amdgpu_bo.c b/amdgpu/amdgpu_bo.c
>>>> index 9e37b149..d29be244 100644
>>>> --- a/amdgpu/amdgpu_bo.c
>>>> +++ b/amdgpu/amdgpu_bo.c
>>>> @@ -234,20 +234,22 @@ int amdgpu_bo_export(amdgpu_bo_handle bo,
>>>>       case amdgpu_bo_handle_type_gem_flink_name:
>>>>               r = amdgpu_bo_export_flink(bo);
>>>>               if (r)
>>>>                       return r;
>>>>
>>>>               *shared_handle = bo->flink_name;
>>>>               return 0;
>>>>
>>>>       case amdgpu_bo_handle_type_kms:
>>>>               amdgpu_add_handle_to_table(bo);
>>>> +             /* fall through */
>>>> +     case amdgpu_bo_handle_type_kms_noimport:
>>>>               *shared_handle = bo->handle;
>>>>               return 0;
>>>
>>> What is the rationale for this? I.e. why do you want to not store some
>>> handles in the hash table?
>>
>>
>> Because I have the option.
>
> Seems like you're expecting this patch to be accepted without providing
> any real justification for it (here or in the corresponding Mesa patch).
> NAK from me if so.

The real justification is implied by the patch. See: amdgpu_add_handle_to_table
Like I said: There is no risk of regression and it simplifies one
simple case trivially. We shouldn't have to even talk about it.

>
>
>>> And how can code using amdgpu_bo_handle_type_kms_noimport be sure that
>>> the BO will never be re-imported via dma-buf?
>>
>> That's for the user to decide and prove when it's safe.
>
> We shouldn't even have to think about this, let's use the mental
> capacity for more useful things. :)

Mental capacity spent to write the patch: 15 seconds
Mental capacity spent for bike-shedding: Minutes? Tens of minutes?

>
> I'd rather add the handle to the hash table in amdgpu_bo_alloc,
> amdgpu_create_bo_from_user_mem and amdgpu_bo_import instead of in
> amdgpu_bo_export, making amdgpu_bo_export(bo, amdgpu_bo_handle_type_kms,
> ...) essentially free. In the unlikely (since allocating a BO from the
> kernel is expensive) case that the hash table shows up on profiles, we
> can optimize it.

The hash table isn't very good for high BO counts. The time complexity
of a lookup is O(n).

Marek
On 2018-07-13 08:47 PM, Marek Olšák wrote:
> On Fri, Jul 13, 2018 at 4:28 AM, Michel Dänzer <michel@daenzer.net> wrote:
>> On 2018-07-12 07:03 PM, Marek Olšák wrote:
>>> On Thu, Jul 12, 2018, 3:31 AM Michel Dänzer <michel@daenzer.net> wrote:
>>>>
>>>> What is the rationale for this? I.e. why do you want to not store some
>>>> handles in the hash table?
>>>
>>>
>>> Because I have the option.
>>
>> Seems like you're expecting this patch to be accepted without providing
>> any real justification for it (here or in the corresponding Mesa patch).
>> NAK from me if so.
> 
> The real justification is implied by the patch. See: amdgpu_add_handle_to_table
> Like I said: There is no risk of regression and it simplifies one
> simple case trivially. We shouldn't have to even talk about it.

IMO you haven't provided enough justification for adding API which is
prone to breakage if used incorrectly.

Other opinions?


>> I'd rather add the handle to the hash table in amdgpu_bo_alloc,
>> amdgpu_create_bo_from_user_mem and amdgpu_bo_import instead of in
>> amdgpu_bo_export, making amdgpu_bo_export(bo, amdgpu_bo_handle_type_kms,
>> ...) essentially free. In the unlikely (since allocating a BO from the
>> kernel is expensive) case that the hash table shows up on profiles, we
>> can optimize it.
> 
> The hash table isn't very good for high BO counts. The time complexity
> of a lookup is O(n).

A lookup is only needed in amdgpu_bo_import. amdgpu_bo_alloc and
amdgpu_create_bo_from_user_mem can just add the handle to the hash
bucket directly.

Do you know of, or can you imagine, any workload where amdgpu_bo_import
is called often enough for this to be a concern?
On Mon, Jul 16, 2018 at 12:05 PM, Michel Dänzer <michel@daenzer.net> wrote:
> On 2018-07-13 08:47 PM, Marek Olšák wrote:
>> On Fri, Jul 13, 2018 at 4:28 AM, Michel Dänzer <michel@daenzer.net> wrote:
>>> On 2018-07-12 07:03 PM, Marek Olšák wrote:
>>>> On Thu, Jul 12, 2018, 3:31 AM Michel Dänzer <michel@daenzer.net> wrote:
>>>>>
>>>>> What is the rationale for this? I.e. why do you want to not store some
>>>>> handles in the hash table?
>>>>
>>>>
>>>> Because I have the option.
>>>
>>> Seems like you're expecting this patch to be accepted without providing
>>> any real justification for it (here or in the corresponding Mesa patch).
>>> NAK from me if so.
>>
>> The real justification is implied by the patch. See: amdgpu_add_handle_to_table
>> Like I said: There is no risk of regression and it simplifies one
>> simple case trivially. We shouldn't have to even talk about it.
>
> IMO you haven't provided enough justification for adding API which is
> prone to breakage if used incorrectly.
>
> Other opinions?
>
>
>>> I'd rather add the handle to the hash table in amdgpu_bo_alloc,
>>> amdgpu_create_bo_from_user_mem and amdgpu_bo_import instead of in
>>> amdgpu_bo_export, making amdgpu_bo_export(bo, amdgpu_bo_handle_type_kms,
>>> ...) essentially free. In the unlikely (since allocating a BO from the
>>> kernel is expensive) case that the hash table shows up on profiles, we
>>> can optimize it.
>>
>> The hash table isn't very good for high BO counts. The time complexity
>> of a lookup is O(n).
>
> A lookup is only needed in amdgpu_bo_import. amdgpu_bo_alloc and
> amdgpu_create_bo_from_user_mem can just add the handle to the hash
> bucket directly.
>
> Do you know of, or can you imagine, any workload where amdgpu_bo_import
> is called often enough for this to be a concern?

Fullscreen DRI2 or DRI3 re-imports buffers every frame. It might show
up in a profiler.

Marek
Am 16.07.2018 um 18:05 schrieb Michel Dänzer:
> On 2018-07-13 08:47 PM, Marek Olšák wrote:
>> On Fri, Jul 13, 2018 at 4:28 AM, Michel Dänzer <michel@daenzer.net> wrote:
>>> On 2018-07-12 07:03 PM, Marek Olšák wrote:
>>>> On Thu, Jul 12, 2018, 3:31 AM Michel Dänzer <michel@daenzer.net> wrote:
>>>>> What is the rationale for this? I.e. why do you want to not store some
>>>>> handles in the hash table?
>>>>
>>>> Because I have the option.
>>> Seems like you're expecting this patch to be accepted without providing
>>> any real justification for it (here or in the corresponding Mesa patch).
>>> NAK from me if so.
>> The real justification is implied by the patch. See: amdgpu_add_handle_to_table
>> Like I said: There is no risk of regression and it simplifies one
>> simple case trivially. We shouldn't have to even talk about it.
> IMO you haven't provided enough justification for adding API which is
> prone to breakage if used incorrectly.
>
> Other opinions?

I understand the reason why Marek wants to do this, but I agree that 
this is a little bit dangerous if used incorrectly.

On the other hand I don't see any other way to sanely handle it either.

>
>
>>> I'd rather add the handle to the hash table in amdgpu_bo_alloc,
>>> amdgpu_create_bo_from_user_mem and amdgpu_bo_import instead of in
>>> amdgpu_bo_export, making amdgpu_bo_export(bo, amdgpu_bo_handle_type_kms,
>>> ...) essentially free. In the unlikely (since allocating a BO from the
>>> kernel is expensive) case that the hash table shows up on profiles, we
>>> can optimize it.
>> The hash table isn't very good for high BO counts. The time complexity
>> of a lookup is O(n).
> A lookup is only needed in amdgpu_bo_import. amdgpu_bo_alloc and
> amdgpu_create_bo_from_user_mem can just add the handle to the hash
> bucket directly.
>
> Do you know of, or can you imagine, any workload where amdgpu_bo_import
> is called often enough for this to be a concern?

MM interop with GFX usually imports BOs on each frame, so that can hurt 
here.

Christian.
On 2018-07-17 08:50 AM, Christian König wrote:
> Am 16.07.2018 um 18:05 schrieb Michel Dänzer:
>> On 2018-07-13 08:47 PM, Marek Olšák wrote:
>>> On Fri, Jul 13, 2018 at 4:28 AM, Michel Dänzer <michel@daenzer.net>
>>> wrote:
>>>> On 2018-07-12 07:03 PM, Marek Olšák wrote:
>>>>> On Thu, Jul 12, 2018, 3:31 AM Michel Dänzer <michel@daenzer.net>
>>>>> wrote:
>>>>>> What is the rationale for this? I.e. why do you want to not store
>>>>>> some
>>>>>> handles in the hash table?
>>>>>
>>>>> Because I have the option.
>>>> Seems like you're expecting this patch to be accepted without providing
>>>> any real justification for it (here or in the corresponding Mesa
>>>> patch).
>>>> NAK from me if so.
>>> The real justification is implied by the patch. See:
>>> amdgpu_add_handle_to_table
>>> Like I said: There is no risk of regression and it simplifies one
>>> simple case trivially. We shouldn't have to even talk about it.
>> IMO you haven't provided enough justification for adding API which is
>> prone to breakage if used incorrectly.
>>
>> Other opinions?
> 
> I understand the reason why Marek wants to do this, but I agree that
> this is a little bit dangerous if used incorrectly.
> 
> On the other hand I don't see any other way to sanely handle it either.

Sanely handle what exactly? :) I still haven't seen any description of
an actual problem, other than "the handle is stored in the hash table".


>>>> I'd rather add the handle to the hash table in amdgpu_bo_alloc,
>>>> amdgpu_create_bo_from_user_mem and amdgpu_bo_import instead of in
>>>> amdgpu_bo_export, making amdgpu_bo_export(bo,
>>>> amdgpu_bo_handle_type_kms,
>>>> ...) essentially free. In the unlikely (since allocating a BO from the
>>>> kernel is expensive) case that the hash table shows up on profiles, we
>>>> can optimize it.
>>> The hash table isn't very good for high BO counts. The time complexity
>>> of a lookup is O(n).
>> A lookup is only needed in amdgpu_bo_import. amdgpu_bo_alloc and
>> amdgpu_create_bo_from_user_mem can just add the handle to the hash
>> bucket directly.
>>
>> Do you know of, or can you imagine, any workload where amdgpu_bo_import
>> is called often enough for this to be a concern?
> 
> MM interop with GFX usually imports BOs on each frame, so that can hurt
> here.

That's always the same set of BOs in the steady state, right? So it's
easy to make the repeated lookups fast, by moving them to the start of
their hash buckets.
Am 17.07.2018 um 09:26 schrieb Michel Dänzer:
> On 2018-07-17 08:50 AM, Christian König wrote:
>> Am 16.07.2018 um 18:05 schrieb Michel Dänzer:
>>> On 2018-07-13 08:47 PM, Marek Olšák wrote:
>>> [SNIP]
>>> Other opinions?
>> I understand the reason why Marek wants to do this, but I agree that
>> this is a little bit dangerous if used incorrectly.
>>
>> On the other hand I don't see any other way to sanely handle it either.
> Sanely handle what exactly? :) I still haven't seen any description of
> an actual problem, other than "the handle is stored in the hash table".

Well the problem is that it's not "the handle" but rather "all handles" 
which are now stored in the hash table.

To begin with that is quite a bunch of wasted memory, not talking about 
the extra CPU cycles.

>> MM interop with GFX usually imports BOs on each frame, so that can hurt
>> here.
> That's always the same set of BOs in the steady state, right? So it's
> easy to make the repeated lookups fast, by moving them to the start of
> their hash buckets.

Yeah, that can help with the CPU cycles but it is still not ideal.

I think that Marek's change is justified, but we should add a comment 
explaining the restrictions.

Christian.
On 2018-07-17 09:33 AM, Christian König wrote:
> Am 17.07.2018 um 09:26 schrieb Michel Dänzer:
>> On 2018-07-17 08:50 AM, Christian König wrote:
>>> Am 16.07.2018 um 18:05 schrieb Michel Dänzer:
>>>> On 2018-07-13 08:47 PM, Marek Olšák wrote:
>>>> [SNIP]
>>>> Other opinions?
>>> I understand the reason why Marek wants to do this, but I agree that
>>> this is a little bit dangerous if used incorrectly.
>>>
>>> On the other hand I don't see any other way to sanely handle it either.
>> Sanely handle what exactly? :) I still haven't seen any description of
>> an actual problem, other than "the handle is stored in the hash table".
> 
> Well the problem is that it's not "the handle" but rather "all handles"
> which are now stored in the hash table.
> 
> To begin with that is quite a bunch of wasted memory, not talking about
> the extra CPU cycles.

All that should be needed is one struct list_head per BO, 16 bytes on
64-bit.
Am 17.07.2018 um 09:46 schrieb Michel Dänzer:
> On 2018-07-17 09:33 AM, Christian König wrote:
>> Am 17.07.2018 um 09:26 schrieb Michel Dänzer:
>>> On 2018-07-17 08:50 AM, Christian König wrote:
>>>> Am 16.07.2018 um 18:05 schrieb Michel Dänzer:
>>>>> On 2018-07-13 08:47 PM, Marek Olšák wrote:
>>>>> [SNIP]
>>>>> Other opinions?
>>>> I understand the reason why Marek wants to do this, but I agree that
>>>> this is a little bit dangerous if used incorrectly.
>>>>
>>>> On the other hand I don't see any other way to sanely handle it either.
>>> Sanely handle what exactly? :) I still haven't seen any description of
>>> an actual problem, other than "the handle is stored in the hash table".
>> Well the problem is that it's not "the handle" but rather "all handles"
>> which are now stored in the hash table.
>>
>> To begin with that is quite a bunch of wasted memory, not talking about
>> the extra CPU cycles.
> All that should be needed is one struct list_head per BO, 16 bytes on
> 64-bit.

+malloc overhead, and that for *every* BO the application/driver 
allocated. The last time I looked we could easily have a few thousand 
of those (but not in the same CS).

So I would guess that the wasted memory can easily be in the lower kb 
range, compared to adding just a flag saying that we are never going to 
import the handle again.

Christian.
On 2018-07-17 09:59 AM, Christian König wrote:
> Am 17.07.2018 um 09:46 schrieb Michel Dänzer:
>> On 2018-07-17 09:33 AM, Christian König wrote:
>>> Am 17.07.2018 um 09:26 schrieb Michel Dänzer:
>>>> On 2018-07-17 08:50 AM, Christian König wrote:
>>>>> Am 16.07.2018 um 18:05 schrieb Michel Dänzer:
>>>>>> On 2018-07-13 08:47 PM, Marek Olšák wrote:
>>>>>> [SNIP]
>>>>>> Other opinions?
>>>>> I understand the reason why Marek wants to do this, but I agree that
>>>>> this is a little bit dangerous if used incorrectly.
>>>>>
>>>>> On the other hand I don't see any other way to sanely handle it
>>>>> either.
>>>> Sanely handle what exactly? :) I still haven't seen any description of
>>>> an actual problem, other than "the handle is stored in the hash table".
>>> Well the problem is that it's not "the handle" but rather "all handles"
>>> which are now stored in the hash table.
>>>
>>> To begin with that is quite a bunch of wasted memory, not talking about
>>> the extra CPU cycles.
>> All that should be needed is one struct list_head per BO, 16 bytes on
>> 64-bit.
> 
> +malloc overhead and that for *every* BO the application/driver
> allocated.

The struct list_head can be stored in struct amdgpu_bo, no additional
malloc necessary.


> The last time I looked we could easily have a few thousands of that
> (but not in the same CS).
> 
> So I would guess that the wasted memory can easily be in the lower kb
> range, compared to adding just a flag that we never going to import the
> handle again.

I wouldn't call the memory "wasted", as it serves a clear purpose.
Am 17.07.2018 um 10:03 schrieb Michel Dänzer:
> On 2018-07-17 09:59 AM, Christian König wrote:
>> Am 17.07.2018 um 09:46 schrieb Michel Dänzer:
>>> On 2018-07-17 09:33 AM, Christian König wrote:
>>>> Am 17.07.2018 um 09:26 schrieb Michel Dänzer:
>>>> [SNIP]
>>> All that should be needed is one struct list_head per BO, 16 bytes on
>>> 64-bit.
>> +malloc overhead and that for *every* BO the application/driver
>> allocated.
> The struct list_head can be stored in struct amdgpu_bo, no additional
> malloc necessary.

Well, that sounds like we are not talking about the same code, doesn't it?

IIRC the hashtable implementation in libdrm is using an ever growing 
array for the BOs and *NOT* a linked list.

So we have at least two mallocs involved here, the one for the key/value 
pair and the one for the node array.

Regards,
Christian.

>
>
>> The last time I looked we could easily have a few thousands of that
>> (but not in the same CS).
>>
>> So I would guess that the wasted memory can easily be in the lower kb
>> range, compared to adding just a flag that we never going to import the
>> handle again.
> I wouldn't call the memory "wasted", as it serves a clear purpose.
>
>
On 2018-07-17 10:19 AM, Christian König wrote:
> Am 17.07.2018 um 10:03 schrieb Michel Dänzer:
>> On 2018-07-17 09:59 AM, Christian König wrote:
>>> Am 17.07.2018 um 09:46 schrieb Michel Dänzer:
>>>> On 2018-07-17 09:33 AM, Christian König wrote:
>>>>> Am 17.07.2018 um 09:26 schrieb Michel Dänzer:
>>>>> [SNIP]
>>>> All that should be needed is one struct list_head per BO, 16 bytes on
>>>> 64-bit.
>>> +malloc overhead and that for *every* BO the application/driver
>>> allocated.
>> The struct list_head can be stored in struct amdgpu_bo, no additional
>> malloc necessary.
> 
> Well that sounds we are not talking about the same code, do we?
> 
> IIRC the hashtable implementation in libdrm is using an ever growing
> array for the BOs and *NOT* a linked list.

So let's use something more suitable, e.g.:

An array of 2^n struct list_head in struct amdgpu_device for the hash
buckets. The BO's handle is hashed to the bucket number

 handle & (2^n - 1)

and linked in there via struct list_head in struct amdgpu_bo.
amdgpu_bo_alloc and amdgpu_create_bo_from_user_mem add the handle at the
end of the list, amdgpu_bo_import adds it at or moves it to the beginning.
Am 17.07.2018 um 10:30 schrieb Michel Dänzer:
> On 2018-07-17 10:19 AM, Christian König wrote:
>> Am 17.07.2018 um 10:03 schrieb Michel Dänzer:
>>> On 2018-07-17 09:59 AM, Christian König wrote:
>>>> Am 17.07.2018 um 09:46 schrieb Michel Dänzer:
>>>>> On 2018-07-17 09:33 AM, Christian König wrote:
>>>>>> Am 17.07.2018 um 09:26 schrieb Michel Dänzer:
>>>>>> [SNIP]
>>>>> All that should be needed is one struct list_head per BO, 16 bytes on
>>>>> 64-bit.
>>>> +malloc overhead and that for *every* BO the application/driver
>>>> allocated.
>>> The struct list_head can be stored in struct amdgpu_bo, no additional
>>> malloc necessary.
>> Well that sounds we are not talking about the same code, do we?
>>
>> IIRC the hashtable implementation in libdrm is using an ever growing
>> array for the BOs and *NOT* a linked list.
> So let's use something more suitable, e.g.:
>
> An array of 2^n struct list_head in struct amdgpu_device for the hash
> buckets. The BO's handle is hashed to the bucket number
>
>   handle & (2^n - 1)
>
> and linked in there via struct list_head in struct amdgpu_bo.
> amdgpu_bo_alloc and amdgpu_create_bo_from_user_mem add the handle at the
> end of the list, amdgpu_bo_import adds it at or moves it to the beginning.

Yeah, that would certainly reduce the problem quite a bit and would 
allow us to get rid of the util_hash* implementation which to me always 
seemed to be a bit overkill.

I actually don't see a reason why amdgpu_create_bo_from_user_mem() 
should add the handle at all; those BOs are not exportable.

Christian.
On 2018-07-16 08:51 PM, Marek Olšák wrote:
> On Mon, Jul 16, 2018 at 12:05 PM, Michel Dänzer <michel@daenzer.net> wrote:
>> On 2018-07-13 08:47 PM, Marek Olšák wrote:
>>> On Fri, Jul 13, 2018 at 4:28 AM, Michel Dänzer <michel@daenzer.net> wrote:
>>
>>>> I'd rather add the handle to the hash table in amdgpu_bo_alloc,
>>>> amdgpu_create_bo_from_user_mem and amdgpu_bo_import instead of in
>>>> amdgpu_bo_export, making amdgpu_bo_export(bo, amdgpu_bo_handle_type_kms,
>>>> ...) essentially free. In the unlikely (since allocating a BO from the
>>>> kernel is expensive) case that the hash table shows up on profiles, we
>>>> can optimize it.
>>>
>>> The hash table isn't very good for high BO counts. The time complexity
>>> of a lookup is O(n).
>>
>> A lookup is only needed in amdgpu_bo_import. amdgpu_bo_alloc and
>> amdgpu_create_bo_from_user_mem can just add the handle to the hash
>> bucket directly.
>>
>> Do you know of, or can you imagine, any workload where amdgpu_bo_import
>> is called often enough for this to be a concern?
> 
> Fullscreen DRI2 or DRI3 re-imports buffers every frame.

DRI3 doesn't. The X server only imports each DRI3 buffer once, after
that it's referred to via the pixmap XID.


With DRI2 page flipping (ignoring that basically nobody's using that
anymore with radeonsi :), it's always the same set of buffers, so the
lookup can be made fast as discussed in the sub-thread with Christian.
(Also, DRI2 can only use page flipping with sync-to-vblank enabled, so
this happens on the order of hundreds of times per second max)
Michel, I think you are wasting your time. This change can be misused
as easily as any other API. It's not more dangerous than any other
amdgpu libdrm function. You won't achieve anything by optimizing the
hash table (= losing time), and you also won't achieve anything by
NAKing this (= losing performance on the lookup). Both are lose-lose
solutions, because you'll lose and others will lose too.

Marek

On Tue, Jul 17, 2018 at 4:57 AM, Michel Dänzer <michel@daenzer.net> wrote:
> On 2018-07-16 08:51 PM, Marek Olšák wrote:
>> On Mon, Jul 16, 2018 at 12:05 PM, Michel Dänzer <michel@daenzer.net> wrote:
>>> On 2018-07-13 08:47 PM, Marek Olšák wrote:
>>>> On Fri, Jul 13, 2018 at 4:28 AM, Michel Dänzer <michel@daenzer.net> wrote:
>>>
>>>>> I'd rather add the handle to the hash table in amdgpu_bo_alloc,
>>>>> amdgpu_create_bo_from_user_mem and amdgpu_bo_import instead of in
>>>>> amdgpu_bo_export, making amdgpu_bo_export(bo, amdgpu_bo_handle_type_kms,
>>>>> ...) essentially free. In the unlikely (since allocating a BO from the
>>>>> kernel is expensive) case that the hash table shows up on profiles, we
>>>>> can optimize it.
>>>>
>>>> The hash table isn't very good for high BO counts. The time complexity
>>>> of a lookup is O(n).
>>>
>>> A lookup is only needed in amdgpu_bo_import. amdgpu_bo_alloc and
>>> amdgpu_create_bo_from_user_mem can just add the handle to the hash
>>> bucket directly.
>>>
>>> Do you know of, or can you imagine, any workload where amdgpu_bo_import
>>> is called often enough for this to be a concern?
>>
>> Fullscreen DRI2 or DRI3 re-imports buffers every frame.
>
> DRI3 doesn't. The X server only imports each DRI3 buffer once, after
> that it's referred to via the pixmap XID.
>
>
> With DRI2 page flipping (ignoring that basically nobody's using that
> anymore with radeonsi :), it's always the same set of buffers, so the
> lookup can be made fast as discussed in the sub-thread with Christian.
> (Also, DRI2 can only use page flipping with sync-to-vblank enabled, so
> this happens on the order of hundreds of times per second max)
>
>
> --
> Earthling Michel Dänzer               |               http://www.amd.com
> Libre software enthusiast             |             Mesa and X developer
On 2018-07-17 08:14 PM, Marek Olšák wrote:
> Michel, I think you are wasting your time. This change can be misused
> as easily as any other API. It's not more dangerous that any other
> amdgpu libdrm function.

That's trivially false.

> You won't achieve anything by optimizing the hash table (= losing time),
> [...]

I think you're focusing too much on your immediate desire instead of the
big(ger) picture.

E.g. I see amdgpu_bo_export getting called from surprising places (in
Xorg), performing a hash table lookup each time. Fixing that would
achieve something, though probably not much.

Anyway, adding dangerous API (keep in mind that we don't control all
libdrm_amdgpu users, or even know how they're using it) for something
that can also be achieved without is just a bad idea. Avoiding that is
achievement enough.


This is my opinion. I don't have veto power over libdrm_amdgpu. At least
Christian seems in favour of this change.
On Wed, Jul 18, 2018 at 11:55 AM, Michel Dänzer <michel@daenzer.net> wrote:
> On 2018-07-17 08:14 PM, Marek Olšák wrote:
>> Michel, I think you are wasting your time. This change can be misused
>> as easily as any other API. It's not more dangerous that any other
>> amdgpu libdrm function.
>
> That's trivially false.
>
>> You won't achieve anything by optimizing the hash table (= losing time),
>> [...]
>
> I think you're focusing too much on your immediate desire instead of the
> big(ger) picture.
>
> E.g. I see amdgpu_bo_export getting called from surprising places (in
> Xorg), performing a hash table lookup each time. Fixing that would
> achieve something, though probably not much.

I know about the use in Xorg, and this patch actually indirectly
mentions it (it mentions Glamor in the code). The flag name contains
_noimport to be self-documenting and to mitigate incorrect usage.

>
> Anyway, adding dangerous API (keep in mind that we don't control all
> libdrm_amdgpu users, or even know how they're using it) for something
> that can also be achieved without is just a bad idea. Avoiding that is
> achievement enough.

We don't need to control other libdrm users. They can control
themselves. :) I'm totally fine with incorrect usage leading to bad
things, like any other bug. Much worse things can be done with the CS
ioctl.

Marek
Christian,

Would you please give me an Rb if the patch is OK with you? I have
spoken with Michel and he would be OK with me pushing it as long as it
gets an Rb from either you or Alex.

Thanks,
Marek

On Wed, Jul 11, 2018 at 8:47 PM, Marek Olšák <maraeo@gmail.com> wrote:
> From: Marek Olšák <marek.olsak@amd.com>
>
> ---
>  amdgpu/amdgpu.h    | 7 ++++++-
>  amdgpu/amdgpu_bo.c | 4 ++++
>  2 files changed, 10 insertions(+), 1 deletion(-)
>
> diff --git a/amdgpu/amdgpu.h b/amdgpu/amdgpu.h
> index 36f91058..be83b457 100644
> --- a/amdgpu/amdgpu.h
> +++ b/amdgpu/amdgpu.h
> @@ -77,21 +77,26 @@ struct drm_amdgpu_info_hw_ip;
>   *
>  */
>  enum amdgpu_bo_handle_type {
>         /** GEM flink name (needs DRM authentication, used by DRI2) */
>         amdgpu_bo_handle_type_gem_flink_name = 0,
>
>         /** KMS handle which is used by all driver ioctls */
>         amdgpu_bo_handle_type_kms = 1,
>
>         /** DMA-buf fd handle */
> -       amdgpu_bo_handle_type_dma_buf_fd = 2
> +       amdgpu_bo_handle_type_dma_buf_fd = 2,
> +
> +       /** KMS handle, but re-importing as a DMABUF handle through
> +        *  drmPrimeHandleToFD is forbidden. (Glamor does that)
> +        */
> +       amdgpu_bo_handle_type_kms_noimport = 3,
>  };
>
>  /** Define known types of GPU VM VA ranges */
>  enum amdgpu_gpu_va_range
>  {
>         /** Allocate from "normal"/general range */
>         amdgpu_gpu_va_range_general = 0
>  };
>
>  enum amdgpu_sw_info {
> diff --git a/amdgpu/amdgpu_bo.c b/amdgpu/amdgpu_bo.c
> index 9e37b149..d29be244 100644
> --- a/amdgpu/amdgpu_bo.c
> +++ b/amdgpu/amdgpu_bo.c
> @@ -234,20 +234,22 @@ int amdgpu_bo_export(amdgpu_bo_handle bo,
>         case amdgpu_bo_handle_type_gem_flink_name:
>                 r = amdgpu_bo_export_flink(bo);
>                 if (r)
>                         return r;
>
>                 *shared_handle = bo->flink_name;
>                 return 0;
>
>         case amdgpu_bo_handle_type_kms:
>                 amdgpu_add_handle_to_table(bo);
> +               /* fall through */
> +       case amdgpu_bo_handle_type_kms_noimport:
>                 *shared_handle = bo->handle;
>                 return 0;
>
>         case amdgpu_bo_handle_type_dma_buf_fd:
>                 amdgpu_add_handle_to_table(bo);
>                 return drmPrimeHandleToFD(bo->dev->fd, bo->handle,
>                                           DRM_CLOEXEC | DRM_RDWR,
>                                           (int*)shared_handle);
>         }
>         return -EINVAL;
> @@ -299,20 +301,21 @@ int amdgpu_bo_import(amdgpu_device_handle dev,
>                 bo = util_hash_table_get(dev->bo_flink_names,
>                                          (void*)(uintptr_t)shared_handle);
>                 break;
>
>         case amdgpu_bo_handle_type_dma_buf_fd:
>                 bo = util_hash_table_get(dev->bo_handles,
>                                          (void*)(uintptr_t)shared_handle);
>                 break;
>
>         case amdgpu_bo_handle_type_kms:
> +       case amdgpu_bo_handle_type_kms_noimport:
>                 /* Importing a KMS handle in not allowed. */
>                 pthread_mutex_unlock(&dev->bo_table_mutex);
>                 return -EPERM;
>
>         default:
>                 pthread_mutex_unlock(&dev->bo_table_mutex);
>                 return -EINVAL;
>         }
>
>         if (bo) {
> @@ -368,20 +371,21 @@ int amdgpu_bo_import(amdgpu_device_handle dev,
>                 util_hash_table_set(dev->bo_flink_names,
>                                     (void*)(uintptr_t)bo->flink_name, bo);
>                 break;
>
>         case amdgpu_bo_handle_type_dma_buf_fd:
>                 bo->handle = shared_handle;
>                 bo->alloc_size = dma_buf_size;
>                 break;
>
>         case amdgpu_bo_handle_type_kms:
> +       case amdgpu_bo_handle_type_kms_noimport:
>                 assert(0); /* unreachable */
>         }
>
>         /* Initialize it. */
>         atomic_set(&bo->refcount, 1);
>         bo->dev = dev;
>         pthread_mutex_init(&bo->cpu_access_mutex, NULL);
>
>         util_hash_table_set(dev->bo_handles, (void*)(uintptr_t)bo->handle, bo);
>         pthread_mutex_unlock(&dev->bo_table_mutex);
> --
> 2.17.1
>
Patch is Reviewed-by: Christian König <christian.koenig@amd.com>

Regards,
Christian.

Am 24.07.2018 um 20:11 schrieb Marek Olšák:
> Christian,
>
> Would you please give me an Rb if the patch is OK with you? I have
> spoken with Michel and he would be OK with me pushing it as long as it
> gets an Rb from either you or Alex.
>
> Thanks,
> Marek
>
> On Wed, Jul 11, 2018 at 8:47 PM, Marek Olšák <maraeo@gmail.com> wrote:
>> From: Marek Olšák <marek.olsak@amd.com>
>>
>> ---
>>   amdgpu/amdgpu.h    | 7 ++++++-
>>   amdgpu/amdgpu_bo.c | 4 ++++
>>   2 files changed, 10 insertions(+), 1 deletion(-)
>>
>> diff --git a/amdgpu/amdgpu.h b/amdgpu/amdgpu.h
>> index 36f91058..be83b457 100644
>> --- a/amdgpu/amdgpu.h
>> +++ b/amdgpu/amdgpu.h
>> @@ -77,21 +77,26 @@ struct drm_amdgpu_info_hw_ip;
>>    *
>>   */
>>   enum amdgpu_bo_handle_type {
>>          /** GEM flink name (needs DRM authentication, used by DRI2) */
>>          amdgpu_bo_handle_type_gem_flink_name = 0,
>>
>>          /** KMS handle which is used by all driver ioctls */
>>          amdgpu_bo_handle_type_kms = 1,
>>
>>          /** DMA-buf fd handle */
>> -       amdgpu_bo_handle_type_dma_buf_fd = 2
>> +       amdgpu_bo_handle_type_dma_buf_fd = 2,
>> +
>> +       /** KMS handle, but re-importing as a DMABUF handle through
>> +        *  drmPrimeHandleToFD is forbidden. (Glamor does that)
>> +        */
>> +       amdgpu_bo_handle_type_kms_noimport = 3,
>>   };
>>
>>   /** Define known types of GPU VM VA ranges */
>>   enum amdgpu_gpu_va_range
>>   {
>>          /** Allocate from "normal"/general range */
>>          amdgpu_gpu_va_range_general = 0
>>   };
>>
>>   enum amdgpu_sw_info {
>> diff --git a/amdgpu/amdgpu_bo.c b/amdgpu/amdgpu_bo.c
>> index 9e37b149..d29be244 100644
>> --- a/amdgpu/amdgpu_bo.c
>> +++ b/amdgpu/amdgpu_bo.c
>> @@ -234,20 +234,22 @@ int amdgpu_bo_export(amdgpu_bo_handle bo,
>>          case amdgpu_bo_handle_type_gem_flink_name:
>>                  r = amdgpu_bo_export_flink(bo);
>>                  if (r)
>>                          return r;
>>
>>                  *shared_handle = bo->flink_name;
>>                  return 0;
>>
>>          case amdgpu_bo_handle_type_kms:
>>                  amdgpu_add_handle_to_table(bo);
>> +               /* fall through */
>> +       case amdgpu_bo_handle_type_kms_noimport:
>>                  *shared_handle = bo->handle;
>>                  return 0;
>>
>>          case amdgpu_bo_handle_type_dma_buf_fd:
>>                  amdgpu_add_handle_to_table(bo);
>>                  return drmPrimeHandleToFD(bo->dev->fd, bo->handle,
>>                                            DRM_CLOEXEC | DRM_RDWR,
>>                                            (int*)shared_handle);
>>          }
>>          return -EINVAL;
>> @@ -299,20 +301,21 @@ int amdgpu_bo_import(amdgpu_device_handle dev,
>>                  bo = util_hash_table_get(dev->bo_flink_names,
>>                                           (void*)(uintptr_t)shared_handle);
>>                  break;
>>
>>          case amdgpu_bo_handle_type_dma_buf_fd:
>>                  bo = util_hash_table_get(dev->bo_handles,
>>                                           (void*)(uintptr_t)shared_handle);
>>                  break;
>>
>>          case amdgpu_bo_handle_type_kms:
>> +       case amdgpu_bo_handle_type_kms_noimport:
>>                  /* Importing a KMS handle in not allowed. */
>>                  pthread_mutex_unlock(&dev->bo_table_mutex);
>>                  return -EPERM;
>>
>>          default:
>>                  pthread_mutex_unlock(&dev->bo_table_mutex);
>>                  return -EINVAL;
>>          }
>>
>>          if (bo) {
>> @@ -368,20 +371,21 @@ int amdgpu_bo_import(amdgpu_device_handle dev,
>>                  util_hash_table_set(dev->bo_flink_names,
>>                                      (void*)(uintptr_t)bo->flink_name, bo);
>>                  break;
>>
>>          case amdgpu_bo_handle_type_dma_buf_fd:
>>                  bo->handle = shared_handle;
>>                  bo->alloc_size = dma_buf_size;
>>                  break;
>>
>>          case amdgpu_bo_handle_type_kms:
>> +       case amdgpu_bo_handle_type_kms_noimport:
>>                  assert(0); /* unreachable */
>>          }
>>
>>          /* Initialize it. */
>>          atomic_set(&bo->refcount, 1);
>>          bo->dev = dev;
>>          pthread_mutex_init(&bo->cpu_access_mutex, NULL);
>>
>>          util_hash_table_set(dev->bo_handles, (void*)(uintptr_t)bo->handle, bo);
>>          pthread_mutex_unlock(&dev->bo_table_mutex);
>> --
>> 2.17.1
>>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx