[9/9] drm/amdgpu: WIP add IOCTL interface for per VM BOs

Submitted by Christian König on Aug. 25, 2017, 9:38 a.m.

Details

Message ID 1503653899-1781-9-git-send-email-deathsimple@vodafone.de
State New
Headers show
Series "Series without cover letter" ( rev: 1 ) in AMD X.Org drivers

Not browsing as part of any series.

Commit Message

Christian König Aug. 25, 2017, 9:38 a.m.
From: Christian König <christian.koenig@amd.com>

Add the IOCTL interface so that applications can allocate per VM BOs.

Still WIP since not all corner cases are tested yet, but this reduces average
CS overhead for 10K BOs from 21ms down to 48us.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h       |  7 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c    |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c   | 59 ++++++++++++++++++++++---------
 drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c |  3 +-
 include/uapi/drm/amdgpu_drm.h             |  2 ++
 5 files changed, 51 insertions(+), 22 deletions(-)

Patch hide | download patch | download mbox

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index b1e817c..21cab36 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -457,9 +457,10 @@  struct amdgpu_sa_bo {
  */
 void amdgpu_gem_force_release(struct amdgpu_device *adev);
 int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
-				int alignment, u32 initial_domain,
-				u64 flags, bool kernel,
-				struct drm_gem_object **obj);
+			     int alignment, u32 initial_domain,
+			     u64 flags, bool kernel,
+			     struct reservation_object *resv,
+			     struct drm_gem_object **obj);
 
 int amdgpu_mode_dumb_create(struct drm_file *file_priv,
 			    struct drm_device *dev,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
index 0e907ea..7256f83 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
@@ -144,7 +144,7 @@  static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev,
 				       AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
 				       AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
 				       AMDGPU_GEM_CREATE_VRAM_CLEARED,
-				       true, &gobj);
+				       true, NULL, &gobj);
 	if (ret) {
 		pr_err("failed to allocate framebuffer (%d)\n", aligned_size);
 		return -ENOMEM;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index d028806..b8e8d67 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -44,11 +44,12 @@  void amdgpu_gem_object_free(struct drm_gem_object *gobj)
 }
 
 int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
-				int alignment, u32 initial_domain,
-				u64 flags, bool kernel,
-				struct drm_gem_object **obj)
+			     int alignment, u32 initial_domain,
+			     u64 flags, bool kernel,
+			     struct reservation_object *resv,
+			     struct drm_gem_object **obj)
 {
-	struct amdgpu_bo *robj;
+	struct amdgpu_bo *bo;
 	int r;
 
 	*obj = NULL;
@@ -59,7 +60,7 @@  int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
 
 retry:
 	r = amdgpu_bo_create(adev, size, alignment, kernel, initial_domain,
-			     flags, NULL, NULL, 0, &robj);
+			     flags, NULL, resv, 0, &bo);
 	if (r) {
 		if (r != -ERESTARTSYS) {
 			if (initial_domain == AMDGPU_GEM_DOMAIN_VRAM) {
@@ -71,7 +72,7 @@  int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
 		}
 		return r;
 	}
-	*obj = &robj->gem_base;
+	*obj = &bo->gem_base;
 
 	return 0;
 }
@@ -136,13 +137,14 @@  void amdgpu_gem_object_close(struct drm_gem_object *obj,
 	struct amdgpu_vm *vm = &fpriv->vm;
 
 	struct amdgpu_bo_list_entry vm_pd;
-	struct list_head list;
+	struct list_head list, duplicates;
 	struct ttm_validate_buffer tv;
 	struct ww_acquire_ctx ticket;
 	struct amdgpu_bo_va *bo_va;
 	int r;
 
 	INIT_LIST_HEAD(&list);
+	INIT_LIST_HEAD(&duplicates);
 
 	tv.bo = &bo->tbo;
 	tv.shared = true;
@@ -150,7 +152,7 @@  void amdgpu_gem_object_close(struct drm_gem_object *obj,
 
 	amdgpu_vm_get_pd_bo(vm, &list, &vm_pd);
 
-	r = ttm_eu_reserve_buffers(&ticket, &list, false, NULL);
+	r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates);
 	if (r) {
 		dev_err(adev->dev, "leaking bo va because "
 			"we fail to reserve bo (%d)\n", r);
@@ -185,9 +187,12 @@  int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
 			    struct drm_file *filp)
 {
 	struct amdgpu_device *adev = dev->dev_private;
+	struct amdgpu_fpriv *fpriv = filp->driver_priv;
+	struct amdgpu_vm *vm = &fpriv->vm;
 	union drm_amdgpu_gem_create *args = data;
 	uint64_t flags = args->in.domain_flags;
 	uint64_t size = args->in.bo_size;
+	struct reservation_object *resv = NULL;
 	struct drm_gem_object *gobj;
 	uint32_t handle;
 	int r;
@@ -196,7 +201,8 @@  int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
 	if (flags & ~(AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
 		      AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
 		      AMDGPU_GEM_CREATE_CPU_GTT_USWC |
-		      AMDGPU_GEM_CREATE_VRAM_CLEARED))
+		      AMDGPU_GEM_CREATE_VRAM_CLEARED |
+		      AMDGPU_GEM_CREATE_LOCAL))
 		return -EINVAL;
 
 	/* reject invalid gem domains */
@@ -223,9 +229,25 @@  int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
 	}
 	size = roundup(size, PAGE_SIZE);
 
+	if (flags & AMDGPU_GEM_CREATE_LOCAL) {
+		r = amdgpu_bo_reserve(vm->root.base.bo, false);
+		if (r)
+			return r;
+
+		resv = vm->root.base.bo->tbo.resv;
+	}
+
 	r = amdgpu_gem_object_create(adev, size, args->in.alignment,
 				     (u32)(0xffffffff & args->in.domains),
-				     flags, false, &gobj);
+				     flags, false, resv, &gobj);
+	if (flags & AMDGPU_GEM_CREATE_LOCAL) {
+		if (!r) {
+			struct amdgpu_bo *abo = gem_to_amdgpu_bo(gobj);
+
+			abo->parent = amdgpu_bo_ref(vm->root.base.bo);
+		}
+		amdgpu_bo_unreserve(vm->root.base.bo);
+	}
 	if (r)
 		return r;
 
@@ -267,9 +289,8 @@  int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
 	}
 
 	/* create a gem object to contain this object in */
-	r = amdgpu_gem_object_create(adev, args->size, 0,
-				     AMDGPU_GEM_DOMAIN_CPU, 0,
-				     0, &gobj);
+	r = amdgpu_gem_object_create(adev, args->size, 0, AMDGPU_GEM_DOMAIN_CPU,
+				     0, 0, NULL, &gobj);
 	if (r)
 		return r;
 
@@ -521,7 +542,7 @@  int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
 	struct amdgpu_bo_list_entry vm_pd;
 	struct ttm_validate_buffer tv;
 	struct ww_acquire_ctx ticket;
-	struct list_head list;
+	struct list_head list, duplicates;
 	uint64_t va_flags;
 	int r = 0;
 
@@ -557,6 +578,7 @@  int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
 	}
 
 	INIT_LIST_HEAD(&list);
+	INIT_LIST_HEAD(&duplicates);
 	if ((args->operation != AMDGPU_VA_OP_CLEAR) &&
 	    !(args->flags & AMDGPU_VM_PAGE_PRT)) {
 		gobj = drm_gem_object_lookup(filp, args->handle);
@@ -573,7 +595,7 @@  int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
 
 	amdgpu_vm_get_pd_bo(&fpriv->vm, &list, &vm_pd);
 
-	r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL);
+	r = ttm_eu_reserve_buffers(&ticket, &list, true, &duplicates);
 	if (r)
 		goto error_unref;
 
@@ -639,6 +661,7 @@  int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
 int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
 			struct drm_file *filp)
 {
+	struct amdgpu_device *adev = dev->dev_private;
 	struct drm_amdgpu_gem_op *args = data;
 	struct drm_gem_object *gobj;
 	struct amdgpu_bo *robj;
@@ -686,6 +709,9 @@  int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
 		if (robj->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM)
 			robj->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT;
 
+		if (robj->flags & AMDGPU_GEM_CREATE_LOCAL)
+			amdgpu_vm_bo_invalidate(adev, robj, true);
+
 		amdgpu_bo_unreserve(robj);
 		break;
 	default:
@@ -715,8 +741,7 @@  int amdgpu_mode_dumb_create(struct drm_file *file_priv,
 	r = amdgpu_gem_object_create(adev, args->size, 0,
 				     AMDGPU_GEM_DOMAIN_VRAM,
 				     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
-				     ttm_bo_type_device,
-				     &gobj);
+				     false, NULL, &gobj);
 	if (r)
 		return -ENOMEM;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
index 5b3f928..f407499 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
@@ -136,7 +136,8 @@  struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
 {
 	struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj);
 
-	if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm))
+	if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) ||
+	    bo->flags & AMDGPU_GEM_CREATE_LOCAL)
 		return ERR_PTR(-EPERM);
 
 	return drm_gem_prime_export(dev, gobj, flags);
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index d0ee739..05241a6 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -89,6 +89,8 @@  extern "C" {
 #define AMDGPU_GEM_CREATE_SHADOW		(1 << 4)
 /* Flag that allocating the BO should use linear VRAM */
 #define AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS	(1 << 5)
+/* Flag that BO is local in the VM */
+#define AMDGPU_GEM_CREATE_LOCAL			(1 << 6)
 
 struct drm_amdgpu_gem_create_in  {
 	/** the requested memory size */

Comments

On 2017年08月25日 17:38, Christian König wrote:
> From: Christian König <christian.koenig@amd.com>
>
> Add the IOCTL interface so that applications can allocate per VM BOs.
>
> Still WIP since not all corner cases are tested yet, but this reduces average
> CS overhead for 10K BOs from 21ms down to 48us.
Wow, cheers, eventually you get per vm bo to same reservation with 
PD/pts, indeed save a lot of bo list.

overall looks good, I will take a detailed check for this tomorrow.

Regards,
David Zhou
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu.h       |  7 ++--
>   drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c    |  2 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c   | 59 ++++++++++++++++++++++---------
>   drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c |  3 +-
>   include/uapi/drm/amdgpu_drm.h             |  2 ++
>   5 files changed, 51 insertions(+), 22 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index b1e817c..21cab36 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -457,9 +457,10 @@ struct amdgpu_sa_bo {
>    */
>   void amdgpu_gem_force_release(struct amdgpu_device *adev);
>   int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
> -				int alignment, u32 initial_domain,
> -				u64 flags, bool kernel,
> -				struct drm_gem_object **obj);
> +			     int alignment, u32 initial_domain,
> +			     u64 flags, bool kernel,
> +			     struct reservation_object *resv,
> +			     struct drm_gem_object **obj);
>   
>   int amdgpu_mode_dumb_create(struct drm_file *file_priv,
>   			    struct drm_device *dev,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
> index 0e907ea..7256f83 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
> @@ -144,7 +144,7 @@ static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev,
>   				       AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
>   				       AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
>   				       AMDGPU_GEM_CREATE_VRAM_CLEARED,
> -				       true, &gobj);
> +				       true, NULL, &gobj);
>   	if (ret) {
>   		pr_err("failed to allocate framebuffer (%d)\n", aligned_size);
>   		return -ENOMEM;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> index d028806..b8e8d67 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> @@ -44,11 +44,12 @@ void amdgpu_gem_object_free(struct drm_gem_object *gobj)
>   }
>   
>   int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
> -				int alignment, u32 initial_domain,
> -				u64 flags, bool kernel,
> -				struct drm_gem_object **obj)
> +			     int alignment, u32 initial_domain,
> +			     u64 flags, bool kernel,
> +			     struct reservation_object *resv,
> +			     struct drm_gem_object **obj)
>   {
> -	struct amdgpu_bo *robj;
> +	struct amdgpu_bo *bo;
>   	int r;
>   
>   	*obj = NULL;
> @@ -59,7 +60,7 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
>   
>   retry:
>   	r = amdgpu_bo_create(adev, size, alignment, kernel, initial_domain,
> -			     flags, NULL, NULL, 0, &robj);
> +			     flags, NULL, resv, 0, &bo);
>   	if (r) {
>   		if (r != -ERESTARTSYS) {
>   			if (initial_domain == AMDGPU_GEM_DOMAIN_VRAM) {
> @@ -71,7 +72,7 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
>   		}
>   		return r;
>   	}
> -	*obj = &robj->gem_base;
> +	*obj = &bo->gem_base;
>   
>   	return 0;
>   }
> @@ -136,13 +137,14 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj,
>   	struct amdgpu_vm *vm = &fpriv->vm;
>   
>   	struct amdgpu_bo_list_entry vm_pd;
> -	struct list_head list;
> +	struct list_head list, duplicates;
>   	struct ttm_validate_buffer tv;
>   	struct ww_acquire_ctx ticket;
>   	struct amdgpu_bo_va *bo_va;
>   	int r;
>   
>   	INIT_LIST_HEAD(&list);
> +	INIT_LIST_HEAD(&duplicates);
>   
>   	tv.bo = &bo->tbo;
>   	tv.shared = true;
> @@ -150,7 +152,7 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj,
>   
>   	amdgpu_vm_get_pd_bo(vm, &list, &vm_pd);
>   
> -	r = ttm_eu_reserve_buffers(&ticket, &list, false, NULL);
> +	r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates);
>   	if (r) {
>   		dev_err(adev->dev, "leaking bo va because "
>   			"we fail to reserve bo (%d)\n", r);
> @@ -185,9 +187,12 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
>   			    struct drm_file *filp)
>   {
>   	struct amdgpu_device *adev = dev->dev_private;
> +	struct amdgpu_fpriv *fpriv = filp->driver_priv;
> +	struct amdgpu_vm *vm = &fpriv->vm;
>   	union drm_amdgpu_gem_create *args = data;
>   	uint64_t flags = args->in.domain_flags;
>   	uint64_t size = args->in.bo_size;
> +	struct reservation_object *resv = NULL;
>   	struct drm_gem_object *gobj;
>   	uint32_t handle;
>   	int r;
> @@ -196,7 +201,8 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
>   	if (flags & ~(AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
>   		      AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
>   		      AMDGPU_GEM_CREATE_CPU_GTT_USWC |
> -		      AMDGPU_GEM_CREATE_VRAM_CLEARED))
> +		      AMDGPU_GEM_CREATE_VRAM_CLEARED |
> +		      AMDGPU_GEM_CREATE_LOCAL))
>   		return -EINVAL;
>   
>   	/* reject invalid gem domains */
> @@ -223,9 +229,25 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
>   	}
>   	size = roundup(size, PAGE_SIZE);
>   
> +	if (flags & AMDGPU_GEM_CREATE_LOCAL) {
> +		r = amdgpu_bo_reserve(vm->root.base.bo, false);
> +		if (r)
> +			return r;
> +
> +		resv = vm->root.base.bo->tbo.resv;
> +	}
> +
>   	r = amdgpu_gem_object_create(adev, size, args->in.alignment,
>   				     (u32)(0xffffffff & args->in.domains),
> -				     flags, false, &gobj);
> +				     flags, false, resv, &gobj);
> +	if (flags & AMDGPU_GEM_CREATE_LOCAL) {
> +		if (!r) {
> +			struct amdgpu_bo *abo = gem_to_amdgpu_bo(gobj);
> +
> +			abo->parent = amdgpu_bo_ref(vm->root.base.bo);
> +		}
> +		amdgpu_bo_unreserve(vm->root.base.bo);
> +	}
>   	if (r)
>   		return r;
>   
> @@ -267,9 +289,8 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
>   	}
>   
>   	/* create a gem object to contain this object in */
> -	r = amdgpu_gem_object_create(adev, args->size, 0,
> -				     AMDGPU_GEM_DOMAIN_CPU, 0,
> -				     0, &gobj);
> +	r = amdgpu_gem_object_create(adev, args->size, 0, AMDGPU_GEM_DOMAIN_CPU,
> +				     0, 0, NULL, &gobj);
>   	if (r)
>   		return r;
>   
> @@ -521,7 +542,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
>   	struct amdgpu_bo_list_entry vm_pd;
>   	struct ttm_validate_buffer tv;
>   	struct ww_acquire_ctx ticket;
> -	struct list_head list;
> +	struct list_head list, duplicates;
>   	uint64_t va_flags;
>   	int r = 0;
>   
> @@ -557,6 +578,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
>   	}
>   
>   	INIT_LIST_HEAD(&list);
> +	INIT_LIST_HEAD(&duplicates);
>   	if ((args->operation != AMDGPU_VA_OP_CLEAR) &&
>   	    !(args->flags & AMDGPU_VM_PAGE_PRT)) {
>   		gobj = drm_gem_object_lookup(filp, args->handle);
> @@ -573,7 +595,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
>   
>   	amdgpu_vm_get_pd_bo(&fpriv->vm, &list, &vm_pd);
>   
> -	r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL);
> +	r = ttm_eu_reserve_buffers(&ticket, &list, true, &duplicates);
>   	if (r)
>   		goto error_unref;
>   
> @@ -639,6 +661,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
>   int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
>   			struct drm_file *filp)
>   {
> +	struct amdgpu_device *adev = dev->dev_private;
>   	struct drm_amdgpu_gem_op *args = data;
>   	struct drm_gem_object *gobj;
>   	struct amdgpu_bo *robj;
> @@ -686,6 +709,9 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
>   		if (robj->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM)
>   			robj->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT;
>   
> +		if (robj->flags & AMDGPU_GEM_CREATE_LOCAL)
> +			amdgpu_vm_bo_invalidate(adev, robj, true);
> +
>   		amdgpu_bo_unreserve(robj);
>   		break;
>   	default:
> @@ -715,8 +741,7 @@ int amdgpu_mode_dumb_create(struct drm_file *file_priv,
>   	r = amdgpu_gem_object_create(adev, args->size, 0,
>   				     AMDGPU_GEM_DOMAIN_VRAM,
>   				     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
> -				     ttm_bo_type_device,
> -				     &gobj);
> +				     false, NULL, &gobj);
>   	if (r)
>   		return -ENOMEM;
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
> index 5b3f928..f407499 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
> @@ -136,7 +136,8 @@ struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
>   {
>   	struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj);
>   
> -	if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm))
> +	if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) ||
> +	    bo->flags & AMDGPU_GEM_CREATE_LOCAL)
>   		return ERR_PTR(-EPERM);
>   
>   	return drm_gem_prime_export(dev, gobj, flags);
> diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
> index d0ee739..05241a6 100644
> --- a/include/uapi/drm/amdgpu_drm.h
> +++ b/include/uapi/drm/amdgpu_drm.h
> @@ -89,6 +89,8 @@ extern "C" {
>   #define AMDGPU_GEM_CREATE_SHADOW		(1 << 4)
>   /* Flag that allocating the BO should use linear VRAM */
>   #define AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS	(1 << 5)
> +/* Flag that BO is local in the VM */
> +#define AMDGPU_GEM_CREATE_LOCAL			(1 << 6)
>   
>   struct drm_amdgpu_gem_create_in  {
>   	/** the requested memory size */
Am 25.08.2017 um 12:32 schrieb zhoucm1:
>
>
> On 2017年08月25日 17:38, Christian König wrote:
>> From: Christian König <christian.koenig@amd.com>
>>
>> Add the IOCTL interface so that applications can allocate per VM BOs.
>>
>> Still WIP since not all corner cases are tested yet, but this reduces 
>> average
>> CS overhead for 10K BOs from 21ms down to 48us.
> Wow, cheers, eventually you get per vm bo to same reservation with 
> PD/pts, indeed save a lot of bo list.

Don't cheer to loud yet, that is a completely constructed test case.

So far I wasn't able to archive any improvements with any real game on 
this with Mesa.

BTW: Marek can you take a look with some CPU bound tests? I can provide 
a kernel branch if necessary.

Regards,
Christian.

> overall looks good, I will take a detailed check for this tomorrow.
>
> Regards,
> David Zhou
>>
>> Signed-off-by: Christian König <christian.koenig@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu.h       |  7 ++--
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c    |  2 +-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c   | 59 
>> ++++++++++++++++++++++---------
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c |  3 +-
>>   include/uapi/drm/amdgpu_drm.h             |  2 ++
>>   5 files changed, 51 insertions(+), 22 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> index b1e817c..21cab36 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> @@ -457,9 +457,10 @@ struct amdgpu_sa_bo {
>>    */
>>   void amdgpu_gem_force_release(struct amdgpu_device *adev);
>>   int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned 
>> long size,
>> -                int alignment, u32 initial_domain,
>> -                u64 flags, bool kernel,
>> -                struct drm_gem_object **obj);
>> +                 int alignment, u32 initial_domain,
>> +                 u64 flags, bool kernel,
>> +                 struct reservation_object *resv,
>> +                 struct drm_gem_object **obj);
>>     int amdgpu_mode_dumb_create(struct drm_file *file_priv,
>>                   struct drm_device *dev,
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
>> index 0e907ea..7256f83 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
>> @@ -144,7 +144,7 @@ static int amdgpufb_create_pinned_object(struct 
>> amdgpu_fbdev *rfbdev,
>>                          AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
>>                          AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
>>                          AMDGPU_GEM_CREATE_VRAM_CLEARED,
>> -                       true, &gobj);
>> +                       true, NULL, &gobj);
>>       if (ret) {
>>           pr_err("failed to allocate framebuffer (%d)\n", aligned_size);
>>           return -ENOMEM;
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>> index d028806..b8e8d67 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>> @@ -44,11 +44,12 @@ void amdgpu_gem_object_free(struct drm_gem_object 
>> *gobj)
>>   }
>>     int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned 
>> long size,
>> -                int alignment, u32 initial_domain,
>> -                u64 flags, bool kernel,
>> -                struct drm_gem_object **obj)
>> +                 int alignment, u32 initial_domain,
>> +                 u64 flags, bool kernel,
>> +                 struct reservation_object *resv,
>> +                 struct drm_gem_object **obj)
>>   {
>> -    struct amdgpu_bo *robj;
>> +    struct amdgpu_bo *bo;
>>       int r;
>>         *obj = NULL;
>> @@ -59,7 +60,7 @@ int amdgpu_gem_object_create(struct amdgpu_device 
>> *adev, unsigned long size,
>>     retry:
>>       r = amdgpu_bo_create(adev, size, alignment, kernel, 
>> initial_domain,
>> -                 flags, NULL, NULL, 0, &robj);
>> +                 flags, NULL, resv, 0, &bo);
>>       if (r) {
>>           if (r != -ERESTARTSYS) {
>>               if (initial_domain == AMDGPU_GEM_DOMAIN_VRAM) {
>> @@ -71,7 +72,7 @@ int amdgpu_gem_object_create(struct amdgpu_device 
>> *adev, unsigned long size,
>>           }
>>           return r;
>>       }
>> -    *obj = &robj->gem_base;
>> +    *obj = &bo->gem_base;
>>         return 0;
>>   }
>> @@ -136,13 +137,14 @@ void amdgpu_gem_object_close(struct 
>> drm_gem_object *obj,
>>       struct amdgpu_vm *vm = &fpriv->vm;
>>         struct amdgpu_bo_list_entry vm_pd;
>> -    struct list_head list;
>> +    struct list_head list, duplicates;
>>       struct ttm_validate_buffer tv;
>>       struct ww_acquire_ctx ticket;
>>       struct amdgpu_bo_va *bo_va;
>>       int r;
>>         INIT_LIST_HEAD(&list);
>> +    INIT_LIST_HEAD(&duplicates);
>>         tv.bo = &bo->tbo;
>>       tv.shared = true;
>> @@ -150,7 +152,7 @@ void amdgpu_gem_object_close(struct 
>> drm_gem_object *obj,
>>         amdgpu_vm_get_pd_bo(vm, &list, &vm_pd);
>>   -    r = ttm_eu_reserve_buffers(&ticket, &list, false, NULL);
>> +    r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates);
>>       if (r) {
>>           dev_err(adev->dev, "leaking bo va because "
>>               "we fail to reserve bo (%d)\n", r);
>> @@ -185,9 +187,12 @@ int amdgpu_gem_create_ioctl(struct drm_device 
>> *dev, void *data,
>>                   struct drm_file *filp)
>>   {
>>       struct amdgpu_device *adev = dev->dev_private;
>> +    struct amdgpu_fpriv *fpriv = filp->driver_priv;
>> +    struct amdgpu_vm *vm = &fpriv->vm;
>>       union drm_amdgpu_gem_create *args = data;
>>       uint64_t flags = args->in.domain_flags;
>>       uint64_t size = args->in.bo_size;
>> +    struct reservation_object *resv = NULL;
>>       struct drm_gem_object *gobj;
>>       uint32_t handle;
>>       int r;
>> @@ -196,7 +201,8 @@ int amdgpu_gem_create_ioctl(struct drm_device 
>> *dev, void *data,
>>       if (flags & ~(AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
>>                 AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
>>                 AMDGPU_GEM_CREATE_CPU_GTT_USWC |
>> -              AMDGPU_GEM_CREATE_VRAM_CLEARED))
>> +              AMDGPU_GEM_CREATE_VRAM_CLEARED |
>> +              AMDGPU_GEM_CREATE_LOCAL))
>>           return -EINVAL;
>>         /* reject invalid gem domains */
>> @@ -223,9 +229,25 @@ int amdgpu_gem_create_ioctl(struct drm_device 
>> *dev, void *data,
>>       }
>>       size = roundup(size, PAGE_SIZE);
>>   +    if (flags & AMDGPU_GEM_CREATE_LOCAL) {
>> +        r = amdgpu_bo_reserve(vm->root.base.bo, false);
>> +        if (r)
>> +            return r;
>> +
>> +        resv = vm->root.base.bo->tbo.resv;
>> +    }
>> +
>>       r = amdgpu_gem_object_create(adev, size, args->in.alignment,
>>                        (u32)(0xffffffff & args->in.domains),
>> -                     flags, false, &gobj);
>> +                     flags, false, resv, &gobj);
>> +    if (flags & AMDGPU_GEM_CREATE_LOCAL) {
>> +        if (!r) {
>> +            struct amdgpu_bo *abo = gem_to_amdgpu_bo(gobj);
>> +
>> +            abo->parent = amdgpu_bo_ref(vm->root.base.bo);
>> +        }
>> +        amdgpu_bo_unreserve(vm->root.base.bo);
>> +    }
>>       if (r)
>>           return r;
>>   @@ -267,9 +289,8 @@ int amdgpu_gem_userptr_ioctl(struct drm_device 
>> *dev, void *data,
>>       }
>>         /* create a gem object to contain this object in */
>> -    r = amdgpu_gem_object_create(adev, args->size, 0,
>> -                     AMDGPU_GEM_DOMAIN_CPU, 0,
>> -                     0, &gobj);
>> +    r = amdgpu_gem_object_create(adev, args->size, 0, 
>> AMDGPU_GEM_DOMAIN_CPU,
>> +                     0, 0, NULL, &gobj);
>>       if (r)
>>           return r;
>>   @@ -521,7 +542,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, 
>> void *data,
>>       struct amdgpu_bo_list_entry vm_pd;
>>       struct ttm_validate_buffer tv;
>>       struct ww_acquire_ctx ticket;
>> -    struct list_head list;
>> +    struct list_head list, duplicates;
>>       uint64_t va_flags;
>>       int r = 0;
>>   @@ -557,6 +578,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, 
>> void *data,
>>       }
>>         INIT_LIST_HEAD(&list);
>> +    INIT_LIST_HEAD(&duplicates);
>>       if ((args->operation != AMDGPU_VA_OP_CLEAR) &&
>>           !(args->flags & AMDGPU_VM_PAGE_PRT)) {
>>           gobj = drm_gem_object_lookup(filp, args->handle);
>> @@ -573,7 +595,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, 
>> void *data,
>>         amdgpu_vm_get_pd_bo(&fpriv->vm, &list, &vm_pd);
>>   -    r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL);
>> +    r = ttm_eu_reserve_buffers(&ticket, &list, true, &duplicates);
>>       if (r)
>>           goto error_unref;
>>   @@ -639,6 +661,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, 
>> void *data,
>>   int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
>>               struct drm_file *filp)
>>   {
>> +    struct amdgpu_device *adev = dev->dev_private;
>>       struct drm_amdgpu_gem_op *args = data;
>>       struct drm_gem_object *gobj;
>>       struct amdgpu_bo *robj;
>> @@ -686,6 +709,9 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, 
>> void *data,
>>           if (robj->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM)
>>               robj->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT;
>>   +        if (robj->flags & AMDGPU_GEM_CREATE_LOCAL)
>> +            amdgpu_vm_bo_invalidate(adev, robj, true);
>> +
>>           amdgpu_bo_unreserve(robj);
>>           break;
>>       default:
>> @@ -715,8 +741,7 @@ int amdgpu_mode_dumb_create(struct drm_file 
>> *file_priv,
>>       r = amdgpu_gem_object_create(adev, args->size, 0,
>>                        AMDGPU_GEM_DOMAIN_VRAM,
>>                        AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
>> -                     ttm_bo_type_device,
>> -                     &gobj);
>> +                     false, NULL, &gobj);
>>       if (r)
>>           return -ENOMEM;
>>   diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
>> index 5b3f928..f407499 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
>> @@ -136,7 +136,8 @@ struct dma_buf *amdgpu_gem_prime_export(struct 
>> drm_device *dev,
>>   {
>>       struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj);
>>   -    if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm))
>> +    if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) ||
>> +        bo->flags & AMDGPU_GEM_CREATE_LOCAL)
>>           return ERR_PTR(-EPERM);
>>         return drm_gem_prime_export(dev, gobj, flags);
>> diff --git a/include/uapi/drm/amdgpu_drm.h 
>> b/include/uapi/drm/amdgpu_drm.h
>> index d0ee739..05241a6 100644
>> --- a/include/uapi/drm/amdgpu_drm.h
>> +++ b/include/uapi/drm/amdgpu_drm.h
>> @@ -89,6 +89,8 @@ extern "C" {
>>   #define AMDGPU_GEM_CREATE_SHADOW        (1 << 4)
>>   /* Flag that allocating the BO should use linear VRAM */
>>   #define AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS    (1 << 5)
>> +/* Flag that BO is local in the VM */
>> +#define AMDGPU_GEM_CREATE_LOCAL            (1 << 6)
>>     struct drm_amdgpu_gem_create_in  {
>>       /** the requested memory size */
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
On Fri, Aug 25, 2017 at 3:00 PM, Christian König
<deathsimple@vodafone.de> wrote:
> Am 25.08.2017 um 12:32 schrieb zhoucm1:
>>
>>
>>
>> On 2017年08月25日 17:38, Christian König wrote:
>>>
>>> From: Christian König <christian.koenig@amd.com>
>>>
>>> Add the IOCTL interface so that applications can allocate per VM BOs.
>>>
>>> Still WIP since not all corner cases are tested yet, but this reduces
>>> average
>>> CS overhead for 10K BOs from 21ms down to 48us.
>>
>> Wow, cheers, eventually you get per vm bo to same reservation with PD/pts,
>> indeed save a lot of bo list.
>
>
> Don't cheer to loud yet, that is a completely constructed test case.
>
> So far I wasn't able to archive any improvements with any real game on this
> with Mesa.
>
> BTW: Marek can you take a look with some CPU bound tests? I can provide a
> kernel branch if necessary.

Do you have a branch that works on Raven? This patch series doesn't,
and I didn't investigate why.

Marek

>
> Regards,
> Christian.
>
>
>> overall looks good, I will take a detailed check for this tomorrow.
>>
>> Regards,
>> David Zhou
>>>
>>>
>>> Signed-off-by: Christian König <christian.koenig@amd.com>
>>> ---
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu.h       |  7 ++--
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c    |  2 +-
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c   | 59
>>> ++++++++++++++++++++++---------
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c |  3 +-
>>>   include/uapi/drm/amdgpu_drm.h             |  2 ++
>>>   5 files changed, 51 insertions(+), 22 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> index b1e817c..21cab36 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> @@ -457,9 +457,10 @@ struct amdgpu_sa_bo {
>>>    */
>>>   void amdgpu_gem_force_release(struct amdgpu_device *adev);
>>>   int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long
>>> size,
>>> -                int alignment, u32 initial_domain,
>>> -                u64 flags, bool kernel,
>>> -                struct drm_gem_object **obj);
>>> +                 int alignment, u32 initial_domain,
>>> +                 u64 flags, bool kernel,
>>> +                 struct reservation_object *resv,
>>> +                 struct drm_gem_object **obj);
>>>     int amdgpu_mode_dumb_create(struct drm_file *file_priv,
>>>                   struct drm_device *dev,
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
>>> index 0e907ea..7256f83 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
>>> @@ -144,7 +144,7 @@ static int amdgpufb_create_pinned_object(struct
>>> amdgpu_fbdev *rfbdev,
>>>                          AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
>>>                          AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
>>>                          AMDGPU_GEM_CREATE_VRAM_CLEARED,
>>> -                       true, &gobj);
>>> +                       true, NULL, &gobj);
>>>       if (ret) {
>>>           pr_err("failed to allocate framebuffer (%d)\n", aligned_size);
>>>           return -ENOMEM;
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>> index d028806..b8e8d67 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>> @@ -44,11 +44,12 @@ void amdgpu_gem_object_free(struct drm_gem_object
>>> *gobj)
>>>   }
>>>     int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned
>>> long size,
>>> -                int alignment, u32 initial_domain,
>>> -                u64 flags, bool kernel,
>>> -                struct drm_gem_object **obj)
>>> +                 int alignment, u32 initial_domain,
>>> +                 u64 flags, bool kernel,
>>> +                 struct reservation_object *resv,
>>> +                 struct drm_gem_object **obj)
>>>   {
>>> -    struct amdgpu_bo *robj;
>>> +    struct amdgpu_bo *bo;
>>>       int r;
>>>         *obj = NULL;
>>> @@ -59,7 +60,7 @@ int amdgpu_gem_object_create(struct amdgpu_device
>>> *adev, unsigned long size,
>>>     retry:
>>>       r = amdgpu_bo_create(adev, size, alignment, kernel, initial_domain,
>>> -                 flags, NULL, NULL, 0, &robj);
>>> +                 flags, NULL, resv, 0, &bo);
>>>       if (r) {
>>>           if (r != -ERESTARTSYS) {
>>>               if (initial_domain == AMDGPU_GEM_DOMAIN_VRAM) {
>>> @@ -71,7 +72,7 @@ int amdgpu_gem_object_create(struct amdgpu_device
>>> *adev, unsigned long size,
>>>           }
>>>           return r;
>>>       }
>>> -    *obj = &robj->gem_base;
>>> +    *obj = &bo->gem_base;
>>>         return 0;
>>>   }
>>> @@ -136,13 +137,14 @@ void amdgpu_gem_object_close(struct drm_gem_object
>>> *obj,
>>>       struct amdgpu_vm *vm = &fpriv->vm;
>>>         struct amdgpu_bo_list_entry vm_pd;
>>> -    struct list_head list;
>>> +    struct list_head list, duplicates;
>>>       struct ttm_validate_buffer tv;
>>>       struct ww_acquire_ctx ticket;
>>>       struct amdgpu_bo_va *bo_va;
>>>       int r;
>>>         INIT_LIST_HEAD(&list);
>>> +    INIT_LIST_HEAD(&duplicates);
>>>         tv.bo = &bo->tbo;
>>>       tv.shared = true;
>>> @@ -150,7 +152,7 @@ void amdgpu_gem_object_close(struct drm_gem_object
>>> *obj,
>>>         amdgpu_vm_get_pd_bo(vm, &list, &vm_pd);
>>>   -    r = ttm_eu_reserve_buffers(&ticket, &list, false, NULL);
>>> +    r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates);
>>>       if (r) {
>>>           dev_err(adev->dev, "leaking bo va because "
>>>               "we fail to reserve bo (%d)\n", r);
>>> @@ -185,9 +187,12 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev,
>>> void *data,
>>>                   struct drm_file *filp)
>>>   {
>>>       struct amdgpu_device *adev = dev->dev_private;
>>> +    struct amdgpu_fpriv *fpriv = filp->driver_priv;
>>> +    struct amdgpu_vm *vm = &fpriv->vm;
>>>       union drm_amdgpu_gem_create *args = data;
>>>       uint64_t flags = args->in.domain_flags;
>>>       uint64_t size = args->in.bo_size;
>>> +    struct reservation_object *resv = NULL;
>>>       struct drm_gem_object *gobj;
>>>       uint32_t handle;
>>>       int r;
>>> @@ -196,7 +201,8 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev,
>>> void *data,
>>>       if (flags & ~(AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
>>>                 AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
>>>                 AMDGPU_GEM_CREATE_CPU_GTT_USWC |
>>> -              AMDGPU_GEM_CREATE_VRAM_CLEARED))
>>> +              AMDGPU_GEM_CREATE_VRAM_CLEARED |
>>> +              AMDGPU_GEM_CREATE_LOCAL))
>>>           return -EINVAL;
>>>         /* reject invalid gem domains */
>>> @@ -223,9 +229,25 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev,
>>> void *data,
>>>       }
>>>       size = roundup(size, PAGE_SIZE);
>>>   +    if (flags & AMDGPU_GEM_CREATE_LOCAL) {
>>> +        r = amdgpu_bo_reserve(vm->root.base.bo, false);
>>> +        if (r)
>>> +            return r;
>>> +
>>> +        resv = vm->root.base.bo->tbo.resv;
>>> +    }
>>> +
>>>       r = amdgpu_gem_object_create(adev, size, args->in.alignment,
>>>                        (u32)(0xffffffff & args->in.domains),
>>> -                     flags, false, &gobj);
>>> +                     flags, false, resv, &gobj);
>>> +    if (flags & AMDGPU_GEM_CREATE_LOCAL) {
>>> +        if (!r) {
>>> +            struct amdgpu_bo *abo = gem_to_amdgpu_bo(gobj);
>>> +
>>> +            abo->parent = amdgpu_bo_ref(vm->root.base.bo);
>>> +        }
>>> +        amdgpu_bo_unreserve(vm->root.base.bo);
>>> +    }
>>>       if (r)
>>>           return r;
>>>   @@ -267,9 +289,8 @@ int amdgpu_gem_userptr_ioctl(struct drm_device
>>> *dev, void *data,
>>>       }
>>>         /* create a gem object to contain this object in */
>>> -    r = amdgpu_gem_object_create(adev, args->size, 0,
>>> -                     AMDGPU_GEM_DOMAIN_CPU, 0,
>>> -                     0, &gobj);
>>> +    r = amdgpu_gem_object_create(adev, args->size, 0,
>>> AMDGPU_GEM_DOMAIN_CPU,
>>> +                     0, 0, NULL, &gobj);
>>>       if (r)
>>>           return r;
>>>   @@ -521,7 +542,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev,
>>> void *data,
>>>       struct amdgpu_bo_list_entry vm_pd;
>>>       struct ttm_validate_buffer tv;
>>>       struct ww_acquire_ctx ticket;
>>> -    struct list_head list;
>>> +    struct list_head list, duplicates;
>>>       uint64_t va_flags;
>>>       int r = 0;
>>>   @@ -557,6 +578,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev,
>>> void *data,
>>>       }
>>>         INIT_LIST_HEAD(&list);
>>> +    INIT_LIST_HEAD(&duplicates);
>>>       if ((args->operation != AMDGPU_VA_OP_CLEAR) &&
>>>           !(args->flags & AMDGPU_VM_PAGE_PRT)) {
>>>           gobj = drm_gem_object_lookup(filp, args->handle);
>>> @@ -573,7 +595,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void
>>> *data,
>>>         amdgpu_vm_get_pd_bo(&fpriv->vm, &list, &vm_pd);
>>>   -    r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL);
>>> +    r = ttm_eu_reserve_buffers(&ticket, &list, true, &duplicates);
>>>       if (r)
>>>           goto error_unref;
>>>   @@ -639,6 +661,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev,
>>> void *data,
>>>   int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
>>>               struct drm_file *filp)
>>>   {
>>> +    struct amdgpu_device *adev = dev->dev_private;
>>>       struct drm_amdgpu_gem_op *args = data;
>>>       struct drm_gem_object *gobj;
>>>       struct amdgpu_bo *robj;
>>> @@ -686,6 +709,9 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void
>>> *data,
>>>           if (robj->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM)
>>>               robj->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT;
>>>   +        if (robj->flags & AMDGPU_GEM_CREATE_LOCAL)
>>> +            amdgpu_vm_bo_invalidate(adev, robj, true);
>>> +
>>>           amdgpu_bo_unreserve(robj);
>>>           break;
>>>       default:
>>> @@ -715,8 +741,7 @@ int amdgpu_mode_dumb_create(struct drm_file
>>> *file_priv,
>>>       r = amdgpu_gem_object_create(adev, args->size, 0,
>>>                        AMDGPU_GEM_DOMAIN_VRAM,
>>>                        AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
>>> -                     ttm_bo_type_device,
>>> -                     &gobj);
>>> +                     false, NULL, &gobj);
>>>       if (r)
>>>           return -ENOMEM;
>>>   diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
>>> index 5b3f928..f407499 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
>>> @@ -136,7 +136,8 @@ struct dma_buf *amdgpu_gem_prime_export(struct
>>> drm_device *dev,
>>>   {
>>>       struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj);
>>>   -    if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm))
>>> +    if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) ||
>>> +        bo->flags & AMDGPU_GEM_CREATE_LOCAL)
>>>           return ERR_PTR(-EPERM);
>>>         return drm_gem_prime_export(dev, gobj, flags);
>>> diff --git a/include/uapi/drm/amdgpu_drm.h
>>> b/include/uapi/drm/amdgpu_drm.h
>>> index d0ee739..05241a6 100644
>>> --- a/include/uapi/drm/amdgpu_drm.h
>>> +++ b/include/uapi/drm/amdgpu_drm.h
>>> @@ -89,6 +89,8 @@ extern "C" {
>>>   #define AMDGPU_GEM_CREATE_SHADOW        (1 << 4)
>>>   /* Flag that allocating the BO should use linear VRAM */
>>>   #define AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS    (1 << 5)
>>> +/* Flag that BO is local in the VM */
>>> +#define AMDGPU_GEM_CREATE_LOCAL            (1 << 6)
>>>     struct drm_amdgpu_gem_create_in  {
>>>       /** the requested memory size */
>>
>>
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>
>
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Am 25.08.2017 um 18:22 schrieb Marek Olšák:
> On Fri, Aug 25, 2017 at 3:00 PM, Christian König
> <deathsimple@vodafone.de> wrote:
>> Am 25.08.2017 um 12:32 schrieb zhoucm1:
>>>
>>>
>>> On 2017年08月25日 17:38, Christian König wrote:
>>>> From: Christian König <christian.koenig@amd.com>
>>>>
>>>> Add the IOCTL interface so that applications can allocate per VM BOs.
>>>>
>>>> Still WIP since not all corner cases are tested yet, but this reduces
>>>> average
>>>> CS overhead for 10K BOs from 21ms down to 48us.
>>> Wow, cheers, eventually you get per vm bo to same reservation with PD/pts,
>>> indeed save a lot of bo list.
>>
>> Don't cheer to loud yet, that is a completely constructed test case.
>>
>> So far I wasn't able to archive any improvements with any real game on this
>> with Mesa.
>>
>> BTW: Marek can you take a look with some CPU bound tests? I can provide a
>> kernel branch if necessary.
> Do you have a branch that works on Raven? This patch series doesn't,
> and I didn't investigate why.

I will come up with one on Monday.

Have a nice weekend guys,
Christian.

>
> Marek
>
>> Regards,
>> Christian.
>>
>>
>>> overall looks good, I will take a detailed check for this tomorrow.
>>>
>>> Regards,
>>> David Zhou
>>>>
>>>> Signed-off-by: Christian König <christian.koenig@amd.com>
>>>> ---
>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu.h       |  7 ++--
>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c    |  2 +-
>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c   | 59
>>>> ++++++++++++++++++++++---------
>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c |  3 +-
>>>>    include/uapi/drm/amdgpu_drm.h             |  2 ++
>>>>    5 files changed, 51 insertions(+), 22 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>> index b1e817c..21cab36 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>> @@ -457,9 +457,10 @@ struct amdgpu_sa_bo {
>>>>     */
>>>>    void amdgpu_gem_force_release(struct amdgpu_device *adev);
>>>>    int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long
>>>> size,
>>>> -                int alignment, u32 initial_domain,
>>>> -                u64 flags, bool kernel,
>>>> -                struct drm_gem_object **obj);
>>>> +                 int alignment, u32 initial_domain,
>>>> +                 u64 flags, bool kernel,
>>>> +                 struct reservation_object *resv,
>>>> +                 struct drm_gem_object **obj);
>>>>      int amdgpu_mode_dumb_create(struct drm_file *file_priv,
>>>>                    struct drm_device *dev,
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
>>>> index 0e907ea..7256f83 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
>>>> @@ -144,7 +144,7 @@ static int amdgpufb_create_pinned_object(struct
>>>> amdgpu_fbdev *rfbdev,
>>>>                           AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
>>>>                           AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
>>>>                           AMDGPU_GEM_CREATE_VRAM_CLEARED,
>>>> -                       true, &gobj);
>>>> +                       true, NULL, &gobj);
>>>>        if (ret) {
>>>>            pr_err("failed to allocate framebuffer (%d)\n", aligned_size);
>>>>            return -ENOMEM;
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>>> index d028806..b8e8d67 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>>> @@ -44,11 +44,12 @@ void amdgpu_gem_object_free(struct drm_gem_object
>>>> *gobj)
>>>>    }
>>>>      int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned
>>>> long size,
>>>> -                int alignment, u32 initial_domain,
>>>> -                u64 flags, bool kernel,
>>>> -                struct drm_gem_object **obj)
>>>> +                 int alignment, u32 initial_domain,
>>>> +                 u64 flags, bool kernel,
>>>> +                 struct reservation_object *resv,
>>>> +                 struct drm_gem_object **obj)
>>>>    {
>>>> -    struct amdgpu_bo *robj;
>>>> +    struct amdgpu_bo *bo;
>>>>        int r;
>>>>          *obj = NULL;
>>>> @@ -59,7 +60,7 @@ int amdgpu_gem_object_create(struct amdgpu_device
>>>> *adev, unsigned long size,
>>>>      retry:
>>>>        r = amdgpu_bo_create(adev, size, alignment, kernel, initial_domain,
>>>> -                 flags, NULL, NULL, 0, &robj);
>>>> +                 flags, NULL, resv, 0, &bo);
>>>>        if (r) {
>>>>            if (r != -ERESTARTSYS) {
>>>>                if (initial_domain == AMDGPU_GEM_DOMAIN_VRAM) {
>>>> @@ -71,7 +72,7 @@ int amdgpu_gem_object_create(struct amdgpu_device
>>>> *adev, unsigned long size,
>>>>            }
>>>>            return r;
>>>>        }
>>>> -    *obj = &robj->gem_base;
>>>> +    *obj = &bo->gem_base;
>>>>          return 0;
>>>>    }
>>>> @@ -136,13 +137,14 @@ void amdgpu_gem_object_close(struct drm_gem_object
>>>> *obj,
>>>>        struct amdgpu_vm *vm = &fpriv->vm;
>>>>          struct amdgpu_bo_list_entry vm_pd;
>>>> -    struct list_head list;
>>>> +    struct list_head list, duplicates;
>>>>        struct ttm_validate_buffer tv;
>>>>        struct ww_acquire_ctx ticket;
>>>>        struct amdgpu_bo_va *bo_va;
>>>>        int r;
>>>>          INIT_LIST_HEAD(&list);
>>>> +    INIT_LIST_HEAD(&duplicates);
>>>>          tv.bo = &bo->tbo;
>>>>        tv.shared = true;
>>>> @@ -150,7 +152,7 @@ void amdgpu_gem_object_close(struct drm_gem_object
>>>> *obj,
>>>>          amdgpu_vm_get_pd_bo(vm, &list, &vm_pd);
>>>>    -    r = ttm_eu_reserve_buffers(&ticket, &list, false, NULL);
>>>> +    r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates);
>>>>        if (r) {
>>>>            dev_err(adev->dev, "leaking bo va because "
>>>>                "we fail to reserve bo (%d)\n", r);
>>>> @@ -185,9 +187,12 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev,
>>>> void *data,
>>>>                    struct drm_file *filp)
>>>>    {
>>>>        struct amdgpu_device *adev = dev->dev_private;
>>>> +    struct amdgpu_fpriv *fpriv = filp->driver_priv;
>>>> +    struct amdgpu_vm *vm = &fpriv->vm;
>>>>        union drm_amdgpu_gem_create *args = data;
>>>>        uint64_t flags = args->in.domain_flags;
>>>>        uint64_t size = args->in.bo_size;
>>>> +    struct reservation_object *resv = NULL;
>>>>        struct drm_gem_object *gobj;
>>>>        uint32_t handle;
>>>>        int r;
>>>> @@ -196,7 +201,8 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev,
>>>> void *data,
>>>>        if (flags & ~(AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
>>>>                  AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
>>>>                  AMDGPU_GEM_CREATE_CPU_GTT_USWC |
>>>> -              AMDGPU_GEM_CREATE_VRAM_CLEARED))
>>>> +              AMDGPU_GEM_CREATE_VRAM_CLEARED |
>>>> +              AMDGPU_GEM_CREATE_LOCAL))
>>>>            return -EINVAL;
>>>>          /* reject invalid gem domains */
>>>> @@ -223,9 +229,25 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev,
>>>> void *data,
>>>>        }
>>>>        size = roundup(size, PAGE_SIZE);
>>>>    +    if (flags & AMDGPU_GEM_CREATE_LOCAL) {
>>>> +        r = amdgpu_bo_reserve(vm->root.base.bo, false);
>>>> +        if (r)
>>>> +            return r;
>>>> +
>>>> +        resv = vm->root.base.bo->tbo.resv;
>>>> +    }
>>>> +
>>>>        r = amdgpu_gem_object_create(adev, size, args->in.alignment,
>>>>                         (u32)(0xffffffff & args->in.domains),
>>>> -                     flags, false, &gobj);
>>>> +                     flags, false, resv, &gobj);
>>>> +    if (flags & AMDGPU_GEM_CREATE_LOCAL) {
>>>> +        if (!r) {
>>>> +            struct amdgpu_bo *abo = gem_to_amdgpu_bo(gobj);
>>>> +
>>>> +            abo->parent = amdgpu_bo_ref(vm->root.base.bo);
>>>> +        }
>>>> +        amdgpu_bo_unreserve(vm->root.base.bo);
>>>> +    }
>>>>        if (r)
>>>>            return r;
>>>>    @@ -267,9 +289,8 @@ int amdgpu_gem_userptr_ioctl(struct drm_device
>>>> *dev, void *data,
>>>>        }
>>>>          /* create a gem object to contain this object in */
>>>> -    r = amdgpu_gem_object_create(adev, args->size, 0,
>>>> -                     AMDGPU_GEM_DOMAIN_CPU, 0,
>>>> -                     0, &gobj);
>>>> +    r = amdgpu_gem_object_create(adev, args->size, 0,
>>>> AMDGPU_GEM_DOMAIN_CPU,
>>>> +                     0, 0, NULL, &gobj);
>>>>        if (r)
>>>>            return r;
>>>>    @@ -521,7 +542,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev,
>>>> void *data,
>>>>        struct amdgpu_bo_list_entry vm_pd;
>>>>        struct ttm_validate_buffer tv;
>>>>        struct ww_acquire_ctx ticket;
>>>> -    struct list_head list;
>>>> +    struct list_head list, duplicates;
>>>>        uint64_t va_flags;
>>>>        int r = 0;
>>>>    @@ -557,6 +578,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev,
>>>> void *data,
>>>>        }
>>>>          INIT_LIST_HEAD(&list);
>>>> +    INIT_LIST_HEAD(&duplicates);
>>>>        if ((args->operation != AMDGPU_VA_OP_CLEAR) &&
>>>>            !(args->flags & AMDGPU_VM_PAGE_PRT)) {
>>>>            gobj = drm_gem_object_lookup(filp, args->handle);
>>>> @@ -573,7 +595,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void
>>>> *data,
>>>>          amdgpu_vm_get_pd_bo(&fpriv->vm, &list, &vm_pd);
>>>>    -    r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL);
>>>> +    r = ttm_eu_reserve_buffers(&ticket, &list, true, &duplicates);
>>>>        if (r)
>>>>            goto error_unref;
>>>>    @@ -639,6 +661,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev,
>>>> void *data,
>>>>    int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
>>>>                struct drm_file *filp)
>>>>    {
>>>> +    struct amdgpu_device *adev = dev->dev_private;
>>>>        struct drm_amdgpu_gem_op *args = data;
>>>>        struct drm_gem_object *gobj;
>>>>        struct amdgpu_bo *robj;
>>>> @@ -686,6 +709,9 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void
>>>> *data,
>>>>            if (robj->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM)
>>>>                robj->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT;
>>>>    +        if (robj->flags & AMDGPU_GEM_CREATE_LOCAL)
>>>> +            amdgpu_vm_bo_invalidate(adev, robj, true);
>>>> +
>>>>            amdgpu_bo_unreserve(robj);
>>>>            break;
>>>>        default:
>>>> @@ -715,8 +741,7 @@ int amdgpu_mode_dumb_create(struct drm_file
>>>> *file_priv,
>>>>        r = amdgpu_gem_object_create(adev, args->size, 0,
>>>>                         AMDGPU_GEM_DOMAIN_VRAM,
>>>>                         AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
>>>> -                     ttm_bo_type_device,
>>>> -                     &gobj);
>>>> +                     false, NULL, &gobj);
>>>>        if (r)
>>>>            return -ENOMEM;
>>>>    diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
>>>> index 5b3f928..f407499 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
>>>> @@ -136,7 +136,8 @@ struct dma_buf *amdgpu_gem_prime_export(struct
>>>> drm_device *dev,
>>>>    {
>>>>        struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj);
>>>>    -    if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm))
>>>> +    if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) ||
>>>> +        bo->flags & AMDGPU_GEM_CREATE_LOCAL)
>>>>            return ERR_PTR(-EPERM);
>>>>          return drm_gem_prime_export(dev, gobj, flags);
>>>> diff --git a/include/uapi/drm/amdgpu_drm.h
>>>> b/include/uapi/drm/amdgpu_drm.h
>>>> index d0ee739..05241a6 100644
>>>> --- a/include/uapi/drm/amdgpu_drm.h
>>>> +++ b/include/uapi/drm/amdgpu_drm.h
>>>> @@ -89,6 +89,8 @@ extern "C" {
>>>>    #define AMDGPU_GEM_CREATE_SHADOW        (1 << 4)
>>>>    /* Flag that allocating the BO should use linear VRAM */
>>>>    #define AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS    (1 << 5)
>>>> +/* Flag that BO is local in the VM */
>>>> +#define AMDGPU_GEM_CREATE_LOCAL            (1 << 6)
>>>>      struct drm_amdgpu_gem_create_in  {
>>>>        /** the requested memory size */
>>>
>>> _______________________________________________
>>> amd-gfx mailing list
>>> amd-gfx@lists.freedesktop.org
>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>>
>>
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
That's clever. I was scratching my head where the BOs were getting
validated, just by sharing the VM reservation object. Until I carefully
read your previous commit.

In general, I find the VM code extremely frustrating and confusing to
review. Too many lists of different things, and it's really hard to keep
track of which list track what type of object, in which situation.

For example, take struct amdgpu_vm:

	/* BOs who needs a validation */
	struct list_head	evicted;

	/* BOs moved, but not yet updated in the PT */
	struct list_head        moved;

	/* BO mappings freed, but not yet updated in the PT */
	struct list_head        freed;

Three lists of BOs (according to the comments). But evicted and moved
are lists of amdgpu_vm_bo_base, freed is a list of amdgpu_bo_va_mapping.
moved and freed are used for tracking BOs mapped in the VM. I think
moved may also track page table BOs, but I'm not sure. evicted is used
only for tracking page table BOs.

In patch #7 you add relocated to the mix. Now it gets really funny.
What's the difference between relocated and evicted and moved? It seems
PT BOs can be on any of these lists. I think evicted means the BO needs
to be validated. moved or relocated means it's been validated but its
mappings must be updated. For PT BOs and mapped BOs that means different
things, so it makes sense to have different lists for them. But I think
PT BOs can also end up on the moved list when amdgpu_vm_bo_invalidate is
called for a page table BO (through amdgpu_bo_move_notify). So I'm still
confused.

I think this could be clarified with more descriptive names for the
lists. If PT BOs and mapped BOs must be tracked separately that should
be clear from the names.

</rant>

Regards,
  Felix


On 2017-08-25 05:38 AM, Christian König wrote:
> From: Christian König <christian.koenig@amd.com>
>
> Add the IOCTL interface so that applications can allocate per VM BOs.
>
> Still WIP since not all corner cases are tested yet, but this reduces average
> CS overhead for 10K BOs from 21ms down to 48us.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu.h       |  7 ++--
>  drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c    |  2 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c   | 59 ++++++++++++++++++++++---------
>  drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c |  3 +-
>  include/uapi/drm/amdgpu_drm.h             |  2 ++
>  5 files changed, 51 insertions(+), 22 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index b1e817c..21cab36 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -457,9 +457,10 @@ struct amdgpu_sa_bo {
>   */
>  void amdgpu_gem_force_release(struct amdgpu_device *adev);
>  int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
> -				int alignment, u32 initial_domain,
> -				u64 flags, bool kernel,
> -				struct drm_gem_object **obj);
> +			     int alignment, u32 initial_domain,
> +			     u64 flags, bool kernel,
> +			     struct reservation_object *resv,
> +			     struct drm_gem_object **obj);
>  
>  int amdgpu_mode_dumb_create(struct drm_file *file_priv,
>  			    struct drm_device *dev,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
> index 0e907ea..7256f83 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
> @@ -144,7 +144,7 @@ static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev,
>  				       AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
>  				       AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
>  				       AMDGPU_GEM_CREATE_VRAM_CLEARED,
> -				       true, &gobj);
> +				       true, NULL, &gobj);
>  	if (ret) {
>  		pr_err("failed to allocate framebuffer (%d)\n", aligned_size);
>  		return -ENOMEM;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> index d028806..b8e8d67 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> @@ -44,11 +44,12 @@ void amdgpu_gem_object_free(struct drm_gem_object *gobj)
>  }
>  
>  int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
> -				int alignment, u32 initial_domain,
> -				u64 flags, bool kernel,
> -				struct drm_gem_object **obj)
> +			     int alignment, u32 initial_domain,
> +			     u64 flags, bool kernel,
> +			     struct reservation_object *resv,
> +			     struct drm_gem_object **obj)
>  {
> -	struct amdgpu_bo *robj;
> +	struct amdgpu_bo *bo;
>  	int r;
>  
>  	*obj = NULL;
> @@ -59,7 +60,7 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
>  
>  retry:
>  	r = amdgpu_bo_create(adev, size, alignment, kernel, initial_domain,
> -			     flags, NULL, NULL, 0, &robj);
> +			     flags, NULL, resv, 0, &bo);
>  	if (r) {
>  		if (r != -ERESTARTSYS) {
>  			if (initial_domain == AMDGPU_GEM_DOMAIN_VRAM) {
> @@ -71,7 +72,7 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
>  		}
>  		return r;
>  	}
> -	*obj = &robj->gem_base;
> +	*obj = &bo->gem_base;
>  
>  	return 0;
>  }
> @@ -136,13 +137,14 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj,
>  	struct amdgpu_vm *vm = &fpriv->vm;
>  
>  	struct amdgpu_bo_list_entry vm_pd;
> -	struct list_head list;
> +	struct list_head list, duplicates;
>  	struct ttm_validate_buffer tv;
>  	struct ww_acquire_ctx ticket;
>  	struct amdgpu_bo_va *bo_va;
>  	int r;
>  
>  	INIT_LIST_HEAD(&list);
> +	INIT_LIST_HEAD(&duplicates);
>  
>  	tv.bo = &bo->tbo;
>  	tv.shared = true;
> @@ -150,7 +152,7 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj,
>  
>  	amdgpu_vm_get_pd_bo(vm, &list, &vm_pd);
>  
> -	r = ttm_eu_reserve_buffers(&ticket, &list, false, NULL);
> +	r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates);
>  	if (r) {
>  		dev_err(adev->dev, "leaking bo va because "
>  			"we fail to reserve bo (%d)\n", r);
> @@ -185,9 +187,12 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
>  			    struct drm_file *filp)
>  {
>  	struct amdgpu_device *adev = dev->dev_private;
> +	struct amdgpu_fpriv *fpriv = filp->driver_priv;
> +	struct amdgpu_vm *vm = &fpriv->vm;
>  	union drm_amdgpu_gem_create *args = data;
>  	uint64_t flags = args->in.domain_flags;
>  	uint64_t size = args->in.bo_size;
> +	struct reservation_object *resv = NULL;
>  	struct drm_gem_object *gobj;
>  	uint32_t handle;
>  	int r;
> @@ -196,7 +201,8 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
>  	if (flags & ~(AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
>  		      AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
>  		      AMDGPU_GEM_CREATE_CPU_GTT_USWC |
> -		      AMDGPU_GEM_CREATE_VRAM_CLEARED))
> +		      AMDGPU_GEM_CREATE_VRAM_CLEARED |
> +		      AMDGPU_GEM_CREATE_LOCAL))
>  		return -EINVAL;
>  
>  	/* reject invalid gem domains */
> @@ -223,9 +229,25 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
>  	}
>  	size = roundup(size, PAGE_SIZE);
>  
> +	if (flags & AMDGPU_GEM_CREATE_LOCAL) {
> +		r = amdgpu_bo_reserve(vm->root.base.bo, false);
> +		if (r)
> +			return r;
> +
> +		resv = vm->root.base.bo->tbo.resv;
> +	}
> +
>  	r = amdgpu_gem_object_create(adev, size, args->in.alignment,
>  				     (u32)(0xffffffff & args->in.domains),
> -				     flags, false, &gobj);
> +				     flags, false, resv, &gobj);
> +	if (flags & AMDGPU_GEM_CREATE_LOCAL) {
> +		if (!r) {
> +			struct amdgpu_bo *abo = gem_to_amdgpu_bo(gobj);
> +
> +			abo->parent = amdgpu_bo_ref(vm->root.base.bo);
> +		}
> +		amdgpu_bo_unreserve(vm->root.base.bo);
> +	}
>  	if (r)
>  		return r;
>  
> @@ -267,9 +289,8 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
>  	}
>  
>  	/* create a gem object to contain this object in */
> -	r = amdgpu_gem_object_create(adev, args->size, 0,
> -				     AMDGPU_GEM_DOMAIN_CPU, 0,
> -				     0, &gobj);
> +	r = amdgpu_gem_object_create(adev, args->size, 0, AMDGPU_GEM_DOMAIN_CPU,
> +				     0, 0, NULL, &gobj);
>  	if (r)
>  		return r;
>  
> @@ -521,7 +542,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
>  	struct amdgpu_bo_list_entry vm_pd;
>  	struct ttm_validate_buffer tv;
>  	struct ww_acquire_ctx ticket;
> -	struct list_head list;
> +	struct list_head list, duplicates;
>  	uint64_t va_flags;
>  	int r = 0;
>  
> @@ -557,6 +578,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
>  	}
>  
>  	INIT_LIST_HEAD(&list);
> +	INIT_LIST_HEAD(&duplicates);
>  	if ((args->operation != AMDGPU_VA_OP_CLEAR) &&
>  	    !(args->flags & AMDGPU_VM_PAGE_PRT)) {
>  		gobj = drm_gem_object_lookup(filp, args->handle);
> @@ -573,7 +595,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
>  
>  	amdgpu_vm_get_pd_bo(&fpriv->vm, &list, &vm_pd);
>  
> -	r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL);
> +	r = ttm_eu_reserve_buffers(&ticket, &list, true, &duplicates);
>  	if (r)
>  		goto error_unref;
>  
> @@ -639,6 +661,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
>  int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
>  			struct drm_file *filp)
>  {
> +	struct amdgpu_device *adev = dev->dev_private;
>  	struct drm_amdgpu_gem_op *args = data;
>  	struct drm_gem_object *gobj;
>  	struct amdgpu_bo *robj;
> @@ -686,6 +709,9 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
>  		if (robj->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM)
>  			robj->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT;
>  
> +		if (robj->flags & AMDGPU_GEM_CREATE_LOCAL)
> +			amdgpu_vm_bo_invalidate(adev, robj, true);
> +
>  		amdgpu_bo_unreserve(robj);
>  		break;
>  	default:
> @@ -715,8 +741,7 @@ int amdgpu_mode_dumb_create(struct drm_file *file_priv,
>  	r = amdgpu_gem_object_create(adev, args->size, 0,
>  				     AMDGPU_GEM_DOMAIN_VRAM,
>  				     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
> -				     ttm_bo_type_device,
> -				     &gobj);
> +				     false, NULL, &gobj);
>  	if (r)
>  		return -ENOMEM;
>  
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
> index 5b3f928..f407499 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
> @@ -136,7 +136,8 @@ struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
>  {
>  	struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj);
>  
> -	if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm))
> +	if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) ||
> +	    bo->flags & AMDGPU_GEM_CREATE_LOCAL)
>  		return ERR_PTR(-EPERM);
>  
>  	return drm_gem_prime_export(dev, gobj, flags);
> diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
> index d0ee739..05241a6 100644
> --- a/include/uapi/drm/amdgpu_drm.h
> +++ b/include/uapi/drm/amdgpu_drm.h
> @@ -89,6 +89,8 @@ extern "C" {
>  #define AMDGPU_GEM_CREATE_SHADOW		(1 << 4)
>  /* Flag that allocating the BO should use linear VRAM */
>  #define AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS	(1 << 5)
> +/* Flag that BO is local in the VM */
> +#define AMDGPU_GEM_CREATE_LOCAL			(1 << 6)
>  
>  struct drm_amdgpu_gem_create_in  {
>  	/** the requested memory size */
Am 25.08.2017 um 23:31 schrieb Felix Kuehling:
> That's clever. I was scratching my head where the BOs were getting
> validated, just by sharing the VM reservation object. Until I carefully
> read your previous commit.

Yeah, didn't had time to properly comment on your last mail.

> In general, I find the VM code extremely frustrating and confusing to
> review. Too many lists of different things, and it's really hard to keep
> track of which list track what type of object, in which situation.
>
> For example, take struct amdgpu_vm:
>
> 	/* BOs who needs a validation */
> 	struct list_head	evicted;
>
> 	/* BOs moved, but not yet updated in the PT */
> 	struct list_head        moved;
>
> 	/* BO mappings freed, but not yet updated in the PT */
> 	struct list_head        freed;
>
> Three lists of BOs (according to the comments). But evicted and moved
> are lists of amdgpu_vm_bo_base, freed is a list of amdgpu_bo_va_mapping.
> moved and freed are used for tracking BOs mapped in the VM. I think
> moved may also track page table BOs, but I'm not sure. evicted is used
> only for tracking page table BOs.
>
> In patch #7 you add relocated to the mix. Now it gets really funny.
> What's the difference between relocated and evicted and moved?

Essentially nothing. I actually tried to merge them, but then realized 
that this would probably remove the ability to only update the 
directories and so most likely your KFD usage of that.

> It seems
> PT BOs can be on any of these lists. I think evicted means the BO needs
> to be validated. moved or relocated means it's been validated but its
> mappings must be updated. For PT BOs and mapped BOs that means different
> things, so it makes sense to have different lists for them. But I think
> PT BOs can also end up on the moved list when amdgpu_vm_bo_invalidate is
> called for a page table BO (through amdgpu_bo_move_notify). So I'm still
> confused.

amdgpu_vm_bo_invalidate() has logic to always put PDs and PTs on the 
relocated list and everything else on the moved list. So PDs/PTs should 
never end up on the moved list.

> I think this could be clarified with more descriptive names for the
> lists. If PT BOs and mapped BOs must be tracked separately that should
> be clear from the names.

That is a good idea, but in the long term I want to merge reloacted and 
moved list and then decide while walking the list what to do, but that 
is the next step I think.

Regards,
Christian.

>
> </rant>
>
> Regards,
>    Felix
>
>
> On 2017-08-25 05:38 AM, Christian König wrote:
>> From: Christian König <christian.koenig@amd.com>
>>
>> Add the IOCTL interface so that applications can allocate per VM BOs.
>>
>> Still WIP since not all corner cases are tested yet, but this reduces average
>> CS overhead for 10K BOs from 21ms down to 48us.
>>
>> Signed-off-by: Christian König <christian.koenig@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu.h       |  7 ++--
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c    |  2 +-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c   | 59 ++++++++++++++++++++++---------
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c |  3 +-
>>   include/uapi/drm/amdgpu_drm.h             |  2 ++
>>   5 files changed, 51 insertions(+), 22 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> index b1e817c..21cab36 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> @@ -457,9 +457,10 @@ struct amdgpu_sa_bo {
>>    */
>>   void amdgpu_gem_force_release(struct amdgpu_device *adev);
>>   int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
>> -				int alignment, u32 initial_domain,
>> -				u64 flags, bool kernel,
>> -				struct drm_gem_object **obj);
>> +			     int alignment, u32 initial_domain,
>> +			     u64 flags, bool kernel,
>> +			     struct reservation_object *resv,
>> +			     struct drm_gem_object **obj);
>>   
>>   int amdgpu_mode_dumb_create(struct drm_file *file_priv,
>>   			    struct drm_device *dev,
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
>> index 0e907ea..7256f83 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
>> @@ -144,7 +144,7 @@ static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev,
>>   				       AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
>>   				       AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
>>   				       AMDGPU_GEM_CREATE_VRAM_CLEARED,
>> -				       true, &gobj);
>> +				       true, NULL, &gobj);
>>   	if (ret) {
>>   		pr_err("failed to allocate framebuffer (%d)\n", aligned_size);
>>   		return -ENOMEM;
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>> index d028806..b8e8d67 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>> @@ -44,11 +44,12 @@ void amdgpu_gem_object_free(struct drm_gem_object *gobj)
>>   }
>>   
>>   int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
>> -				int alignment, u32 initial_domain,
>> -				u64 flags, bool kernel,
>> -				struct drm_gem_object **obj)
>> +			     int alignment, u32 initial_domain,
>> +			     u64 flags, bool kernel,
>> +			     struct reservation_object *resv,
>> +			     struct drm_gem_object **obj)
>>   {
>> -	struct amdgpu_bo *robj;
>> +	struct amdgpu_bo *bo;
>>   	int r;
>>   
>>   	*obj = NULL;
>> @@ -59,7 +60,7 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
>>   
>>   retry:
>>   	r = amdgpu_bo_create(adev, size, alignment, kernel, initial_domain,
>> -			     flags, NULL, NULL, 0, &robj);
>> +			     flags, NULL, resv, 0, &bo);
>>   	if (r) {
>>   		if (r != -ERESTARTSYS) {
>>   			if (initial_domain == AMDGPU_GEM_DOMAIN_VRAM) {
>> @@ -71,7 +72,7 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
>>   		}
>>   		return r;
>>   	}
>> -	*obj = &robj->gem_base;
>> +	*obj = &bo->gem_base;
>>   
>>   	return 0;
>>   }
>> @@ -136,13 +137,14 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj,
>>   	struct amdgpu_vm *vm = &fpriv->vm;
>>   
>>   	struct amdgpu_bo_list_entry vm_pd;
>> -	struct list_head list;
>> +	struct list_head list, duplicates;
>>   	struct ttm_validate_buffer tv;
>>   	struct ww_acquire_ctx ticket;
>>   	struct amdgpu_bo_va *bo_va;
>>   	int r;
>>   
>>   	INIT_LIST_HEAD(&list);
>> +	INIT_LIST_HEAD(&duplicates);
>>   
>>   	tv.bo = &bo->tbo;
>>   	tv.shared = true;
>> @@ -150,7 +152,7 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj,
>>   
>>   	amdgpu_vm_get_pd_bo(vm, &list, &vm_pd);
>>   
>> -	r = ttm_eu_reserve_buffers(&ticket, &list, false, NULL);
>> +	r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates);
>>   	if (r) {
>>   		dev_err(adev->dev, "leaking bo va because "
>>   			"we fail to reserve bo (%d)\n", r);
>> @@ -185,9 +187,12 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
>>   			    struct drm_file *filp)
>>   {
>>   	struct amdgpu_device *adev = dev->dev_private;
>> +	struct amdgpu_fpriv *fpriv = filp->driver_priv;
>> +	struct amdgpu_vm *vm = &fpriv->vm;
>>   	union drm_amdgpu_gem_create *args = data;
>>   	uint64_t flags = args->in.domain_flags;
>>   	uint64_t size = args->in.bo_size;
>> +	struct reservation_object *resv = NULL;
>>   	struct drm_gem_object *gobj;
>>   	uint32_t handle;
>>   	int r;
>> @@ -196,7 +201,8 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
>>   	if (flags & ~(AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
>>   		      AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
>>   		      AMDGPU_GEM_CREATE_CPU_GTT_USWC |
>> -		      AMDGPU_GEM_CREATE_VRAM_CLEARED))
>> +		      AMDGPU_GEM_CREATE_VRAM_CLEARED |
>> +		      AMDGPU_GEM_CREATE_LOCAL))
>>   		return -EINVAL;
>>   
>>   	/* reject invalid gem domains */
>> @@ -223,9 +229,25 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
>>   	}
>>   	size = roundup(size, PAGE_SIZE);
>>   
>> +	if (flags & AMDGPU_GEM_CREATE_LOCAL) {
>> +		r = amdgpu_bo_reserve(vm->root.base.bo, false);
>> +		if (r)
>> +			return r;
>> +
>> +		resv = vm->root.base.bo->tbo.resv;
>> +	}
>> +
>>   	r = amdgpu_gem_object_create(adev, size, args->in.alignment,
>>   				     (u32)(0xffffffff & args->in.domains),
>> -				     flags, false, &gobj);
>> +				     flags, false, resv, &gobj);
>> +	if (flags & AMDGPU_GEM_CREATE_LOCAL) {
>> +		if (!r) {
>> +			struct amdgpu_bo *abo = gem_to_amdgpu_bo(gobj);
>> +
>> +			abo->parent = amdgpu_bo_ref(vm->root.base.bo);
>> +		}
>> +		amdgpu_bo_unreserve(vm->root.base.bo);
>> +	}
>>   	if (r)
>>   		return r;
>>   
>> @@ -267,9 +289,8 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
>>   	}
>>   
>>   	/* create a gem object to contain this object in */
>> -	r = amdgpu_gem_object_create(adev, args->size, 0,
>> -				     AMDGPU_GEM_DOMAIN_CPU, 0,
>> -				     0, &gobj);
>> +	r = amdgpu_gem_object_create(adev, args->size, 0, AMDGPU_GEM_DOMAIN_CPU,
>> +				     0, 0, NULL, &gobj);
>>   	if (r)
>>   		return r;
>>   
>> @@ -521,7 +542,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
>>   	struct amdgpu_bo_list_entry vm_pd;
>>   	struct ttm_validate_buffer tv;
>>   	struct ww_acquire_ctx ticket;
>> -	struct list_head list;
>> +	struct list_head list, duplicates;
>>   	uint64_t va_flags;
>>   	int r = 0;
>>   
>> @@ -557,6 +578,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
>>   	}
>>   
>>   	INIT_LIST_HEAD(&list);
>> +	INIT_LIST_HEAD(&duplicates);
>>   	if ((args->operation != AMDGPU_VA_OP_CLEAR) &&
>>   	    !(args->flags & AMDGPU_VM_PAGE_PRT)) {
>>   		gobj = drm_gem_object_lookup(filp, args->handle);
>> @@ -573,7 +595,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
>>   
>>   	amdgpu_vm_get_pd_bo(&fpriv->vm, &list, &vm_pd);
>>   
>> -	r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL);
>> +	r = ttm_eu_reserve_buffers(&ticket, &list, true, &duplicates);
>>   	if (r)
>>   		goto error_unref;
>>   
>> @@ -639,6 +661,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
>>   int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
>>   			struct drm_file *filp)
>>   {
>> +	struct amdgpu_device *adev = dev->dev_private;
>>   	struct drm_amdgpu_gem_op *args = data;
>>   	struct drm_gem_object *gobj;
>>   	struct amdgpu_bo *robj;
>> @@ -686,6 +709,9 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
>>   		if (robj->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM)
>>   			robj->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT;
>>   
>> +		if (robj->flags & AMDGPU_GEM_CREATE_LOCAL)
>> +			amdgpu_vm_bo_invalidate(adev, robj, true);
>> +
>>   		amdgpu_bo_unreserve(robj);
>>   		break;
>>   	default:
>> @@ -715,8 +741,7 @@ int amdgpu_mode_dumb_create(struct drm_file *file_priv,
>>   	r = amdgpu_gem_object_create(adev, args->size, 0,
>>   				     AMDGPU_GEM_DOMAIN_VRAM,
>>   				     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
>> -				     ttm_bo_type_device,
>> -				     &gobj);
>> +				     false, NULL, &gobj);
>>   	if (r)
>>   		return -ENOMEM;
>>   
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
>> index 5b3f928..f407499 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
>> @@ -136,7 +136,8 @@ struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
>>   {
>>   	struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj);
>>   
>> -	if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm))
>> +	if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) ||
>> +	    bo->flags & AMDGPU_GEM_CREATE_LOCAL)
>>   		return ERR_PTR(-EPERM);
>>   
>>   	return drm_gem_prime_export(dev, gobj, flags);
>> diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
>> index d0ee739..05241a6 100644
>> --- a/include/uapi/drm/amdgpu_drm.h
>> +++ b/include/uapi/drm/amdgpu_drm.h
>> @@ -89,6 +89,8 @@ extern "C" {
>>   #define AMDGPU_GEM_CREATE_SHADOW		(1 << 4)
>>   /* Flag that allocating the BO should use linear VRAM */
>>   #define AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS	(1 << 5)
>> +/* Flag that BO is local in the VM */
>> +#define AMDGPU_GEM_CREATE_LOCAL			(1 << 6)
>>   
>>   struct drm_amdgpu_gem_create_in  {
>>   	/** the requested memory size */
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx