[6/8] drm/amdgpu: allocate VM PDs/PTs on demand

Submitted by Christian König on Feb. 4, 2019, 12:42 p.m.

Details

Message ID 20190204124256.1765-6-christian.koenig@amd.com
State New
Series "Series without cover letter"
Headers show

Commit Message

Christian König Feb. 4, 2019, 12:42 p.m.
Let's start to allocate VM PDs/PTs on demand instead of pre-allocating
them during mapping.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  |   6 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c       |   9 --
 drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c       |  10 --
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c        | 136 +++++-------------
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h        |   3 -
 5 files changed, 38 insertions(+), 126 deletions(-)

Patch hide | download patch | download mbox

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index d7b10d79f1de..2176c92f9377 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -492,12 +492,6 @@  static int add_bo_to_vm(struct amdgpu_device *adev, struct kgd_mem *mem,
 					vm->process_info->eviction_fence,
 					NULL, NULL);
 
-	ret = amdgpu_vm_alloc_pts(adev, vm, va, amdgpu_bo_size(bo));
-	if (ret) {
-		pr_err("Failed to allocate pts, err=%d\n", ret);
-		goto err_alloc_pts;
-	}
-
 	ret = vm_validate_pt_pd_bos(vm);
 	if (ret) {
 		pr_err("validate_pt_pd_bos() failed\n");
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
index 7e22be7ca68a..54dd02a898b9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
@@ -92,15 +92,6 @@  int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 		return -ENOMEM;
 	}
 
-	r = amdgpu_vm_alloc_pts(adev, (*bo_va)->base.vm, csa_addr,
-				size);
-	if (r) {
-		DRM_ERROR("failed to allocate pts for static CSA, err=%d\n", r);
-		amdgpu_vm_bo_rmv(adev, *bo_va);
-		ttm_eu_backoff_reservation(&ticket, &list);
-		return r;
-	}
-
 	r = amdgpu_vm_bo_map(adev, *bo_va, csa_addr, 0, size,
 			     AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE |
 			     AMDGPU_PTE_EXECUTABLE);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index d21dd2f369da..e141e3b13112 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -627,11 +627,6 @@  int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
 
 	switch (args->operation) {
 	case AMDGPU_VA_OP_MAP:
-		r = amdgpu_vm_alloc_pts(adev, bo_va->base.vm, args->va_address,
-					args->map_size);
-		if (r)
-			goto error_backoff;
-
 		va_flags = amdgpu_gmc_get_pte_flags(adev, args->flags);
 		r = amdgpu_vm_bo_map(adev, bo_va, args->va_address,
 				     args->offset_in_bo, args->map_size,
@@ -647,11 +642,6 @@  int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
 						args->map_size);
 		break;
 	case AMDGPU_VA_OP_REPLACE:
-		r = amdgpu_vm_alloc_pts(adev, bo_va->base.vm, args->va_address,
-					args->map_size);
-		if (r)
-			goto error_backoff;
-
 		va_flags = amdgpu_gmc_get_pte_flags(adev, args->flags);
 		r = amdgpu_vm_bo_replace_map(adev, bo_va, args->va_address,
 					     args->offset_in_bo, args->map_size,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index f7d410a5d848..0b8a1aa56c4a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -504,47 +504,6 @@  static void amdgpu_vm_pt_next(struct amdgpu_device *adev,
 	}
 }
 
-/**
- * amdgpu_vm_pt_first_leaf - get first leaf PD/PT
- *
- * @adev: amdgpu_device pointer
- * @vm: amdgpu_vm structure
- * @start: start addr of the walk
- * @cursor: state to initialize
- *
- * Start a walk and go directly to the leaf node.
- */
-static void amdgpu_vm_pt_first_leaf(struct amdgpu_device *adev,
-				    struct amdgpu_vm *vm, uint64_t start,
-				    struct amdgpu_vm_pt_cursor *cursor)
-{
-	amdgpu_vm_pt_start(adev, vm, start, cursor);
-	while (amdgpu_vm_pt_descendant(adev, cursor));
-}
-
-/**
- * amdgpu_vm_pt_next_leaf - get next leaf PD/PT
- *
- * @adev: amdgpu_device pointer
- * @cursor: current state
- *
- * Walk the PD/PT tree to the next leaf node.
- */
-static void amdgpu_vm_pt_next_leaf(struct amdgpu_device *adev,
-				   struct amdgpu_vm_pt_cursor *cursor)
-{
-	amdgpu_vm_pt_next(adev, cursor);
-	if (cursor->pfn != ~0ll)
-		while (amdgpu_vm_pt_descendant(adev, cursor));
-}
-
-/**
- * for_each_amdgpu_vm_pt_leaf - walk over all leaf PDs/PTs in the hierarchy
- */
-#define for_each_amdgpu_vm_pt_leaf(adev, vm, start, end, cursor)		\
-	for (amdgpu_vm_pt_first_leaf((adev), (vm), (start), &(cursor));		\
-	     (cursor).pfn <= end; amdgpu_vm_pt_next_leaf((adev), &(cursor)))
-
 /**
  * amdgpu_vm_pt_first_dfs - start a deep first search
  *
@@ -919,74 +878,51 @@  static void amdgpu_vm_bo_param(struct amdgpu_device *adev, struct amdgpu_vm *vm,
  * Returns:
  * 0 on success, errno otherwise.
  */
-int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
-			struct amdgpu_vm *vm,
-			uint64_t saddr, uint64_t size)
+static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
+			       struct amdgpu_vm *vm,
+			       struct amdgpu_vm_pt_cursor *cursor)
 {
-	struct amdgpu_vm_pt_cursor cursor;
+	struct amdgpu_vm_pt *entry = cursor->entry;
+	struct amdgpu_bo_param bp;
 	struct amdgpu_bo *pt;
-	uint64_t eaddr;
 	int r;
 
-	/* validate the parameters */
-	if (saddr & AMDGPU_GPU_PAGE_MASK || size & AMDGPU_GPU_PAGE_MASK)
-		return -EINVAL;
+	if (cursor->level < AMDGPU_VM_PTB && !entry->entries) {
+		unsigned num_entries;
 
-	eaddr = saddr + size - 1;
-
-	saddr /= AMDGPU_GPU_PAGE_SIZE;
-	eaddr /= AMDGPU_GPU_PAGE_SIZE;
-
-	if (eaddr >= adev->vm_manager.max_pfn) {
-		dev_err(adev->dev, "va above limit (0x%08llX >= 0x%08llX)\n",
-			eaddr, adev->vm_manager.max_pfn);
-		return -EINVAL;
+		num_entries = amdgpu_vm_num_entries(adev, cursor->level);
+		entry->entries = kvmalloc_array(num_entries,
+						sizeof(*entry->entries),
+						GFP_KERNEL | __GFP_ZERO);
+		if (!entry->entries)
+			return -ENOMEM;
 	}
 
-	for_each_amdgpu_vm_pt_leaf(adev, vm, saddr, eaddr, cursor) {
-		struct amdgpu_vm_pt *entry = cursor.entry;
-		struct amdgpu_bo_param bp;
-
-		if (cursor.level < AMDGPU_VM_PTB) {
-			unsigned num_entries;
-
-			num_entries = amdgpu_vm_num_entries(adev, cursor.level);
-			entry->entries = kvmalloc_array(num_entries,
-							sizeof(*entry->entries),
-							GFP_KERNEL |
-							__GFP_ZERO);
-			if (!entry->entries)
-				return -ENOMEM;
-		}
-
-
-		if (entry->base.bo)
-			continue;
-
-		amdgpu_vm_bo_param(adev, vm, cursor.level, &bp);
-
-		r = amdgpu_bo_create(adev, &bp, &pt);
-		if (r)
-			return r;
-
-		if (vm->use_cpu_for_update) {
-			r = amdgpu_bo_kmap(pt, NULL);
-			if (r)
-				goto error_free_pt;
-		}
+	if (entry->base.bo)
+		return 0;
 
-		/* Keep a reference to the root directory to avoid
-		* freeing them up in the wrong order.
-		*/
-		pt->parent = amdgpu_bo_ref(cursor.parent->base.bo);
+	amdgpu_vm_bo_param(adev, vm, cursor->level, &bp);
 
-		amdgpu_vm_bo_base_init(&entry->base, vm, pt);
+	r = amdgpu_bo_create(adev, &bp, &pt);
+	if (r)
+		return r;
 
-		r = amdgpu_vm_clear_bo(adev, vm, pt);
+	if (vm->use_cpu_for_update) {
+		r = amdgpu_bo_kmap(pt, NULL);
 		if (r)
 			goto error_free_pt;
 	}
 
+	/* Keep a reference to the root directory to avoid
+	 * freeing them up in the wrong order.
+	 */
+	pt->parent = amdgpu_bo_ref(cursor->parent->base.bo);
+	amdgpu_vm_bo_base_init(&entry->base, vm, pt);
+
+	r = amdgpu_vm_clear_bo(adev, vm, pt);
+	if (r)
+		goto error_free_pt;
+
 	return 0;
 
 error_free_pt:
@@ -1655,6 +1591,7 @@  static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
 	struct amdgpu_vm_pt_cursor cursor;
 	uint64_t frag_start = start, frag_end;
 	unsigned int frag;
+	int r;
 
 	/* figure out the initial fragment */
 	amdgpu_vm_fragment(params, frag_start, end, flags, &frag, &frag_end);
@@ -1662,12 +1599,15 @@  static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
 	/* walk over the address space and update the PTs */
 	amdgpu_vm_pt_start(adev, params->vm, start, &cursor);
 	while (cursor.pfn < end) {
-		struct amdgpu_bo *pt = cursor.entry->base.bo;
 		unsigned shift, parent_shift, mask;
 		uint64_t incr, entry_end, pe_start;
+		struct amdgpu_bo *pt;
 
-		if (!pt)
-			return -ENOENT;
+		r = amdgpu_vm_alloc_pts(params->adev, params->vm, &cursor);
+		if (r)
+			return r;
+
+		pt = cursor.entry->base.bo;
 
 		/* The root level can't be a huge page */
 		if (cursor.level == adev->vm_manager.root_level) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 81ff8177f092..116605c038d2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -303,9 +303,6 @@  bool amdgpu_vm_ready(struct amdgpu_vm *vm);
 int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 			      int (*callback)(void *p, struct amdgpu_bo *bo),
 			      void *param);
-int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
-			struct amdgpu_vm *vm,
-			uint64_t saddr, uint64_t size);
 int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync);
 int amdgpu_vm_update_directories(struct amdgpu_device *adev,
 				 struct amdgpu_vm *vm);

Comments

Kuehling, Felix Feb. 4, 2019, 8:17 p.m.
This may cause regressions in KFD if PT BO allocation can trigger 
eviction fences. The previous call to amdgpu_vm_alloc_pts was in a 
context where we had temporarily removed the eviction fence. PT BO 
allocation in amdgpu_vm_bo_update is not protected like that.

I vaguely remember looking into this last time you were working on our 
eviction fence code and thinking that most of the cases where we remove 
the eviction fence were no longer needed if fence synchronization 
happens through the amdgpu_sync-object API (rather than waiting on a 
reservation object directly). I think it's time I go and finally clean 
that up.

Do you know whether PT BO allocation would wait on the page-directory 
reservation object without the sync-object API anywhere?

Regards,
   Felix

On 2019-02-04 7:42 a.m., Christian König wrote:
> Let's start to allocate VM PDs/PTs on demand instead of pre-allocating

> them during mapping.

>

> Signed-off-by: Christian König <christian.koenig@amd.com>

> ---

>   .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  |   6 -

>   drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c       |   9 --

>   drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c       |  10 --

>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c        | 136 +++++-------------

>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h        |   3 -

>   5 files changed, 38 insertions(+), 126 deletions(-)

>

> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c

> index d7b10d79f1de..2176c92f9377 100644

> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c

> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c

> @@ -492,12 +492,6 @@ static int add_bo_to_vm(struct amdgpu_device *adev, struct kgd_mem *mem,

>   					vm->process_info->eviction_fence,

>   					NULL, NULL);

>   

> -	ret = amdgpu_vm_alloc_pts(adev, vm, va, amdgpu_bo_size(bo));

> -	if (ret) {

> -		pr_err("Failed to allocate pts, err=%d\n", ret);

> -		goto err_alloc_pts;

> -	}

> -

>   	ret = vm_validate_pt_pd_bos(vm);

>   	if (ret) {

>   		pr_err("validate_pt_pd_bos() failed\n");

> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c

> index 7e22be7ca68a..54dd02a898b9 100644

> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c

> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c

> @@ -92,15 +92,6 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,

>   		return -ENOMEM;

>   	}

>   

> -	r = amdgpu_vm_alloc_pts(adev, (*bo_va)->base.vm, csa_addr,

> -				size);

> -	if (r) {

> -		DRM_ERROR("failed to allocate pts for static CSA, err=%d\n", r);

> -		amdgpu_vm_bo_rmv(adev, *bo_va);

> -		ttm_eu_backoff_reservation(&ticket, &list);

> -		return r;

> -	}

> -

>   	r = amdgpu_vm_bo_map(adev, *bo_va, csa_addr, 0, size,

>   			     AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE |

>   			     AMDGPU_PTE_EXECUTABLE);

> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c

> index d21dd2f369da..e141e3b13112 100644

> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c

> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c

> @@ -627,11 +627,6 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,

>   

>   	switch (args->operation) {

>   	case AMDGPU_VA_OP_MAP:

> -		r = amdgpu_vm_alloc_pts(adev, bo_va->base.vm, args->va_address,

> -					args->map_size);

> -		if (r)

> -			goto error_backoff;

> -

>   		va_flags = amdgpu_gmc_get_pte_flags(adev, args->flags);

>   		r = amdgpu_vm_bo_map(adev, bo_va, args->va_address,

>   				     args->offset_in_bo, args->map_size,

> @@ -647,11 +642,6 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,

>   						args->map_size);

>   		break;

>   	case AMDGPU_VA_OP_REPLACE:

> -		r = amdgpu_vm_alloc_pts(adev, bo_va->base.vm, args->va_address,

> -					args->map_size);

> -		if (r)

> -			goto error_backoff;

> -

>   		va_flags = amdgpu_gmc_get_pte_flags(adev, args->flags);

>   		r = amdgpu_vm_bo_replace_map(adev, bo_va, args->va_address,

>   					     args->offset_in_bo, args->map_size,

> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c

> index f7d410a5d848..0b8a1aa56c4a 100644

> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c

> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c

> @@ -504,47 +504,6 @@ static void amdgpu_vm_pt_next(struct amdgpu_device *adev,

>   	}

>   }

>   

> -/**

> - * amdgpu_vm_pt_first_leaf - get first leaf PD/PT

> - *

> - * @adev: amdgpu_device pointer

> - * @vm: amdgpu_vm structure

> - * @start: start addr of the walk

> - * @cursor: state to initialize

> - *

> - * Start a walk and go directly to the leaf node.

> - */

> -static void amdgpu_vm_pt_first_leaf(struct amdgpu_device *adev,

> -				    struct amdgpu_vm *vm, uint64_t start,

> -				    struct amdgpu_vm_pt_cursor *cursor)

> -{

> -	amdgpu_vm_pt_start(adev, vm, start, cursor);

> -	while (amdgpu_vm_pt_descendant(adev, cursor));

> -}

> -

> -/**

> - * amdgpu_vm_pt_next_leaf - get next leaf PD/PT

> - *

> - * @adev: amdgpu_device pointer

> - * @cursor: current state

> - *

> - * Walk the PD/PT tree to the next leaf node.

> - */

> -static void amdgpu_vm_pt_next_leaf(struct amdgpu_device *adev,

> -				   struct amdgpu_vm_pt_cursor *cursor)

> -{

> -	amdgpu_vm_pt_next(adev, cursor);

> -	if (cursor->pfn != ~0ll)

> -		while (amdgpu_vm_pt_descendant(adev, cursor));

> -}

> -

> -/**

> - * for_each_amdgpu_vm_pt_leaf - walk over all leaf PDs/PTs in the hierarchy

> - */

> -#define for_each_amdgpu_vm_pt_leaf(adev, vm, start, end, cursor)		\

> -	for (amdgpu_vm_pt_first_leaf((adev), (vm), (start), &(cursor));		\

> -	     (cursor).pfn <= end; amdgpu_vm_pt_next_leaf((adev), &(cursor)))

> -

>   /**

>    * amdgpu_vm_pt_first_dfs - start a deep first search

>    *

> @@ -919,74 +878,51 @@ static void amdgpu_vm_bo_param(struct amdgpu_device *adev, struct amdgpu_vm *vm,

>    * Returns:

>    * 0 on success, errno otherwise.

>    */

> -int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,

> -			struct amdgpu_vm *vm,

> -			uint64_t saddr, uint64_t size)

> +static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,

> +			       struct amdgpu_vm *vm,

> +			       struct amdgpu_vm_pt_cursor *cursor)

>   {

> -	struct amdgpu_vm_pt_cursor cursor;

> +	struct amdgpu_vm_pt *entry = cursor->entry;

> +	struct amdgpu_bo_param bp;

>   	struct amdgpu_bo *pt;

> -	uint64_t eaddr;

>   	int r;

>   

> -	/* validate the parameters */

> -	if (saddr & AMDGPU_GPU_PAGE_MASK || size & AMDGPU_GPU_PAGE_MASK)

> -		return -EINVAL;

> +	if (cursor->level < AMDGPU_VM_PTB && !entry->entries) {

> +		unsigned num_entries;

>   

> -	eaddr = saddr + size - 1;

> -

> -	saddr /= AMDGPU_GPU_PAGE_SIZE;

> -	eaddr /= AMDGPU_GPU_PAGE_SIZE;

> -

> -	if (eaddr >= adev->vm_manager.max_pfn) {

> -		dev_err(adev->dev, "va above limit (0x%08llX >= 0x%08llX)\n",

> -			eaddr, adev->vm_manager.max_pfn);

> -		return -EINVAL;

> +		num_entries = amdgpu_vm_num_entries(adev, cursor->level);

> +		entry->entries = kvmalloc_array(num_entries,

> +						sizeof(*entry->entries),

> +						GFP_KERNEL | __GFP_ZERO);

> +		if (!entry->entries)

> +			return -ENOMEM;

>   	}

>   

> -	for_each_amdgpu_vm_pt_leaf(adev, vm, saddr, eaddr, cursor) {

> -		struct amdgpu_vm_pt *entry = cursor.entry;

> -		struct amdgpu_bo_param bp;

> -

> -		if (cursor.level < AMDGPU_VM_PTB) {

> -			unsigned num_entries;

> -

> -			num_entries = amdgpu_vm_num_entries(adev, cursor.level);

> -			entry->entries = kvmalloc_array(num_entries,

> -							sizeof(*entry->entries),

> -							GFP_KERNEL |

> -							__GFP_ZERO);

> -			if (!entry->entries)

> -				return -ENOMEM;

> -		}

> -

> -

> -		if (entry->base.bo)

> -			continue;

> -

> -		amdgpu_vm_bo_param(adev, vm, cursor.level, &bp);

> -

> -		r = amdgpu_bo_create(adev, &bp, &pt);

> -		if (r)

> -			return r;

> -

> -		if (vm->use_cpu_for_update) {

> -			r = amdgpu_bo_kmap(pt, NULL);

> -			if (r)

> -				goto error_free_pt;

> -		}

> +	if (entry->base.bo)

> +		return 0;

>   

> -		/* Keep a reference to the root directory to avoid

> -		* freeing them up in the wrong order.

> -		*/

> -		pt->parent = amdgpu_bo_ref(cursor.parent->base.bo);

> +	amdgpu_vm_bo_param(adev, vm, cursor->level, &bp);

>   

> -		amdgpu_vm_bo_base_init(&entry->base, vm, pt);

> +	r = amdgpu_bo_create(adev, &bp, &pt);

> +	if (r)

> +		return r;

>   

> -		r = amdgpu_vm_clear_bo(adev, vm, pt);

> +	if (vm->use_cpu_for_update) {

> +		r = amdgpu_bo_kmap(pt, NULL);

>   		if (r)

>   			goto error_free_pt;

>   	}

>   

> +	/* Keep a reference to the root directory to avoid

> +	 * freeing them up in the wrong order.

> +	 */

> +	pt->parent = amdgpu_bo_ref(cursor->parent->base.bo);

> +	amdgpu_vm_bo_base_init(&entry->base, vm, pt);

> +

> +	r = amdgpu_vm_clear_bo(adev, vm, pt);

> +	if (r)

> +		goto error_free_pt;

> +

>   	return 0;

>   

>   error_free_pt:

> @@ -1655,6 +1591,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,

>   	struct amdgpu_vm_pt_cursor cursor;

>   	uint64_t frag_start = start, frag_end;

>   	unsigned int frag;

> +	int r;

>   

>   	/* figure out the initial fragment */

>   	amdgpu_vm_fragment(params, frag_start, end, flags, &frag, &frag_end);

> @@ -1662,12 +1599,15 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,

>   	/* walk over the address space and update the PTs */

>   	amdgpu_vm_pt_start(adev, params->vm, start, &cursor);

>   	while (cursor.pfn < end) {

> -		struct amdgpu_bo *pt = cursor.entry->base.bo;

>   		unsigned shift, parent_shift, mask;

>   		uint64_t incr, entry_end, pe_start;

> +		struct amdgpu_bo *pt;

>   

> -		if (!pt)

> -			return -ENOENT;

> +		r = amdgpu_vm_alloc_pts(params->adev, params->vm, &cursor);

> +		if (r)

> +			return r;

> +

> +		pt = cursor.entry->base.bo;

>   

>   		/* The root level can't be a huge page */

>   		if (cursor.level == adev->vm_manager.root_level) {

> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h

> index 81ff8177f092..116605c038d2 100644

> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h

> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h

> @@ -303,9 +303,6 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm);

>   int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,

>   			      int (*callback)(void *p, struct amdgpu_bo *bo),

>   			      void *param);

> -int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,

> -			struct amdgpu_vm *vm,

> -			uint64_t saddr, uint64_t size);

>   int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync);

>   int amdgpu_vm_update_directories(struct amdgpu_device *adev,

>   				 struct amdgpu_vm *vm);
Kuehling, Felix Feb. 5, 2019, 12:33 a.m.
On 2019-02-04 3:17 p.m., Kuehling, Felix wrote:
> This may cause regressions in KFD if PT BO allocation can trigger

> eviction fences. The previous call to amdgpu_vm_alloc_pts was in a

> context where we had temporarily removed the eviction fence. PT BO

> allocation in amdgpu_vm_bo_update is not protected like that.

>

> I vaguely remember looking into this last time you were working on our

> eviction fence code and thinking that most of the cases where we remove

> the eviction fence were no longer needed if fence synchronization

> happens through the amdgpu_sync-object API (rather than waiting on a

> reservation object directly). I think it's time I go and finally clean

> that up.


I'm looking at this now. It's not working as I had hoped.


>

> Do you know whether PT BO allocation would wait on the page-directory

> reservation object without the sync-object API anywhere?


It doesn't even matter. Buffer moves (anything calling amdgpu_sync_resv 
with AMDGPU_FENCE_OWNER_UNDEFINED) are supposed to trigger eviction 
fences. So page table BO allocation triggers eviction fences on the PD 
reservation. I don't know how to avoid this other than by removing the 
eviction fence while allocating PT BOs. With your changes this will be 
potentially every time we map or unmap memory.

Any better ideas?

Regards,
   Felix


>

> Regards,

>     Felix

>

> On 2019-02-04 7:42 a.m., Christian König wrote:

>> Let's start to allocate VM PDs/PTs on demand instead of pre-allocating

>> them during mapping.

>>

>> Signed-off-by: Christian König <christian.koenig@amd.com>

>> ---

>>    .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  |   6 -

>>    drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c       |   9 --

>>    drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c       |  10 --

>>    drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c        | 136 +++++-------------

>>    drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h        |   3 -

>>    5 files changed, 38 insertions(+), 126 deletions(-)

>>

>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c

>> index d7b10d79f1de..2176c92f9377 100644

>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c

>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c

>> @@ -492,12 +492,6 @@ static int add_bo_to_vm(struct amdgpu_device *adev, struct kgd_mem *mem,

>>    					vm->process_info->eviction_fence,

>>    					NULL, NULL);

>>    

>> -	ret = amdgpu_vm_alloc_pts(adev, vm, va, amdgpu_bo_size(bo));

>> -	if (ret) {

>> -		pr_err("Failed to allocate pts, err=%d\n", ret);

>> -		goto err_alloc_pts;

>> -	}

>> -

>>    	ret = vm_validate_pt_pd_bos(vm);

>>    	if (ret) {

>>    		pr_err("validate_pt_pd_bos() failed\n");

>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c

>> index 7e22be7ca68a..54dd02a898b9 100644

>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c

>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c

>> @@ -92,15 +92,6 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,

>>    		return -ENOMEM;

>>    	}

>>    

>> -	r = amdgpu_vm_alloc_pts(adev, (*bo_va)->base.vm, csa_addr,

>> -				size);

>> -	if (r) {

>> -		DRM_ERROR("failed to allocate pts for static CSA, err=%d\n", r);

>> -		amdgpu_vm_bo_rmv(adev, *bo_va);

>> -		ttm_eu_backoff_reservation(&ticket, &list);

>> -		return r;

>> -	}

>> -

>>    	r = amdgpu_vm_bo_map(adev, *bo_va, csa_addr, 0, size,

>>    			     AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE |

>>    			     AMDGPU_PTE_EXECUTABLE);

>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c

>> index d21dd2f369da..e141e3b13112 100644

>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c

>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c

>> @@ -627,11 +627,6 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,

>>    

>>    	switch (args->operation) {

>>    	case AMDGPU_VA_OP_MAP:

>> -		r = amdgpu_vm_alloc_pts(adev, bo_va->base.vm, args->va_address,

>> -					args->map_size);

>> -		if (r)

>> -			goto error_backoff;

>> -

>>    		va_flags = amdgpu_gmc_get_pte_flags(adev, args->flags);

>>    		r = amdgpu_vm_bo_map(adev, bo_va, args->va_address,

>>    				     args->offset_in_bo, args->map_size,

>> @@ -647,11 +642,6 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,

>>    						args->map_size);

>>    		break;

>>    	case AMDGPU_VA_OP_REPLACE:

>> -		r = amdgpu_vm_alloc_pts(adev, bo_va->base.vm, args->va_address,

>> -					args->map_size);

>> -		if (r)

>> -			goto error_backoff;

>> -

>>    		va_flags = amdgpu_gmc_get_pte_flags(adev, args->flags);

>>    		r = amdgpu_vm_bo_replace_map(adev, bo_va, args->va_address,

>>    					     args->offset_in_bo, args->map_size,

>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c

>> index f7d410a5d848..0b8a1aa56c4a 100644

>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c

>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c

>> @@ -504,47 +504,6 @@ static void amdgpu_vm_pt_next(struct amdgpu_device *adev,

>>    	}

>>    }

>>    

>> -/**

>> - * amdgpu_vm_pt_first_leaf - get first leaf PD/PT

>> - *

>> - * @adev: amdgpu_device pointer

>> - * @vm: amdgpu_vm structure

>> - * @start: start addr of the walk

>> - * @cursor: state to initialize

>> - *

>> - * Start a walk and go directly to the leaf node.

>> - */

>> -static void amdgpu_vm_pt_first_leaf(struct amdgpu_device *adev,

>> -				    struct amdgpu_vm *vm, uint64_t start,

>> -				    struct amdgpu_vm_pt_cursor *cursor)

>> -{

>> -	amdgpu_vm_pt_start(adev, vm, start, cursor);

>> -	while (amdgpu_vm_pt_descendant(adev, cursor));

>> -}

>> -

>> -/**

>> - * amdgpu_vm_pt_next_leaf - get next leaf PD/PT

>> - *

>> - * @adev: amdgpu_device pointer

>> - * @cursor: current state

>> - *

>> - * Walk the PD/PT tree to the next leaf node.

>> - */

>> -static void amdgpu_vm_pt_next_leaf(struct amdgpu_device *adev,

>> -				   struct amdgpu_vm_pt_cursor *cursor)

>> -{

>> -	amdgpu_vm_pt_next(adev, cursor);

>> -	if (cursor->pfn != ~0ll)

>> -		while (amdgpu_vm_pt_descendant(adev, cursor));

>> -}

>> -

>> -/**

>> - * for_each_amdgpu_vm_pt_leaf - walk over all leaf PDs/PTs in the hierarchy

>> - */

>> -#define for_each_amdgpu_vm_pt_leaf(adev, vm, start, end, cursor)		\

>> -	for (amdgpu_vm_pt_first_leaf((adev), (vm), (start), &(cursor));		\

>> -	     (cursor).pfn <= end; amdgpu_vm_pt_next_leaf((adev), &(cursor)))

>> -

>>    /**

>>     * amdgpu_vm_pt_first_dfs - start a deep first search

>>     *

>> @@ -919,74 +878,51 @@ static void amdgpu_vm_bo_param(struct amdgpu_device *adev, struct amdgpu_vm *vm,

>>     * Returns:

>>     * 0 on success, errno otherwise.

>>     */

>> -int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,

>> -			struct amdgpu_vm *vm,

>> -			uint64_t saddr, uint64_t size)

>> +static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,

>> +			       struct amdgpu_vm *vm,

>> +			       struct amdgpu_vm_pt_cursor *cursor)

>>    {

>> -	struct amdgpu_vm_pt_cursor cursor;

>> +	struct amdgpu_vm_pt *entry = cursor->entry;

>> +	struct amdgpu_bo_param bp;

>>    	struct amdgpu_bo *pt;

>> -	uint64_t eaddr;

>>    	int r;

>>    

>> -	/* validate the parameters */

>> -	if (saddr & AMDGPU_GPU_PAGE_MASK || size & AMDGPU_GPU_PAGE_MASK)

>> -		return -EINVAL;

>> +	if (cursor->level < AMDGPU_VM_PTB && !entry->entries) {

>> +		unsigned num_entries;

>>    

>> -	eaddr = saddr + size - 1;

>> -

>> -	saddr /= AMDGPU_GPU_PAGE_SIZE;

>> -	eaddr /= AMDGPU_GPU_PAGE_SIZE;

>> -

>> -	if (eaddr >= adev->vm_manager.max_pfn) {

>> -		dev_err(adev->dev, "va above limit (0x%08llX >= 0x%08llX)\n",

>> -			eaddr, adev->vm_manager.max_pfn);

>> -		return -EINVAL;

>> +		num_entries = amdgpu_vm_num_entries(adev, cursor->level);

>> +		entry->entries = kvmalloc_array(num_entries,

>> +						sizeof(*entry->entries),

>> +						GFP_KERNEL | __GFP_ZERO);

>> +		if (!entry->entries)

>> +			return -ENOMEM;

>>    	}

>>    

>> -	for_each_amdgpu_vm_pt_leaf(adev, vm, saddr, eaddr, cursor) {

>> -		struct amdgpu_vm_pt *entry = cursor.entry;

>> -		struct amdgpu_bo_param bp;

>> -

>> -		if (cursor.level < AMDGPU_VM_PTB) {

>> -			unsigned num_entries;

>> -

>> -			num_entries = amdgpu_vm_num_entries(adev, cursor.level);

>> -			entry->entries = kvmalloc_array(num_entries,

>> -							sizeof(*entry->entries),

>> -							GFP_KERNEL |

>> -							__GFP_ZERO);

>> -			if (!entry->entries)

>> -				return -ENOMEM;

>> -		}

>> -

>> -

>> -		if (entry->base.bo)

>> -			continue;

>> -

>> -		amdgpu_vm_bo_param(adev, vm, cursor.level, &bp);

>> -

>> -		r = amdgpu_bo_create(adev, &bp, &pt);

>> -		if (r)

>> -			return r;

>> -

>> -		if (vm->use_cpu_for_update) {

>> -			r = amdgpu_bo_kmap(pt, NULL);

>> -			if (r)

>> -				goto error_free_pt;

>> -		}

>> +	if (entry->base.bo)

>> +		return 0;

>>    

>> -		/* Keep a reference to the root directory to avoid

>> -		* freeing them up in the wrong order.

>> -		*/

>> -		pt->parent = amdgpu_bo_ref(cursor.parent->base.bo);

>> +	amdgpu_vm_bo_param(adev, vm, cursor->level, &bp);

>>    

>> -		amdgpu_vm_bo_base_init(&entry->base, vm, pt);

>> +	r = amdgpu_bo_create(adev, &bp, &pt);

>> +	if (r)

>> +		return r;

>>    

>> -		r = amdgpu_vm_clear_bo(adev, vm, pt);

>> +	if (vm->use_cpu_for_update) {

>> +		r = amdgpu_bo_kmap(pt, NULL);

>>    		if (r)

>>    			goto error_free_pt;

>>    	}

>>    

>> +	/* Keep a reference to the root directory to avoid

>> +	 * freeing them up in the wrong order.

>> +	 */

>> +	pt->parent = amdgpu_bo_ref(cursor->parent->base.bo);

>> +	amdgpu_vm_bo_base_init(&entry->base, vm, pt);

>> +

>> +	r = amdgpu_vm_clear_bo(adev, vm, pt);

>> +	if (r)

>> +		goto error_free_pt;

>> +

>>    	return 0;

>>    

>>    error_free_pt:

>> @@ -1655,6 +1591,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,

>>    	struct amdgpu_vm_pt_cursor cursor;

>>    	uint64_t frag_start = start, frag_end;

>>    	unsigned int frag;

>> +	int r;

>>    

>>    	/* figure out the initial fragment */

>>    	amdgpu_vm_fragment(params, frag_start, end, flags, &frag, &frag_end);

>> @@ -1662,12 +1599,15 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,

>>    	/* walk over the address space and update the PTs */

>>    	amdgpu_vm_pt_start(adev, params->vm, start, &cursor);

>>    	while (cursor.pfn < end) {

>> -		struct amdgpu_bo *pt = cursor.entry->base.bo;

>>    		unsigned shift, parent_shift, mask;

>>    		uint64_t incr, entry_end, pe_start;

>> +		struct amdgpu_bo *pt;

>>    

>> -		if (!pt)

>> -			return -ENOENT;

>> +		r = amdgpu_vm_alloc_pts(params->adev, params->vm, &cursor);

>> +		if (r)

>> +			return r;

>> +

>> +		pt = cursor.entry->base.bo;

>>    

>>    		/* The root level can't be a huge page */

>>    		if (cursor.level == adev->vm_manager.root_level) {

>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h

>> index 81ff8177f092..116605c038d2 100644

>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h

>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h

>> @@ -303,9 +303,6 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm);

>>    int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,

>>    			      int (*callback)(void *p, struct amdgpu_bo *bo),

>>    			      void *param);

>> -int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,

>> -			struct amdgpu_vm *vm,

>> -			uint64_t saddr, uint64_t size);

>>    int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync);

>>    int amdgpu_vm_update_directories(struct amdgpu_device *adev,

>>    				 struct amdgpu_vm *vm);

> _______________________________________________

> amd-gfx mailing list

> amd-gfx@lists.freedesktop.org

> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Christian König Feb. 5, 2019, 11:37 a.m.
> This may cause regressions in KFD if PT BO allocation can trigger
> eviction fences.
I don't think that can happen, but need to double check as well.

Otherwise allocating page tables could try to evict other page tables 
from the same process and that seriously doesn't make much sense.

> Do you know whether PT BO allocation would wait on the page-directory
> reservation object without the sync-object API anywhere?
For inside the kernel I can't think of any relevant use case, except for 
the eviction call path and there it is intentional.

One thing which will always wait for all fences is the display code 
path, but that is not relevant here. (Isn't it?).

What might be questionable is amdgpu_gem_wait_idle_ioctl(), but I think 
this is also completely intentional that we wait on everything here.

Regards,
Christian.

Am 04.02.19 um 21:17 schrieb Kuehling, Felix:
> This may cause regressions in KFD if PT BO allocation can trigger
> eviction fences. The previous call to amdgpu_vm_alloc_pts was in a
> context where we had temporarily removed the eviction fence. PT BO
> allocation in amdgpu_vm_bo_update is not protected like that.
>
> I vaguely remember looking into this last time you were working on our
> eviction fence code and thinking that most of the cases where we remove
> the eviction fence were no longer needed if fence synchronization
> happens through the amdgpu_sync-object API (rather than waiting on a
> reservation object directly). I think it's time I go and finally clean
> that up.
>
> Do you know whether PT BO allocation would wait on the page-directory
> reservation object without the sync-object API anywhere?
>
> Regards,
>     Felix
>
> On 2019-02-04 7:42 a.m., Christian König wrote:
>> Let's start to allocate VM PDs/PTs on demand instead of pre-allocating
>> them during mapping.
>>
>> Signed-off-by: Christian König <christian.koenig@amd.com>
>> ---
>>    .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  |   6 -
>>    drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c       |   9 --
>>    drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c       |  10 --
>>    drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c        | 136 +++++-------------
>>    drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h        |   3 -
>>    5 files changed, 38 insertions(+), 126 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> index d7b10d79f1de..2176c92f9377 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> @@ -492,12 +492,6 @@ static int add_bo_to_vm(struct amdgpu_device *adev, struct kgd_mem *mem,
>>    					vm->process_info->eviction_fence,
>>    					NULL, NULL);
>>    
>> -	ret = amdgpu_vm_alloc_pts(adev, vm, va, amdgpu_bo_size(bo));
>> -	if (ret) {
>> -		pr_err("Failed to allocate pts, err=%d\n", ret);
>> -		goto err_alloc_pts;
>> -	}
>> -
>>    	ret = vm_validate_pt_pd_bos(vm);
>>    	if (ret) {
>>    		pr_err("validate_pt_pd_bos() failed\n");
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
>> index 7e22be7ca68a..54dd02a898b9 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
>> @@ -92,15 +92,6 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>>    		return -ENOMEM;
>>    	}
>>    
>> -	r = amdgpu_vm_alloc_pts(adev, (*bo_va)->base.vm, csa_addr,
>> -				size);
>> -	if (r) {
>> -		DRM_ERROR("failed to allocate pts for static CSA, err=%d\n", r);
>> -		amdgpu_vm_bo_rmv(adev, *bo_va);
>> -		ttm_eu_backoff_reservation(&ticket, &list);
>> -		return r;
>> -	}
>> -
>>    	r = amdgpu_vm_bo_map(adev, *bo_va, csa_addr, 0, size,
>>    			     AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE |
>>    			     AMDGPU_PTE_EXECUTABLE);
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>> index d21dd2f369da..e141e3b13112 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>> @@ -627,11 +627,6 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
>>    
>>    	switch (args->operation) {
>>    	case AMDGPU_VA_OP_MAP:
>> -		r = amdgpu_vm_alloc_pts(adev, bo_va->base.vm, args->va_address,
>> -					args->map_size);
>> -		if (r)
>> -			goto error_backoff;
>> -
>>    		va_flags = amdgpu_gmc_get_pte_flags(adev, args->flags);
>>    		r = amdgpu_vm_bo_map(adev, bo_va, args->va_address,
>>    				     args->offset_in_bo, args->map_size,
>> @@ -647,11 +642,6 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
>>    						args->map_size);
>>    		break;
>>    	case AMDGPU_VA_OP_REPLACE:
>> -		r = amdgpu_vm_alloc_pts(adev, bo_va->base.vm, args->va_address,
>> -					args->map_size);
>> -		if (r)
>> -			goto error_backoff;
>> -
>>    		va_flags = amdgpu_gmc_get_pte_flags(adev, args->flags);
>>    		r = amdgpu_vm_bo_replace_map(adev, bo_va, args->va_address,
>>    					     args->offset_in_bo, args->map_size,
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> index f7d410a5d848..0b8a1aa56c4a 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> @@ -504,47 +504,6 @@ static void amdgpu_vm_pt_next(struct amdgpu_device *adev,
>>    	}
>>    }
>>    
>> -/**
>> - * amdgpu_vm_pt_first_leaf - get first leaf PD/PT
>> - *
>> - * @adev: amdgpu_device pointer
>> - * @vm: amdgpu_vm structure
>> - * @start: start addr of the walk
>> - * @cursor: state to initialize
>> - *
>> - * Start a walk and go directly to the leaf node.
>> - */
>> -static void amdgpu_vm_pt_first_leaf(struct amdgpu_device *adev,
>> -				    struct amdgpu_vm *vm, uint64_t start,
>> -				    struct amdgpu_vm_pt_cursor *cursor)
>> -{
>> -	amdgpu_vm_pt_start(adev, vm, start, cursor);
>> -	while (amdgpu_vm_pt_descendant(adev, cursor));
>> -}
>> -
>> -/**
>> - * amdgpu_vm_pt_next_leaf - get next leaf PD/PT
>> - *
>> - * @adev: amdgpu_device pointer
>> - * @cursor: current state
>> - *
>> - * Walk the PD/PT tree to the next leaf node.
>> - */
>> -static void amdgpu_vm_pt_next_leaf(struct amdgpu_device *adev,
>> -				   struct amdgpu_vm_pt_cursor *cursor)
>> -{
>> -	amdgpu_vm_pt_next(adev, cursor);
>> -	if (cursor->pfn != ~0ll)
>> -		while (amdgpu_vm_pt_descendant(adev, cursor));
>> -}
>> -
>> -/**
>> - * for_each_amdgpu_vm_pt_leaf - walk over all leaf PDs/PTs in the hierarchy
>> - */
>> -#define for_each_amdgpu_vm_pt_leaf(adev, vm, start, end, cursor)		\
>> -	for (amdgpu_vm_pt_first_leaf((adev), (vm), (start), &(cursor));		\
>> -	     (cursor).pfn <= end; amdgpu_vm_pt_next_leaf((adev), &(cursor)))
>> -
>>    /**
>>     * amdgpu_vm_pt_first_dfs - start a deep first search
>>     *
>> @@ -919,74 +878,51 @@ static void amdgpu_vm_bo_param(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>>     * Returns:
>>     * 0 on success, errno otherwise.
>>     */
>> -int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
>> -			struct amdgpu_vm *vm,
>> -			uint64_t saddr, uint64_t size)
>> +static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
>> +			       struct amdgpu_vm *vm,
>> +			       struct amdgpu_vm_pt_cursor *cursor)
>>    {
>> -	struct amdgpu_vm_pt_cursor cursor;
>> +	struct amdgpu_vm_pt *entry = cursor->entry;
>> +	struct amdgpu_bo_param bp;
>>    	struct amdgpu_bo *pt;
>> -	uint64_t eaddr;
>>    	int r;
>>    
>> -	/* validate the parameters */
>> -	if (saddr & AMDGPU_GPU_PAGE_MASK || size & AMDGPU_GPU_PAGE_MASK)
>> -		return -EINVAL;
>> +	if (cursor->level < AMDGPU_VM_PTB && !entry->entries) {
>> +		unsigned num_entries;
>>    
>> -	eaddr = saddr + size - 1;
>> -
>> -	saddr /= AMDGPU_GPU_PAGE_SIZE;
>> -	eaddr /= AMDGPU_GPU_PAGE_SIZE;
>> -
>> -	if (eaddr >= adev->vm_manager.max_pfn) {
>> -		dev_err(adev->dev, "va above limit (0x%08llX >= 0x%08llX)\n",
>> -			eaddr, adev->vm_manager.max_pfn);
>> -		return -EINVAL;
>> +		num_entries = amdgpu_vm_num_entries(adev, cursor->level);
>> +		entry->entries = kvmalloc_array(num_entries,
>> +						sizeof(*entry->entries),
>> +						GFP_KERNEL | __GFP_ZERO);
>> +		if (!entry->entries)
>> +			return -ENOMEM;
>>    	}
>>    
>> -	for_each_amdgpu_vm_pt_leaf(adev, vm, saddr, eaddr, cursor) {
>> -		struct amdgpu_vm_pt *entry = cursor.entry;
>> -		struct amdgpu_bo_param bp;
>> -
>> -		if (cursor.level < AMDGPU_VM_PTB) {
>> -			unsigned num_entries;
>> -
>> -			num_entries = amdgpu_vm_num_entries(adev, cursor.level);
>> -			entry->entries = kvmalloc_array(num_entries,
>> -							sizeof(*entry->entries),
>> -							GFP_KERNEL |
>> -							__GFP_ZERO);
>> -			if (!entry->entries)
>> -				return -ENOMEM;
>> -		}
>> -
>> -
>> -		if (entry->base.bo)
>> -			continue;
>> -
>> -		amdgpu_vm_bo_param(adev, vm, cursor.level, &bp);
>> -
>> -		r = amdgpu_bo_create(adev, &bp, &pt);
>> -		if (r)
>> -			return r;
>> -
>> -		if (vm->use_cpu_for_update) {
>> -			r = amdgpu_bo_kmap(pt, NULL);
>> -			if (r)
>> -				goto error_free_pt;
>> -		}
>> +	if (entry->base.bo)
>> +		return 0;
>>    
>> -		/* Keep a reference to the root directory to avoid
>> -		* freeing them up in the wrong order.
>> -		*/
>> -		pt->parent = amdgpu_bo_ref(cursor.parent->base.bo);
>> +	amdgpu_vm_bo_param(adev, vm, cursor->level, &bp);
>>    
>> -		amdgpu_vm_bo_base_init(&entry->base, vm, pt);
>> +	r = amdgpu_bo_create(adev, &bp, &pt);
>> +	if (r)
>> +		return r;
>>    
>> -		r = amdgpu_vm_clear_bo(adev, vm, pt);
>> +	if (vm->use_cpu_for_update) {
>> +		r = amdgpu_bo_kmap(pt, NULL);
>>    		if (r)
>>    			goto error_free_pt;
>>    	}
>>    
>> +	/* Keep a reference to the root directory to avoid
>> +	 * freeing them up in the wrong order.
>> +	 */
>> +	pt->parent = amdgpu_bo_ref(cursor->parent->base.bo);
>> +	amdgpu_vm_bo_base_init(&entry->base, vm, pt);
>> +
>> +	r = amdgpu_vm_clear_bo(adev, vm, pt);
>> +	if (r)
>> +		goto error_free_pt;
>> +
>>    	return 0;
>>    
>>    error_free_pt:
>> @@ -1655,6 +1591,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
>>    	struct amdgpu_vm_pt_cursor cursor;
>>    	uint64_t frag_start = start, frag_end;
>>    	unsigned int frag;
>> +	int r;
>>    
>>    	/* figure out the initial fragment */
>>    	amdgpu_vm_fragment(params, frag_start, end, flags, &frag, &frag_end);
>> @@ -1662,12 +1599,15 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
>>    	/* walk over the address space and update the PTs */
>>    	amdgpu_vm_pt_start(adev, params->vm, start, &cursor);
>>    	while (cursor.pfn < end) {
>> -		struct amdgpu_bo *pt = cursor.entry->base.bo;
>>    		unsigned shift, parent_shift, mask;
>>    		uint64_t incr, entry_end, pe_start;
>> +		struct amdgpu_bo *pt;
>>    
>> -		if (!pt)
>> -			return -ENOENT;
>> +		r = amdgpu_vm_alloc_pts(params->adev, params->vm, &cursor);
>> +		if (r)
>> +			return r;
>> +
>> +		pt = cursor.entry->base.bo;
>>    
>>    		/* The root level can't be a huge page */
>>    		if (cursor.level == adev->vm_manager.root_level) {
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> index 81ff8177f092..116605c038d2 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> @@ -303,9 +303,6 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm);
>>    int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>>    			      int (*callback)(void *p, struct amdgpu_bo *bo),
>>    			      void *param);
>> -int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
>> -			struct amdgpu_vm *vm,
>> -			uint64_t saddr, uint64_t size);
>>    int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync);
>>    int amdgpu_vm_update_directories(struct amdgpu_device *adev,
>>    				 struct amdgpu_vm *vm);
Christian König Feb. 5, 2019, 11:40 a.m.
Am 05.02.19 um 01:33 schrieb Kuehling, Felix:
> On 2019-02-04 3:17 p.m., Kuehling, Felix wrote:
>> This may cause regressions in KFD if PT BO allocation can trigger
>> eviction fences. The previous call to amdgpu_vm_alloc_pts was in a
>> context where we had temporarily removed the eviction fence. PT BO
>> allocation in amdgpu_vm_bo_update is not protected like that.
>>
>> I vaguely remember looking into this last time you were working on our
>> eviction fence code and thinking that most of the cases where we remove
>> the eviction fence were no longer needed if fence synchronization
>> happens through the amdgpu_sync-object API (rather than waiting on a
>> reservation object directly). I think it's time I go and finally clean
>> that up.
> I'm looking at this now. It's not working as I was hoping.
>
>
>> Do you know whether PT BO allocation would wait on the page-directory
>> reservation object without the sync-object API anywhere?
> It doesn't even matter. Buffer moves (anything calling amdgpu_sync_resv
> with AMDGPU_FENCE_OWNER_UNDEFINED) are supposed to trigger eviction
> fences. So page table BO allocation triggers eviction fences on the PD
> reservation. I don't know how to avoid this other than by removing the
> eviction fence while allocating PT BOs. With your changes this will be
> potentially every time we map or unmap memory.
>
> Any better ideas?

Let me take a closer look what exactly is happening here.

Regards,
Christian.

>
> Regards,
>     Felix
>
>
>> Regards,
>>      Felix
>>
>> On 2019-02-04 7:42 a.m., Christian König wrote:
>>> Let's start to allocate VM PDs/PTs on demand instead of pre-allocating
>>> them during mapping.
>>>
>>> Signed-off-by: Christian König <christian.koenig@amd.com>
>>> ---
>>>     .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  |   6 -
>>>     drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c       |   9 --
>>>     drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c       |  10 --
>>>     drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c        | 136 +++++-------------
>>>     drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h        |   3 -
>>>     5 files changed, 38 insertions(+), 126 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>> index d7b10d79f1de..2176c92f9377 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>> @@ -492,12 +492,6 @@ static int add_bo_to_vm(struct amdgpu_device *adev, struct kgd_mem *mem,
>>>     					vm->process_info->eviction_fence,
>>>     					NULL, NULL);
>>>     
>>> -	ret = amdgpu_vm_alloc_pts(adev, vm, va, amdgpu_bo_size(bo));
>>> -	if (ret) {
>>> -		pr_err("Failed to allocate pts, err=%d\n", ret);
>>> -		goto err_alloc_pts;
>>> -	}
>>> -
>>>     	ret = vm_validate_pt_pd_bos(vm);
>>>     	if (ret) {
>>>     		pr_err("validate_pt_pd_bos() failed\n");
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
>>> index 7e22be7ca68a..54dd02a898b9 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
>>> @@ -92,15 +92,6 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>>>     		return -ENOMEM;
>>>     	}
>>>     
>>> -	r = amdgpu_vm_alloc_pts(adev, (*bo_va)->base.vm, csa_addr,
>>> -				size);
>>> -	if (r) {
>>> -		DRM_ERROR("failed to allocate pts for static CSA, err=%d\n", r);
>>> -		amdgpu_vm_bo_rmv(adev, *bo_va);
>>> -		ttm_eu_backoff_reservation(&ticket, &list);
>>> -		return r;
>>> -	}
>>> -
>>>     	r = amdgpu_vm_bo_map(adev, *bo_va, csa_addr, 0, size,
>>>     			     AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE |
>>>     			     AMDGPU_PTE_EXECUTABLE);
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>> index d21dd2f369da..e141e3b13112 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>> @@ -627,11 +627,6 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
>>>     
>>>     	switch (args->operation) {
>>>     	case AMDGPU_VA_OP_MAP:
>>> -		r = amdgpu_vm_alloc_pts(adev, bo_va->base.vm, args->va_address,
>>> -					args->map_size);
>>> -		if (r)
>>> -			goto error_backoff;
>>> -
>>>     		va_flags = amdgpu_gmc_get_pte_flags(adev, args->flags);
>>>     		r = amdgpu_vm_bo_map(adev, bo_va, args->va_address,
>>>     				     args->offset_in_bo, args->map_size,
>>> @@ -647,11 +642,6 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
>>>     						args->map_size);
>>>     		break;
>>>     	case AMDGPU_VA_OP_REPLACE:
>>> -		r = amdgpu_vm_alloc_pts(adev, bo_va->base.vm, args->va_address,
>>> -					args->map_size);
>>> -		if (r)
>>> -			goto error_backoff;
>>> -
>>>     		va_flags = amdgpu_gmc_get_pte_flags(adev, args->flags);
>>>     		r = amdgpu_vm_bo_replace_map(adev, bo_va, args->va_address,
>>>     					     args->offset_in_bo, args->map_size,
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> index f7d410a5d848..0b8a1aa56c4a 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> @@ -504,47 +504,6 @@ static void amdgpu_vm_pt_next(struct amdgpu_device *adev,
>>>     	}
>>>     }
>>>     
>>> -/**
>>> - * amdgpu_vm_pt_first_leaf - get first leaf PD/PT
>>> - *
>>> - * @adev: amdgpu_device pointer
>>> - * @vm: amdgpu_vm structure
>>> - * @start: start addr of the walk
>>> - * @cursor: state to initialize
>>> - *
>>> - * Start a walk and go directly to the leaf node.
>>> - */
>>> -static void amdgpu_vm_pt_first_leaf(struct amdgpu_device *adev,
>>> -				    struct amdgpu_vm *vm, uint64_t start,
>>> -				    struct amdgpu_vm_pt_cursor *cursor)
>>> -{
>>> -	amdgpu_vm_pt_start(adev, vm, start, cursor);
>>> -	while (amdgpu_vm_pt_descendant(adev, cursor));
>>> -}
>>> -
>>> -/**
>>> - * amdgpu_vm_pt_next_leaf - get next leaf PD/PT
>>> - *
>>> - * @adev: amdgpu_device pointer
>>> - * @cursor: current state
>>> - *
>>> - * Walk the PD/PT tree to the next leaf node.
>>> - */
>>> -static void amdgpu_vm_pt_next_leaf(struct amdgpu_device *adev,
>>> -				   struct amdgpu_vm_pt_cursor *cursor)
>>> -{
>>> -	amdgpu_vm_pt_next(adev, cursor);
>>> -	if (cursor->pfn != ~0ll)
>>> -		while (amdgpu_vm_pt_descendant(adev, cursor));
>>> -}
>>> -
>>> -/**
>>> - * for_each_amdgpu_vm_pt_leaf - walk over all leaf PDs/PTs in the hierarchy
>>> - */
>>> -#define for_each_amdgpu_vm_pt_leaf(adev, vm, start, end, cursor)		\
>>> -	for (amdgpu_vm_pt_first_leaf((adev), (vm), (start), &(cursor));		\
>>> -	     (cursor).pfn <= end; amdgpu_vm_pt_next_leaf((adev), &(cursor)))
>>> -
>>>     /**
>>>      * amdgpu_vm_pt_first_dfs - start a deep first search
>>>      *
>>> @@ -919,74 +878,51 @@ static void amdgpu_vm_bo_param(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>>>      * Returns:
>>>      * 0 on success, errno otherwise.
>>>      */
>>> -int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
>>> -			struct amdgpu_vm *vm,
>>> -			uint64_t saddr, uint64_t size)
>>> +static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
>>> +			       struct amdgpu_vm *vm,
>>> +			       struct amdgpu_vm_pt_cursor *cursor)
>>>     {
>>> -	struct amdgpu_vm_pt_cursor cursor;
>>> +	struct amdgpu_vm_pt *entry = cursor->entry;
>>> +	struct amdgpu_bo_param bp;
>>>     	struct amdgpu_bo *pt;
>>> -	uint64_t eaddr;
>>>     	int r;
>>>     
>>> -	/* validate the parameters */
>>> -	if (saddr & AMDGPU_GPU_PAGE_MASK || size & AMDGPU_GPU_PAGE_MASK)
>>> -		return -EINVAL;
>>> +	if (cursor->level < AMDGPU_VM_PTB && !entry->entries) {
>>> +		unsigned num_entries;
>>>     
>>> -	eaddr = saddr + size - 1;
>>> -
>>> -	saddr /= AMDGPU_GPU_PAGE_SIZE;
>>> -	eaddr /= AMDGPU_GPU_PAGE_SIZE;
>>> -
>>> -	if (eaddr >= adev->vm_manager.max_pfn) {
>>> -		dev_err(adev->dev, "va above limit (0x%08llX >= 0x%08llX)\n",
>>> -			eaddr, adev->vm_manager.max_pfn);
>>> -		return -EINVAL;
>>> +		num_entries = amdgpu_vm_num_entries(adev, cursor->level);
>>> +		entry->entries = kvmalloc_array(num_entries,
>>> +						sizeof(*entry->entries),
>>> +						GFP_KERNEL | __GFP_ZERO);
>>> +		if (!entry->entries)
>>> +			return -ENOMEM;
>>>     	}
>>>     
>>> -	for_each_amdgpu_vm_pt_leaf(adev, vm, saddr, eaddr, cursor) {
>>> -		struct amdgpu_vm_pt *entry = cursor.entry;
>>> -		struct amdgpu_bo_param bp;
>>> -
>>> -		if (cursor.level < AMDGPU_VM_PTB) {
>>> -			unsigned num_entries;
>>> -
>>> -			num_entries = amdgpu_vm_num_entries(adev, cursor.level);
>>> -			entry->entries = kvmalloc_array(num_entries,
>>> -							sizeof(*entry->entries),
>>> -							GFP_KERNEL |
>>> -							__GFP_ZERO);
>>> -			if (!entry->entries)
>>> -				return -ENOMEM;
>>> -		}
>>> -
>>> -
>>> -		if (entry->base.bo)
>>> -			continue;
>>> -
>>> -		amdgpu_vm_bo_param(adev, vm, cursor.level, &bp);
>>> -
>>> -		r = amdgpu_bo_create(adev, &bp, &pt);
>>> -		if (r)
>>> -			return r;
>>> -
>>> -		if (vm->use_cpu_for_update) {
>>> -			r = amdgpu_bo_kmap(pt, NULL);
>>> -			if (r)
>>> -				goto error_free_pt;
>>> -		}
>>> +	if (entry->base.bo)
>>> +		return 0;
>>>     
>>> -		/* Keep a reference to the root directory to avoid
>>> -		* freeing them up in the wrong order.
>>> -		*/
>>> -		pt->parent = amdgpu_bo_ref(cursor.parent->base.bo);
>>> +	amdgpu_vm_bo_param(adev, vm, cursor->level, &bp);
>>>     
>>> -		amdgpu_vm_bo_base_init(&entry->base, vm, pt);
>>> +	r = amdgpu_bo_create(adev, &bp, &pt);
>>> +	if (r)
>>> +		return r;
>>>     
>>> -		r = amdgpu_vm_clear_bo(adev, vm, pt);
>>> +	if (vm->use_cpu_for_update) {
>>> +		r = amdgpu_bo_kmap(pt, NULL);
>>>     		if (r)
>>>     			goto error_free_pt;
>>>     	}
>>>     
>>> +	/* Keep a reference to the root directory to avoid
>>> +	 * freeing them up in the wrong order.
>>> +	 */
>>> +	pt->parent = amdgpu_bo_ref(cursor->parent->base.bo);
>>> +	amdgpu_vm_bo_base_init(&entry->base, vm, pt);
>>> +
>>> +	r = amdgpu_vm_clear_bo(adev, vm, pt);
>>> +	if (r)
>>> +		goto error_free_pt;
>>> +
>>>     	return 0;
>>>     
>>>     error_free_pt:
>>> @@ -1655,6 +1591,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
>>>     	struct amdgpu_vm_pt_cursor cursor;
>>>     	uint64_t frag_start = start, frag_end;
>>>     	unsigned int frag;
>>> +	int r;
>>>     
>>>     	/* figure out the initial fragment */
>>>     	amdgpu_vm_fragment(params, frag_start, end, flags, &frag, &frag_end);
>>> @@ -1662,12 +1599,15 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
>>>     	/* walk over the address space and update the PTs */
>>>     	amdgpu_vm_pt_start(adev, params->vm, start, &cursor);
>>>     	while (cursor.pfn < end) {
>>> -		struct amdgpu_bo *pt = cursor.entry->base.bo;
>>>     		unsigned shift, parent_shift, mask;
>>>     		uint64_t incr, entry_end, pe_start;
>>> +		struct amdgpu_bo *pt;
>>>     
>>> -		if (!pt)
>>> -			return -ENOENT;
>>> +		r = amdgpu_vm_alloc_pts(params->adev, params->vm, &cursor);
>>> +		if (r)
>>> +			return r;
>>> +
>>> +		pt = cursor.entry->base.bo;
>>>     
>>>     		/* The root level can't be a huge page */
>>>     		if (cursor.level == adev->vm_manager.root_level) {
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>> index 81ff8177f092..116605c038d2 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>> @@ -303,9 +303,6 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm);
>>>     int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>>>     			      int (*callback)(void *p, struct amdgpu_bo *bo),
>>>     			      void *param);
>>> -int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
>>> -			struct amdgpu_vm *vm,
>>> -			uint64_t saddr, uint64_t size);
>>>     int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync);
>>>     int amdgpu_vm_update_directories(struct amdgpu_device *adev,
>>>     				 struct amdgpu_vm *vm);
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Kuehling, Felix Feb. 5, 2019, 3:20 p.m.
> > This may cause regressions in KFD if PT BO allocation can trigger 

> > eviction fences.

> I don't think that can happen, but need to double check as well.

> 

> Otherwise allocating page tables could try to evict other page tables from the same process and that seriously doesn't make much sense.


Eviction fences don't cause actual memory evictions. They are the result of memory evictions. They cause KFD processes to be preempted in order to allow the fence to signal so memory can be evicted. The problem is that a page table allocation waits for the fences on the PD reservation, which looks like an eviction to KFD and triggers an unnecessary preemption. There is no actual memory eviction happening here.

Regards,
  Felix

-----Original Message-----
From: Christian König <ckoenig.leichtzumerken@gmail.com> 

Sent: Tuesday, February 05, 2019 6:37 AM
To: Kuehling, Felix <Felix.Kuehling@amd.com>; amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH 6/8] drm/amdgpu: allocate VM PDs/PTs on demand

> This may cause regressions in KFD if PT BO allocation can trigger 

> eviction fences.

I don't think that can happen, but need to double check as well.

Otherwise allocating page tables could try to evict other page tables from the same process and that seriously doesn't make much sense.

> Do you know whether PT BO allocation would wait on the page-directory 

> reservation object without the sync-object API anywhere?

For inside the kernel I can't think of any relevant use case, except for the eviction call path and there it is intentional.

One thing which will always wait for all fences is the display code path, but that is not relevant here. (Isn't it?).

What might be questionable is amdgpu_gem_wait_idle_ioctl(), but I think this is also completely intentional that we wait on everything here.

Regards,
Christian.

Am 04.02.19 um 21:17 schrieb Kuehling, Felix:
> This may cause regressions in KFD if PT BO allocation can trigger 

> eviction fences. The previous call to amdgpu_vm_alloc_pts was in a 

> context where we had temporarily removed the eviction fence. PT BO 

> allocation in amdgpu_vm_bo_update is not protected like that.

>

> I vaguely remember looking into this last time you were working on our 

> eviction fence code and thinking that most of the cases where we 

> remove the eviction fence were no longer needed if fence 

> synchronization happens through the amdgpu_sync-object API (rather 

> than waiting on a reservation object directly). I think it's time I go 

> and finally clean that up.

>

> Do you know whether PT BO allocation would wait on the page-directory 

> reservation object without the sync-object API anywhere?

>

> Regards,

>     Felix

>

> On 2019-02-04 7:42 a.m., Christian König wrote:

>> Let's start to allocate VM PDs/PTs on demand instead of 

>> pre-allocating them during mapping.

>>

>> Signed-off-by: Christian König <christian.koenig@amd.com>

>> ---

>>    .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  |   6 -

>>    drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c       |   9 --

>>    drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c       |  10 --

>>    drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c        | 136 +++++-------------

>>    drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h        |   3 -

>>    5 files changed, 38 insertions(+), 126 deletions(-)

>>

>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 

>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c

>> index d7b10d79f1de..2176c92f9377 100644

>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c

>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c

>> @@ -492,12 +492,6 @@ static int add_bo_to_vm(struct amdgpu_device *adev, struct kgd_mem *mem,

>>    					vm->process_info->eviction_fence,

>>    					NULL, NULL);

>>    

>> -	ret = amdgpu_vm_alloc_pts(adev, vm, va, amdgpu_bo_size(bo));

>> -	if (ret) {

>> -		pr_err("Failed to allocate pts, err=%d\n", ret);

>> -		goto err_alloc_pts;

>> -	}

>> -

>>    	ret = vm_validate_pt_pd_bos(vm);

>>    	if (ret) {

>>    		pr_err("validate_pt_pd_bos() failed\n"); diff --git 

>> a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c 

>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c

>> index 7e22be7ca68a..54dd02a898b9 100644

>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c

>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c

>> @@ -92,15 +92,6 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,

>>    		return -ENOMEM;

>>    	}

>>    

>> -	r = amdgpu_vm_alloc_pts(adev, (*bo_va)->base.vm, csa_addr,

>> -				size);

>> -	if (r) {

>> -		DRM_ERROR("failed to allocate pts for static CSA, err=%d\n", r);

>> -		amdgpu_vm_bo_rmv(adev, *bo_va);

>> -		ttm_eu_backoff_reservation(&ticket, &list);

>> -		return r;

>> -	}

>> -

>>    	r = amdgpu_vm_bo_map(adev, *bo_va, csa_addr, 0, size,

>>    			     AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE |

>>    			     AMDGPU_PTE_EXECUTABLE);

>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c 

>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c

>> index d21dd2f369da..e141e3b13112 100644

>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c

>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c

>> @@ -627,11 +627,6 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, 

>> void *data,

>>    

>>    	switch (args->operation) {

>>    	case AMDGPU_VA_OP_MAP:

>> -		r = amdgpu_vm_alloc_pts(adev, bo_va->base.vm, args->va_address,

>> -					args->map_size);

>> -		if (r)

>> -			goto error_backoff;

>> -

>>    		va_flags = amdgpu_gmc_get_pte_flags(adev, args->flags);

>>    		r = amdgpu_vm_bo_map(adev, bo_va, args->va_address,

>>    				     args->offset_in_bo, args->map_size, @@ -647,11 +642,6 @@ 

>> int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,

>>    						args->map_size);

>>    		break;

>>    	case AMDGPU_VA_OP_REPLACE:

>> -		r = amdgpu_vm_alloc_pts(adev, bo_va->base.vm, args->va_address,

>> -					args->map_size);

>> -		if (r)

>> -			goto error_backoff;

>> -

>>    		va_flags = amdgpu_gmc_get_pte_flags(adev, args->flags);

>>    		r = amdgpu_vm_bo_replace_map(adev, bo_va, args->va_address,

>>    					     args->offset_in_bo, args->map_size, diff --git 

>> a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 

>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c

>> index f7d410a5d848..0b8a1aa56c4a 100644

>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c

>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c

>> @@ -504,47 +504,6 @@ static void amdgpu_vm_pt_next(struct amdgpu_device *adev,

>>    	}

>>    }

>>    

>> -/**

>> - * amdgpu_vm_pt_first_leaf - get first leaf PD/PT

>> - *

>> - * @adev: amdgpu_device pointer

>> - * @vm: amdgpu_vm structure

>> - * @start: start addr of the walk

>> - * @cursor: state to initialize

>> - *

>> - * Start a walk and go directly to the leaf node.

>> - */

>> -static void amdgpu_vm_pt_first_leaf(struct amdgpu_device *adev,

>> -				    struct amdgpu_vm *vm, uint64_t start,

>> -				    struct amdgpu_vm_pt_cursor *cursor)

>> -{

>> -	amdgpu_vm_pt_start(adev, vm, start, cursor);

>> -	while (amdgpu_vm_pt_descendant(adev, cursor));

>> -}

>> -

>> -/**

>> - * amdgpu_vm_pt_next_leaf - get next leaf PD/PT

>> - *

>> - * @adev: amdgpu_device pointer

>> - * @cursor: current state

>> - *

>> - * Walk the PD/PT tree to the next leaf node.

>> - */

>> -static void amdgpu_vm_pt_next_leaf(struct amdgpu_device *adev,

>> -				   struct amdgpu_vm_pt_cursor *cursor)

>> -{

>> -	amdgpu_vm_pt_next(adev, cursor);

>> -	if (cursor->pfn != ~0ll)

>> -		while (amdgpu_vm_pt_descendant(adev, cursor));

>> -}

>> -

>> -/**

>> - * for_each_amdgpu_vm_pt_leaf - walk over all leaf PDs/PTs in the 

>> hierarchy

>> - */

>> -#define for_each_amdgpu_vm_pt_leaf(adev, vm, start, end, cursor)		\

>> -	for (amdgpu_vm_pt_first_leaf((adev), (vm), (start), &(cursor));		\

>> -	     (cursor).pfn <= end; amdgpu_vm_pt_next_leaf((adev), &(cursor)))

>> -

>>    /**

>>     * amdgpu_vm_pt_first_dfs - start a deep first search

>>     *

>> @@ -919,74 +878,51 @@ static void amdgpu_vm_bo_param(struct amdgpu_device *adev, struct amdgpu_vm *vm,

>>     * Returns:

>>     * 0 on success, errno otherwise.

>>     */

>> -int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,

>> -			struct amdgpu_vm *vm,

>> -			uint64_t saddr, uint64_t size)

>> +static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,

>> +			       struct amdgpu_vm *vm,

>> +			       struct amdgpu_vm_pt_cursor *cursor)

>>    {

>> -	struct amdgpu_vm_pt_cursor cursor;

>> +	struct amdgpu_vm_pt *entry = cursor->entry;

>> +	struct amdgpu_bo_param bp;

>>    	struct amdgpu_bo *pt;

>> -	uint64_t eaddr;

>>    	int r;

>>    

>> -	/* validate the parameters */

>> -	if (saddr & AMDGPU_GPU_PAGE_MASK || size & AMDGPU_GPU_PAGE_MASK)

>> -		return -EINVAL;

>> +	if (cursor->level < AMDGPU_VM_PTB && !entry->entries) {

>> +		unsigned num_entries;

>>    

>> -	eaddr = saddr + size - 1;

>> -

>> -	saddr /= AMDGPU_GPU_PAGE_SIZE;

>> -	eaddr /= AMDGPU_GPU_PAGE_SIZE;

>> -

>> -	if (eaddr >= adev->vm_manager.max_pfn) {

>> -		dev_err(adev->dev, "va above limit (0x%08llX >= 0x%08llX)\n",

>> -			eaddr, adev->vm_manager.max_pfn);

>> -		return -EINVAL;

>> +		num_entries = amdgpu_vm_num_entries(adev, cursor->level);

>> +		entry->entries = kvmalloc_array(num_entries,

>> +						sizeof(*entry->entries),

>> +						GFP_KERNEL | __GFP_ZERO);

>> +		if (!entry->entries)

>> +			return -ENOMEM;

>>    	}

>>    

>> -	for_each_amdgpu_vm_pt_leaf(adev, vm, saddr, eaddr, cursor) {

>> -		struct amdgpu_vm_pt *entry = cursor.entry;

>> -		struct amdgpu_bo_param bp;

>> -

>> -		if (cursor.level < AMDGPU_VM_PTB) {

>> -			unsigned num_entries;

>> -

>> -			num_entries = amdgpu_vm_num_entries(adev, cursor.level);

>> -			entry->entries = kvmalloc_array(num_entries,

>> -							sizeof(*entry->entries),

>> -							GFP_KERNEL |

>> -							__GFP_ZERO);

>> -			if (!entry->entries)

>> -				return -ENOMEM;

>> -		}

>> -

>> -

>> -		if (entry->base.bo)

>> -			continue;

>> -

>> -		amdgpu_vm_bo_param(adev, vm, cursor.level, &bp);

>> -

>> -		r = amdgpu_bo_create(adev, &bp, &pt);

>> -		if (r)

>> -			return r;

>> -

>> -		if (vm->use_cpu_for_update) {

>> -			r = amdgpu_bo_kmap(pt, NULL);

>> -			if (r)

>> -				goto error_free_pt;

>> -		}

>> +	if (entry->base.bo)

>> +		return 0;

>>    

>> -		/* Keep a reference to the root directory to avoid

>> -		* freeing them up in the wrong order.

>> -		*/

>> -		pt->parent = amdgpu_bo_ref(cursor.parent->base.bo);

>> +	amdgpu_vm_bo_param(adev, vm, cursor->level, &bp);

>>    

>> -		amdgpu_vm_bo_base_init(&entry->base, vm, pt);

>> +	r = amdgpu_bo_create(adev, &bp, &pt);

>> +	if (r)

>> +		return r;

>>    

>> -		r = amdgpu_vm_clear_bo(adev, vm, pt);

>> +	if (vm->use_cpu_for_update) {

>> +		r = amdgpu_bo_kmap(pt, NULL);

>>    		if (r)

>>    			goto error_free_pt;

>>    	}

>>    

>> +	/* Keep a reference to the root directory to avoid

>> +	 * freeing them up in the wrong order.

>> +	 */

>> +	pt->parent = amdgpu_bo_ref(cursor->parent->base.bo);

>> +	amdgpu_vm_bo_base_init(&entry->base, vm, pt);

>> +

>> +	r = amdgpu_vm_clear_bo(adev, vm, pt);

>> +	if (r)

>> +		goto error_free_pt;

>> +

>>    	return 0;

>>    

>>    error_free_pt:

>> @@ -1655,6 +1591,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,

>>    	struct amdgpu_vm_pt_cursor cursor;

>>    	uint64_t frag_start = start, frag_end;

>>    	unsigned int frag;

>> +	int r;

>>    

>>    	/* figure out the initial fragment */

>>    	amdgpu_vm_fragment(params, frag_start, end, flags, &frag, 

>> &frag_end); @@ -1662,12 +1599,15 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,

>>    	/* walk over the address space and update the PTs */

>>    	amdgpu_vm_pt_start(adev, params->vm, start, &cursor);

>>    	while (cursor.pfn < end) {

>> -		struct amdgpu_bo *pt = cursor.entry->base.bo;

>>    		unsigned shift, parent_shift, mask;

>>    		uint64_t incr, entry_end, pe_start;

>> +		struct amdgpu_bo *pt;

>>    

>> -		if (!pt)

>> -			return -ENOENT;

>> +		r = amdgpu_vm_alloc_pts(params->adev, params->vm, &cursor);

>> +		if (r)

>> +			return r;

>> +

>> +		pt = cursor.entry->base.bo;

>>    

>>    		/* The root level can't be a huge page */

>>    		if (cursor.level == adev->vm_manager.root_level) { diff --git 

>> a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h 

>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h

>> index 81ff8177f092..116605c038d2 100644

>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h

>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h

>> @@ -303,9 +303,6 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm);

>>    int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,

>>    			      int (*callback)(void *p, struct amdgpu_bo *bo),

>>    			      void *param);

>> -int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,

>> -			struct amdgpu_vm *vm,

>> -			uint64_t saddr, uint64_t size);

>>    int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync);

>>    int amdgpu_vm_update_directories(struct amdgpu_device *adev,

>>    				 struct amdgpu_vm *vm);
Christian König Feb. 5, 2019, 3:39 p.m.
Am 05.02.19 um 16:20 schrieb Kuehling, Felix:
>>> This may cause regressions in KFD if PT BO allocation can trigger
>>> eviction fences.
>> I don't think that can happen, but need to double check as well.
>>
>> Otherwise allocating page tables could try to evict other page tables from the same process and that seriously doesn't make much sense.
> Eviction fences don't cause actual memory evictions. They are the result of memory evictions. They cause KFD processes to be preempted in order to allow the fence to signal so memory can be evicted. The problem is that a page table allocation waits for the fences on the PD reservation, which looks like an eviction to KFD and triggers an unnecessary preemption. There is no actual memory eviction happening here.

Yeah, but where is that actually coming from?

It sounds like we still unnecessarily synchronize page table updates 
somewhere.

Do you have a call chain?

Regards,
Christian.

>
> Regards,
>    Felix
>
> -----Original Message-----
> From: Christian König <ckoenig.leichtzumerken@gmail.com>
> Sent: Tuesday, February 05, 2019 6:37 AM
> To: Kuehling, Felix <Felix.Kuehling@amd.com>; amd-gfx@lists.freedesktop.org
> Subject: Re: [PATCH 6/8] drm/amdgpu: allocate VM PDs/PTs on demand
>
>> This may cause regressions in KFD if PT BO allocation can trigger
>> eviction fences.
> I don't think that can happen, but need to double check as well.
>
> Otherwise allocating page tables could try to evict other page tables from the same process and that seriously doesn't make much sense.
>
>> Do you know whether PT BO allocation would wait on the page-directory
>> reservation object without the sync-object API anywhere?
> For inside the kernel I can't think of any relevant use case, except for the eviction call path and there it is intentional.
>
> One thing which will always wait for all fences is the display code path, but that is not relevant here. (Isn't it?).
>
> What might be questionable is amdgpu_gem_wait_idle_ioctl(), but I think this is also completely intentional that we wait on everything here.
>
> Regards,
> Christian.
>
> Am 04.02.19 um 21:17 schrieb Kuehling, Felix:
>> This may cause regressions in KFD if PT BO allocation can trigger
>> eviction fences. The previous call to amdgpu_vm_alloc_pts was in a
>> context where we had temporarily removed the eviction fence. PT BO
>> allocation in amdgpu_vm_bo_update is not protected like that.
>>
>> I vaguely remember looking into this last time you were working on our
>> eviction fence code and thinking that most of the cases where we
>> remove the eviction fence were no longer needed if fence
>> synchronization happens through the amdgpu_sync-object API (rather
>> than waiting on a reservation object directly). I think it's time I go
>> and finally clean that up.
>>
>> Do you know whether PT BO allocation would wait on the page-directory
>> reservation object without the sync-object API anywhere?
>>
>> Regards,
>>      Felix
>>
>> On 2019-02-04 7:42 a.m., Christian König wrote:
>>> Let's start to allocate VM PDs/PTs on demand instead of
>>> pre-allocating them during mapping.
>>>
>>> Signed-off-by: Christian König <christian.koenig@amd.com>
>>> ---
>>>     .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  |   6 -
>>>     drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c       |   9 --
>>>     drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c       |  10 --
>>>     drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c        | 136 +++++-------------
>>>     drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h        |   3 -
>>>     5 files changed, 38 insertions(+), 126 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>> index d7b10d79f1de..2176c92f9377 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>> @@ -492,12 +492,6 @@ static int add_bo_to_vm(struct amdgpu_device *adev, struct kgd_mem *mem,
>>>     					vm->process_info->eviction_fence,
>>>     					NULL, NULL);
>>>     
>>> -	ret = amdgpu_vm_alloc_pts(adev, vm, va, amdgpu_bo_size(bo));
>>> -	if (ret) {
>>> -		pr_err("Failed to allocate pts, err=%d\n", ret);
>>> -		goto err_alloc_pts;
>>> -	}
>>> -
>>>     	ret = vm_validate_pt_pd_bos(vm);
>>>     	if (ret) {
>>>     		pr_err("validate_pt_pd_bos() failed\n"); diff --git
>>> a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
>>> index 7e22be7ca68a..54dd02a898b9 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
>>> @@ -92,15 +92,6 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>>>     		return -ENOMEM;
>>>     	}
>>>     
>>> -	r = amdgpu_vm_alloc_pts(adev, (*bo_va)->base.vm, csa_addr,
>>> -				size);
>>> -	if (r) {
>>> -		DRM_ERROR("failed to allocate pts for static CSA, err=%d\n", r);
>>> -		amdgpu_vm_bo_rmv(adev, *bo_va);
>>> -		ttm_eu_backoff_reservation(&ticket, &list);
>>> -		return r;
>>> -	}
>>> -
>>>     	r = amdgpu_vm_bo_map(adev, *bo_va, csa_addr, 0, size,
>>>     			     AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE |
>>>     			     AMDGPU_PTE_EXECUTABLE);
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>> index d21dd2f369da..e141e3b13112 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>> @@ -627,11 +627,6 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev,
>>> void *data,
>>>     
>>>     	switch (args->operation) {
>>>     	case AMDGPU_VA_OP_MAP:
>>> -		r = amdgpu_vm_alloc_pts(adev, bo_va->base.vm, args->va_address,
>>> -					args->map_size);
>>> -		if (r)
>>> -			goto error_backoff;
>>> -
>>>     		va_flags = amdgpu_gmc_get_pte_flags(adev, args->flags);
>>>     		r = amdgpu_vm_bo_map(adev, bo_va, args->va_address,
>>>     				     args->offset_in_bo, args->map_size, @@ -647,11 +642,6 @@
>>> int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
>>>     						args->map_size);
>>>     		break;
>>>     	case AMDGPU_VA_OP_REPLACE:
>>> -		r = amdgpu_vm_alloc_pts(adev, bo_va->base.vm, args->va_address,
>>> -					args->map_size);
>>> -		if (r)
>>> -			goto error_backoff;
>>> -
>>>     		va_flags = amdgpu_gmc_get_pte_flags(adev, args->flags);
>>>     		r = amdgpu_vm_bo_replace_map(adev, bo_va, args->va_address,
>>>     					     args->offset_in_bo, args->map_size, diff --git
>>> a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> index f7d410a5d848..0b8a1aa56c4a 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> @@ -504,47 +504,6 @@ static void amdgpu_vm_pt_next(struct amdgpu_device *adev,
>>>     	}
>>>     }
>>>     
>>> -/**
>>> - * amdgpu_vm_pt_first_leaf - get first leaf PD/PT
>>> - *
>>> - * @adev: amdgpu_device pointer
>>> - * @vm: amdgpu_vm structure
>>> - * @start: start addr of the walk
>>> - * @cursor: state to initialize
>>> - *
>>> - * Start a walk and go directly to the leaf node.
>>> - */
>>> -static void amdgpu_vm_pt_first_leaf(struct amdgpu_device *adev,
>>> -				    struct amdgpu_vm *vm, uint64_t start,
>>> -				    struct amdgpu_vm_pt_cursor *cursor)
>>> -{
>>> -	amdgpu_vm_pt_start(adev, vm, start, cursor);
>>> -	while (amdgpu_vm_pt_descendant(adev, cursor));
>>> -}
>>> -
>>> -/**
>>> - * amdgpu_vm_pt_next_leaf - get next leaf PD/PT
>>> - *
>>> - * @adev: amdgpu_device pointer
>>> - * @cursor: current state
>>> - *
>>> - * Walk the PD/PT tree to the next leaf node.
>>> - */
>>> -static void amdgpu_vm_pt_next_leaf(struct amdgpu_device *adev,
>>> -				   struct amdgpu_vm_pt_cursor *cursor)
>>> -{
>>> -	amdgpu_vm_pt_next(adev, cursor);
>>> -	if (cursor->pfn != ~0ll)
>>> -		while (amdgpu_vm_pt_descendant(adev, cursor));
>>> -}
>>> -
>>> -/**
>>> - * for_each_amdgpu_vm_pt_leaf - walk over all leaf PDs/PTs in the
>>> hierarchy
>>> - */
>>> -#define for_each_amdgpu_vm_pt_leaf(adev, vm, start, end, cursor)		\
>>> -	for (amdgpu_vm_pt_first_leaf((adev), (vm), (start), &(cursor));		\
>>> -	     (cursor).pfn <= end; amdgpu_vm_pt_next_leaf((adev), &(cursor)))
>>> -
>>>     /**
>>>      * amdgpu_vm_pt_first_dfs - start a deep first search
>>>      *
>>> @@ -919,74 +878,51 @@ static void amdgpu_vm_bo_param(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>>>      * Returns:
>>>      * 0 on success, errno otherwise.
>>>      */
>>> -int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
>>> -			struct amdgpu_vm *vm,
>>> -			uint64_t saddr, uint64_t size)
>>> +static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
>>> +			       struct amdgpu_vm *vm,
>>> +			       struct amdgpu_vm_pt_cursor *cursor)
>>>     {
>>> -	struct amdgpu_vm_pt_cursor cursor;
>>> +	struct amdgpu_vm_pt *entry = cursor->entry;
>>> +	struct amdgpu_bo_param bp;
>>>     	struct amdgpu_bo *pt;
>>> -	uint64_t eaddr;
>>>     	int r;
>>>     
>>> -	/* validate the parameters */
>>> -	if (saddr & AMDGPU_GPU_PAGE_MASK || size & AMDGPU_GPU_PAGE_MASK)
>>> -		return -EINVAL;
>>> +	if (cursor->level < AMDGPU_VM_PTB && !entry->entries) {
>>> +		unsigned num_entries;
>>>     
>>> -	eaddr = saddr + size - 1;
>>> -
>>> -	saddr /= AMDGPU_GPU_PAGE_SIZE;
>>> -	eaddr /= AMDGPU_GPU_PAGE_SIZE;
>>> -
>>> -	if (eaddr >= adev->vm_manager.max_pfn) {
>>> -		dev_err(adev->dev, "va above limit (0x%08llX >= 0x%08llX)\n",
>>> -			eaddr, adev->vm_manager.max_pfn);
>>> -		return -EINVAL;
>>> +		num_entries = amdgpu_vm_num_entries(adev, cursor->level);
>>> +		entry->entries = kvmalloc_array(num_entries,
>>> +						sizeof(*entry->entries),
>>> +						GFP_KERNEL | __GFP_ZERO);
>>> +		if (!entry->entries)
>>> +			return -ENOMEM;
>>>     	}
>>>     
>>> -	for_each_amdgpu_vm_pt_leaf(adev, vm, saddr, eaddr, cursor) {
>>> -		struct amdgpu_vm_pt *entry = cursor.entry;
>>> -		struct amdgpu_bo_param bp;
>>> -
>>> -		if (cursor.level < AMDGPU_VM_PTB) {
>>> -			unsigned num_entries;
>>> -
>>> -			num_entries = amdgpu_vm_num_entries(adev, cursor.level);
>>> -			entry->entries = kvmalloc_array(num_entries,
>>> -							sizeof(*entry->entries),
>>> -							GFP_KERNEL |
>>> -							__GFP_ZERO);
>>> -			if (!entry->entries)
>>> -				return -ENOMEM;
>>> -		}
>>> -
>>> -
>>> -		if (entry->base.bo)
>>> -			continue;
>>> -
>>> -		amdgpu_vm_bo_param(adev, vm, cursor.level, &bp);
>>> -
>>> -		r = amdgpu_bo_create(adev, &bp, &pt);
>>> -		if (r)
>>> -			return r;
>>> -
>>> -		if (vm->use_cpu_for_update) {
>>> -			r = amdgpu_bo_kmap(pt, NULL);
>>> -			if (r)
>>> -				goto error_free_pt;
>>> -		}
>>> +	if (entry->base.bo)
>>> +		return 0;
>>>     
>>> -		/* Keep a reference to the root directory to avoid
>>> -		* freeing them up in the wrong order.
>>> -		*/
>>> -		pt->parent = amdgpu_bo_ref(cursor.parent->base.bo);
>>> +	amdgpu_vm_bo_param(adev, vm, cursor->level, &bp);
>>>     
>>> -		amdgpu_vm_bo_base_init(&entry->base, vm, pt);
>>> +	r = amdgpu_bo_create(adev, &bp, &pt);
>>> +	if (r)
>>> +		return r;
>>>     
>>> -		r = amdgpu_vm_clear_bo(adev, vm, pt);
>>> +	if (vm->use_cpu_for_update) {
>>> +		r = amdgpu_bo_kmap(pt, NULL);
>>>     		if (r)
>>>     			goto error_free_pt;
>>>     	}
>>>     
>>> +	/* Keep a reference to the root directory to avoid
>>> +	 * freeing them up in the wrong order.
>>> +	 */
>>> +	pt->parent = amdgpu_bo_ref(cursor->parent->base.bo);
>>> +	amdgpu_vm_bo_base_init(&entry->base, vm, pt);
>>> +
>>> +	r = amdgpu_vm_clear_bo(adev, vm, pt);
>>> +	if (r)
>>> +		goto error_free_pt;
>>> +
>>>     	return 0;
>>>     
>>>     error_free_pt:
>>> @@ -1655,6 +1591,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
>>>     	struct amdgpu_vm_pt_cursor cursor;
>>>     	uint64_t frag_start = start, frag_end;
>>>     	unsigned int frag;
>>> +	int r;
>>>     
>>>     	/* figure out the initial fragment */
>>>     	amdgpu_vm_fragment(params, frag_start, end, flags, &frag,
>>> &frag_end); @@ -1662,12 +1599,15 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
>>>     	/* walk over the address space and update the PTs */
>>>     	amdgpu_vm_pt_start(adev, params->vm, start, &cursor);
>>>     	while (cursor.pfn < end) {
>>> -		struct amdgpu_bo *pt = cursor.entry->base.bo;
>>>     		unsigned shift, parent_shift, mask;
>>>     		uint64_t incr, entry_end, pe_start;
>>> +		struct amdgpu_bo *pt;
>>>     
>>> -		if (!pt)
>>> -			return -ENOENT;
>>> +		r = amdgpu_vm_alloc_pts(params->adev, params->vm, &cursor);
>>> +		if (r)
>>> +			return r;
>>> +
>>> +		pt = cursor.entry->base.bo;
>>>     
>>>     		/* The root level can't be a huge page */
>>>     		if (cursor.level == adev->vm_manager.root_level) { diff --git
>>> a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>> index 81ff8177f092..116605c038d2 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>> @@ -303,9 +303,6 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm);
>>>     int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>>>     			      int (*callback)(void *p, struct amdgpu_bo *bo),
>>>     			      void *param);
>>> -int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
>>> -			struct amdgpu_vm *vm,
>>> -			uint64_t saddr, uint64_t size);
>>>     int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync);
>>>     int amdgpu_vm_update_directories(struct amdgpu_device *adev,
>>>     				 struct amdgpu_vm *vm);
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Kuehling, Felix Feb. 5, 2019, 3:49 p.m.
I saw a backtrace from the GPU scheduler when I was debugging this yesterday, but those backtraces never tell us where the command submission came from. It could be memory initialization, or some migration at buffer-validation. Basically, any command submission triggered by page table allocation that synchronizes with the PD reservation object is a problem.

Regards,
  Felix

-----Original Message-----
From: Christian König <ckoenig.leichtzumerken@gmail.com> 

Sent: Tuesday, February 05, 2019 10:40 AM
To: Kuehling, Felix <Felix.Kuehling@amd.com>; Koenig, Christian <Christian.Koenig@amd.com>; amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH 6/8] drm/amdgpu: allocate VM PDs/PTs on demand

Am 05.02.19 um 16:20 schrieb Kuehling, Felix:
>>> This may cause regressions in KFD if PT BO allocation can trigger 

>>> eviction fences.

>> I don't think that can happen, but need to double check as well.

>>

>> Otherwise allocating page tables could try to evict other page tables from the same process and that seriously doesn't make much sense.

> Eviction fences don't cause actual memory evictions. They are the result of memory evictions. They cause KFD processes to be preempted in order to allow the fence to signal so memory can be evicted. The problem is that a page table allocation waits for the fences on the PD reservation, which looks like an eviction to KFD and triggers an unnecessary preemption. There is no actual memory eviction happening here.


Yeah, but where is that actually coming from?

It sounds like we still unnecessarily synchronize page table updates somewhere.

Do you have a call chain?

Regards,
Christian.

>

> Regards,

>    Felix

>

> -----Original Message-----

> From: Christian König <ckoenig.leichtzumerken@gmail.com>

> Sent: Tuesday, February 05, 2019 6:37 AM

> To: Kuehling, Felix <Felix.Kuehling@amd.com>; 

> amd-gfx@lists.freedesktop.org

> Subject: Re: [PATCH 6/8] drm/amdgpu: allocate VM PDs/PTs on demand

>

>> This may cause regressions in KFD if PT BO allocation can trigger 

>> eviction fences.

> I don't think that can happen, but need to double check as well.

>

> Otherwise allocating page tables could try to evict other page tables from the same process and that seriously doesn't make much sense.

>

>> Do you know whether PT BO allocation would wait on the page-directory 

>> reservation object without the sync-object API anywhere?

> For inside the kernel I can't think of any relevant use case, except for the eviction call path and there it is intentional.

>

> One thing which will always wait for all fences is the display code path, but that is not relevant here. (Isn't it?).

>

> What might be questionable is amdgpu_gem_wait_idle_ioctl(), but I think this is also completely intentional that we wait on everything here.

>

> Regards,

> Christian.

>

> Am 04.02.19 um 21:17 schrieb Kuehling, Felix:

>> This may cause regressions in KFD if PT BO allocation can trigger 

>> eviction fences. The previous call to amdgpu_vm_alloc_pts was in a 

>> context where we had temporarily removed the eviction fence. PT BO 

>> allocation in amdgpu_vm_bo_update is not protected like that.

>>

>> I vaguely remember looking into this last time you were working on 

>> our eviction fence code and thinking that most of the cases where we 

>> remove the eviction fence were no longer needed if fence 

>> synchronization happens through the amdgpu_sync-object API (rather 

>> than waiting on a reservation object directly). I think it's time I 

>> go and finally clean that up.

>>

>> Do you know whether PT BO allocation would wait on the page-directory 

>> reservation object without the sync-object API anywhere?

>>

>> Regards,

>>      Felix

>>

>> On 2019-02-04 7:42 a.m., Christian König wrote:

>>> Let's start to allocate VM PDs/PTs on demand instead of 

>>> pre-allocating them during mapping.

>>>

>>> Signed-off-by: Christian König <christian.koenig@amd.com>

>>> ---

>>>     .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  |   6 -

>>>     drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c       |   9 --

>>>     drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c       |  10 --

>>>     drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c        | 136 +++++-------------

>>>     drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h        |   3 -

>>>     5 files changed, 38 insertions(+), 126 deletions(-)

>>>

>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c

>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c

>>> index d7b10d79f1de..2176c92f9377 100644

>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c

>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c

>>> @@ -492,12 +492,6 @@ static int add_bo_to_vm(struct amdgpu_device *adev, struct kgd_mem *mem,

>>>     					vm->process_info->eviction_fence,

>>>     					NULL, NULL);

>>>     

>>> -	ret = amdgpu_vm_alloc_pts(adev, vm, va, amdgpu_bo_size(bo));

>>> -	if (ret) {

>>> -		pr_err("Failed to allocate pts, err=%d\n", ret);

>>> -		goto err_alloc_pts;

>>> -	}

>>> -

>>>     	ret = vm_validate_pt_pd_bos(vm);

>>>     	if (ret) {

>>>     		pr_err("validate_pt_pd_bos() failed\n"); diff --git 

>>> a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c

>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c

>>> index 7e22be7ca68a..54dd02a898b9 100644

>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c

>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c

>>> @@ -92,15 +92,6 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,

>>>     		return -ENOMEM;

>>>     	}

>>>     

>>> -	r = amdgpu_vm_alloc_pts(adev, (*bo_va)->base.vm, csa_addr,

>>> -				size);

>>> -	if (r) {

>>> -		DRM_ERROR("failed to allocate pts for static CSA, err=%d\n", r);

>>> -		amdgpu_vm_bo_rmv(adev, *bo_va);

>>> -		ttm_eu_backoff_reservation(&ticket, &list);

>>> -		return r;

>>> -	}

>>> -

>>>     	r = amdgpu_vm_bo_map(adev, *bo_va, csa_addr, 0, size,

>>>     			     AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE |

>>>     			     AMDGPU_PTE_EXECUTABLE);

>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c

>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c

>>> index d21dd2f369da..e141e3b13112 100644

>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c

>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c

>>> @@ -627,11 +627,6 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, 

>>> void *data,

>>>     

>>>     	switch (args->operation) {

>>>     	case AMDGPU_VA_OP_MAP:

>>> -		r = amdgpu_vm_alloc_pts(adev, bo_va->base.vm, args->va_address,

>>> -					args->map_size);

>>> -		if (r)

>>> -			goto error_backoff;

>>> -

>>>     		va_flags = amdgpu_gmc_get_pte_flags(adev, args->flags);

>>>     		r = amdgpu_vm_bo_map(adev, bo_va, args->va_address,

>>>     				     args->offset_in_bo, args->map_size, @@ -647,11 +642,6 

>>> @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,

>>>     						args->map_size);

>>>     		break;

>>>     	case AMDGPU_VA_OP_REPLACE:

>>> -		r = amdgpu_vm_alloc_pts(adev, bo_va->base.vm, args->va_address,

>>> -					args->map_size);

>>> -		if (r)

>>> -			goto error_backoff;

>>> -

>>>     		va_flags = amdgpu_gmc_get_pte_flags(adev, args->flags);

>>>     		r = amdgpu_vm_bo_replace_map(adev, bo_va, args->va_address,

>>>     					     args->offset_in_bo, args->map_size, diff --git 

>>> a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c

>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c

>>> index f7d410a5d848..0b8a1aa56c4a 100644

>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c

>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c

>>> @@ -504,47 +504,6 @@ static void amdgpu_vm_pt_next(struct amdgpu_device *adev,

>>>     	}

>>>     }

>>>     

>>> -/**

>>> - * amdgpu_vm_pt_first_leaf - get first leaf PD/PT

>>> - *

>>> - * @adev: amdgpu_device pointer

>>> - * @vm: amdgpu_vm structure

>>> - * @start: start addr of the walk

>>> - * @cursor: state to initialize

>>> - *

>>> - * Start a walk and go directly to the leaf node.

>>> - */

>>> -static void amdgpu_vm_pt_first_leaf(struct amdgpu_device *adev,

>>> -				    struct amdgpu_vm *vm, uint64_t start,

>>> -				    struct amdgpu_vm_pt_cursor *cursor)

>>> -{

>>> -	amdgpu_vm_pt_start(adev, vm, start, cursor);

>>> -	while (amdgpu_vm_pt_descendant(adev, cursor));

>>> -}

>>> -

>>> -/**

>>> - * amdgpu_vm_pt_next_leaf - get next leaf PD/PT

>>> - *

>>> - * @adev: amdgpu_device pointer

>>> - * @cursor: current state

>>> - *

>>> - * Walk the PD/PT tree to the next leaf node.

>>> - */

>>> -static void amdgpu_vm_pt_next_leaf(struct amdgpu_device *adev,

>>> -				   struct amdgpu_vm_pt_cursor *cursor)

>>> -{

>>> -	amdgpu_vm_pt_next(adev, cursor);

>>> -	if (cursor->pfn != ~0ll)

>>> -		while (amdgpu_vm_pt_descendant(adev, cursor));

>>> -}

>>> -

>>> -/**

>>> - * for_each_amdgpu_vm_pt_leaf - walk over all leaf PDs/PTs in the 

>>> hierarchy

>>> - */

>>> -#define for_each_amdgpu_vm_pt_leaf(adev, vm, start, end, cursor)		\

>>> -	for (amdgpu_vm_pt_first_leaf((adev), (vm), (start), &(cursor));		\

>>> -	     (cursor).pfn <= end; amdgpu_vm_pt_next_leaf((adev), &(cursor)))

>>> -

>>>     /**

>>>      * amdgpu_vm_pt_first_dfs - start a deep first search

>>>      *

>>> @@ -919,74 +878,51 @@ static void amdgpu_vm_bo_param(struct amdgpu_device *adev, struct amdgpu_vm *vm,

>>>      * Returns:

>>>      * 0 on success, errno otherwise.

>>>      */

>>> -int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,

>>> -			struct amdgpu_vm *vm,

>>> -			uint64_t saddr, uint64_t size)

>>> +static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,

>>> +			       struct amdgpu_vm *vm,

>>> +			       struct amdgpu_vm_pt_cursor *cursor)

>>>     {

>>> -	struct amdgpu_vm_pt_cursor cursor;

>>> +	struct amdgpu_vm_pt *entry = cursor->entry;

>>> +	struct amdgpu_bo_param bp;

>>>     	struct amdgpu_bo *pt;

>>> -	uint64_t eaddr;

>>>     	int r;

>>>     

>>> -	/* validate the parameters */

>>> -	if (saddr & AMDGPU_GPU_PAGE_MASK || size & AMDGPU_GPU_PAGE_MASK)

>>> -		return -EINVAL;

>>> +	if (cursor->level < AMDGPU_VM_PTB && !entry->entries) {

>>> +		unsigned num_entries;

>>>     

>>> -	eaddr = saddr + size - 1;

>>> -

>>> -	saddr /= AMDGPU_GPU_PAGE_SIZE;

>>> -	eaddr /= AMDGPU_GPU_PAGE_SIZE;

>>> -

>>> -	if (eaddr >= adev->vm_manager.max_pfn) {

>>> -		dev_err(adev->dev, "va above limit (0x%08llX >= 0x%08llX)\n",

>>> -			eaddr, adev->vm_manager.max_pfn);

>>> -		return -EINVAL;

>>> +		num_entries = amdgpu_vm_num_entries(adev, cursor->level);

>>> +		entry->entries = kvmalloc_array(num_entries,

>>> +						sizeof(*entry->entries),

>>> +						GFP_KERNEL | __GFP_ZERO);

>>> +		if (!entry->entries)

>>> +			return -ENOMEM;

>>>     	}

>>>     

>>> -	for_each_amdgpu_vm_pt_leaf(adev, vm, saddr, eaddr, cursor) {

>>> -		struct amdgpu_vm_pt *entry = cursor.entry;

>>> -		struct amdgpu_bo_param bp;

>>> -

>>> -		if (cursor.level < AMDGPU_VM_PTB) {

>>> -			unsigned num_entries;

>>> -

>>> -			num_entries = amdgpu_vm_num_entries(adev, cursor.level);

>>> -			entry->entries = kvmalloc_array(num_entries,

>>> -							sizeof(*entry->entries),

>>> -							GFP_KERNEL |

>>> -							__GFP_ZERO);

>>> -			if (!entry->entries)

>>> -				return -ENOMEM;

>>> -		}

>>> -

>>> -

>>> -		if (entry->base.bo)

>>> -			continue;

>>> -

>>> -		amdgpu_vm_bo_param(adev, vm, cursor.level, &bp);

>>> -

>>> -		r = amdgpu_bo_create(adev, &bp, &pt);

>>> -		if (r)

>>> -			return r;

>>> -

>>> -		if (vm->use_cpu_for_update) {

>>> -			r = amdgpu_bo_kmap(pt, NULL);

>>> -			if (r)

>>> -				goto error_free_pt;

>>> -		}

>>> +	if (entry->base.bo)

>>> +		return 0;

>>>     

>>> -		/* Keep a reference to the root directory to avoid

>>> -		* freeing them up in the wrong order.

>>> -		*/

>>> -		pt->parent = amdgpu_bo_ref(cursor.parent->base.bo);

>>> +	amdgpu_vm_bo_param(adev, vm, cursor->level, &bp);

>>>     

>>> -		amdgpu_vm_bo_base_init(&entry->base, vm, pt);

>>> +	r = amdgpu_bo_create(adev, &bp, &pt);

>>> +	if (r)

>>> +		return r;

>>>     

>>> -		r = amdgpu_vm_clear_bo(adev, vm, pt);

>>> +	if (vm->use_cpu_for_update) {

>>> +		r = amdgpu_bo_kmap(pt, NULL);

>>>     		if (r)

>>>     			goto error_free_pt;

>>>     	}

>>>     

>>> +	/* Keep a reference to the root directory to avoid

>>> +	 * freeing them up in the wrong order.

>>> +	 */

>>> +	pt->parent = amdgpu_bo_ref(cursor->parent->base.bo);

>>> +	amdgpu_vm_bo_base_init(&entry->base, vm, pt);

>>> +

>>> +	r = amdgpu_vm_clear_bo(adev, vm, pt);

>>> +	if (r)

>>> +		goto error_free_pt;

>>> +

>>>     	return 0;

>>>     

>>>     error_free_pt:

>>> @@ -1655,6 +1591,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,

>>>     	struct amdgpu_vm_pt_cursor cursor;

>>>     	uint64_t frag_start = start, frag_end;

>>>     	unsigned int frag;

>>> +	int r;

>>>     

>>>     	/* figure out the initial fragment */

>>>     	amdgpu_vm_fragment(params, frag_start, end, flags, &frag, 

>>> &frag_end); @@ -1662,12 +1599,15 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,

>>>     	/* walk over the address space and update the PTs */

>>>     	amdgpu_vm_pt_start(adev, params->vm, start, &cursor);

>>>     	while (cursor.pfn < end) {

>>> -		struct amdgpu_bo *pt = cursor.entry->base.bo;

>>>     		unsigned shift, parent_shift, mask;

>>>     		uint64_t incr, entry_end, pe_start;

>>> +		struct amdgpu_bo *pt;

>>>     

>>> -		if (!pt)

>>> -			return -ENOENT;

>>> +		r = amdgpu_vm_alloc_pts(params->adev, params->vm, &cursor);

>>> +		if (r)

>>> +			return r;

>>> +

>>> +		pt = cursor.entry->base.bo;

>>>     

>>>     		/* The root level can't be a huge page */

>>>     		if (cursor.level == adev->vm_manager.root_level) {

>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h

>>> index 81ff8177f092..116605c038d2 100644

>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h

>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h

>>> @@ -303,9 +303,6 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm);

>>>     int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,

>>>     			      int (*callback)(void *p, struct amdgpu_bo *bo),

>>>     			      void *param);

>>> -int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,

>>> -			struct amdgpu_vm *vm,

>>> -			uint64_t saddr, uint64_t size);

>>>     int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync);

>>>     int amdgpu_vm_update_directories(struct amdgpu_device *adev,

>>>     				 struct amdgpu_vm *vm);

> _______________________________________________

> amd-gfx mailing list

> amd-gfx@lists.freedesktop.org

> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Koenig, Christian Feb. 5, 2019, 4 p.m.
Ah! The initial clear of the PDs is syncing to everything!

Ok, that is easy to fix.

Christian.

Am 05.02.19 um 16:49 schrieb Kuehling, Felix:
> I saw a backtrace from the GPU scheduler when I was debugging this yesterday, but those backtraces never tell us where the command submission came from. It could be memory initialization, or some migration at buffer-validation. Basically, any command submission triggered by page table allocation that synchronizes with the PD reservation object is a problem.

>

> Regards,

>    Felix

>

> -----Original Message-----

> From: Christian König <ckoenig.leichtzumerken@gmail.com>

> Sent: Tuesday, February 05, 2019 10:40 AM

> To: Kuehling, Felix <Felix.Kuehling@amd.com>; Koenig, Christian <Christian.Koenig@amd.com>; amd-gfx@lists.freedesktop.org

> Subject: Re: [PATCH 6/8] drm/amdgpu: allocate VM PDs/PTs on demand

>

> Am 05.02.19 um 16:20 schrieb Kuehling, Felix:

>>>> This may cause regressions in KFD if PT BO allocation can trigger

>>>> eviction fences.

>>> I don't think that can happen, but need to double check as well.

>>>

>>> Otherwise allocating page tables could try to evict other page tables from the same process and that seriously doesn't make much sense.

>> Eviction fences don't cause actual memory evictions. They are the result of memory evictions. They cause KFD processes to be preempted in order to allow the fence to signal so memory can be evicted. The problem is that a page table allocation waits for the fences on the PD reservation, which looks like an eviction to KFD and triggers an unnecessary preemption. There is no actual memory eviction happening here.

> Yeah, but where is that actually coming from?

>

> It sounds like we still unnecessary synchronize page table updates somewhere.

>

> Do you have a call chain?

>

> Regards,

> Christian.

>

>> Regards,

>>     Felix

>>

>> -----Original Message-----

>> From: Christian König <ckoenig.leichtzumerken@gmail.com>

>> Sent: Tuesday, February 05, 2019 6:37 AM

>> To: Kuehling, Felix <Felix.Kuehling@amd.com>;

>> amd-gfx@lists.freedesktop.org

>> Subject: Re: [PATCH 6/8] drm/amdgpu: allocate VM PDs/PTs on demand

>>

>>> This may cause regressions in KFD if PT BO allocation can trigger

>>> eviction fences.

>> I don't think that can happen, but need to double check as well.

>>

>> Otherwise allocating page tables could try to evict other page tables from the same process and that seriously doesn't make much sense.

>>

>>> Do you know whether PT BO allocation would wait on the page-directory

>>> reservation object without the sync-object API anywhere?

>> For inside the kernel I can't think of any relevant use case, except for the eviction call path and there it is intentional.

>>

>> One thing which will always wait for all fences is the display code path, but that is not relevant here. (Isn't it?).

>>

>> What might be questionable is amdgpu_gem_wait_idle_ioctl(), but I think this is also completely intentional that we wait on everything here.

>>

>> Regards,

>> Christian.

>>

>> Am 04.02.19 um 21:17 schrieb Kuehling, Felix:

>>> This may cause regressions in KFD if PT BO allocation can trigger

>>> eviction fences. The previous call to amdgpu_vm_alloc_pts was in a

>>> context where we had temporarily removed the eviction fence. PT BO

>>> allocation in amdgpu_vm_bo_update is not protected like that.

>>>

>>> I vaguely remember looking into this last time you were working on

>>> our eviction fence code and thinking that most of the cases where we

>>> remove the eviction fence were no longer needed if fence

>>> synchronization happens through the amdgpu_sync-object API (rather

>>> than waiting on a reservation object directly). I think it's time I

>>> go and finally clean that up.

>>>

>>> Do you know whether PT BO allocation would wait on the page-directory

>>> reservation object without the sync-object API anywhere?

>>>

>>> Regards,

>>>       Felix

>>>

>>> On 2019-02-04 7:42 a.m., Christian König wrote:

>>>> Let's start to allocate VM PDs/PTs on demand instead of

>>>> pre-allocating them during mapping.

>>>>

>>>> Signed-off-by: Christian König <christian.koenig@amd.com>

>>>> ---

>>>>      .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  |   6 -

>>>>      drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c       |   9 --

>>>>      drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c       |  10 --

>>>>      drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c        | 136 +++++-------------

>>>>      drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h        |   3 -

>>>>      5 files changed, 38 insertions(+), 126 deletions(-)

>>>>

>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c

>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c

>>>> index d7b10d79f1de..2176c92f9377 100644

>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c

>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c

>>>> @@ -492,12 +492,6 @@ static int add_bo_to_vm(struct amdgpu_device *adev, struct kgd_mem *mem,

>>>>      					vm->process_info->eviction_fence,

>>>>      					NULL, NULL);

>>>>      

>>>> -	ret = amdgpu_vm_alloc_pts(adev, vm, va, amdgpu_bo_size(bo));

>>>> -	if (ret) {

>>>> -		pr_err("Failed to allocate pts, err=%d\n", ret);

>>>> -		goto err_alloc_pts;

>>>> -	}

>>>> -

>>>>      	ret = vm_validate_pt_pd_bos(vm);

>>>>      	if (ret) {

>>>>      		pr_err("validate_pt_pd_bos() failed\n");

>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c

>>>> index 7e22be7ca68a..54dd02a898b9 100644

>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c

>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c

>>>> @@ -92,15 +92,6 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,

>>>>      		return -ENOMEM;

>>>>      	}

>>>>      

>>>> -	r = amdgpu_vm_alloc_pts(adev, (*bo_va)->base.vm, csa_addr,

>>>> -				size);

>>>> -	if (r) {

>>>> -		DRM_ERROR("failed to allocate pts for static CSA, err=%d\n", r);

>>>> -		amdgpu_vm_bo_rmv(adev, *bo_va);

>>>> -		ttm_eu_backoff_reservation(&ticket, &list);

>>>> -		return r;

>>>> -	}

>>>> -

>>>>      	r = amdgpu_vm_bo_map(adev, *bo_va, csa_addr, 0, size,

>>>>      			     AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE |

>>>>      			     AMDGPU_PTE_EXECUTABLE);

>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c

>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c

>>>> index d21dd2f369da..e141e3b13112 100644

>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c

>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c

>>>> @@ -627,11 +627,6 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev,

>>>> void *data,

>>>>      

>>>>      	switch (args->operation) {

>>>>      	case AMDGPU_VA_OP_MAP:

>>>> -		r = amdgpu_vm_alloc_pts(adev, bo_va->base.vm, args->va_address,

>>>> -					args->map_size);

>>>> -		if (r)

>>>> -			goto error_backoff;

>>>> -

>>>>      		va_flags = amdgpu_gmc_get_pte_flags(adev, args->flags);

>>>>      		r = amdgpu_vm_bo_map(adev, bo_va, args->va_address,

>>>>      				     args->offset_in_bo, args->map_size,

>>>> @@ -647,11 +642,6 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,

>>>>      						args->map_size);

>>>>      		break;

>>>>      	case AMDGPU_VA_OP_REPLACE:

>>>> -		r = amdgpu_vm_alloc_pts(adev, bo_va->base.vm, args->va_address,

>>>> -					args->map_size);

>>>> -		if (r)

>>>> -			goto error_backoff;

>>>> -

>>>>      		va_flags = amdgpu_gmc_get_pte_flags(adev, args->flags);

>>>>      		r = amdgpu_vm_bo_replace_map(adev, bo_va, args->va_address,

>>>>      					     args->offset_in_bo, args->map_size, diff --git

>>>> a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c

>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c

>>>> index f7d410a5d848..0b8a1aa56c4a 100644

>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c

>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c

>>>> @@ -504,47 +504,6 @@ static void amdgpu_vm_pt_next(struct amdgpu_device *adev,

>>>>      	}

>>>>      }

>>>>      

>>>> -/**

>>>> - * amdgpu_vm_pt_first_leaf - get first leaf PD/PT

>>>> - *

>>>> - * @adev: amdgpu_device pointer

>>>> - * @vm: amdgpu_vm structure

>>>> - * @start: start addr of the walk

>>>> - * @cursor: state to initialize

>>>> - *

>>>> - * Start a walk and go directly to the leaf node.

>>>> - */

>>>> -static void amdgpu_vm_pt_first_leaf(struct amdgpu_device *adev,

>>>> -				    struct amdgpu_vm *vm, uint64_t start,

>>>> -				    struct amdgpu_vm_pt_cursor *cursor)

>>>> -{

>>>> -	amdgpu_vm_pt_start(adev, vm, start, cursor);

>>>> -	while (amdgpu_vm_pt_descendant(adev, cursor));

>>>> -}

>>>> -

>>>> -/**

>>>> - * amdgpu_vm_pt_next_leaf - get next leaf PD/PT

>>>> - *

>>>> - * @adev: amdgpu_device pointer

>>>> - * @cursor: current state

>>>> - *

>>>> - * Walk the PD/PT tree to the next leaf node.

>>>> - */

>>>> -static void amdgpu_vm_pt_next_leaf(struct amdgpu_device *adev,

>>>> -				   struct amdgpu_vm_pt_cursor *cursor)

>>>> -{

>>>> -	amdgpu_vm_pt_next(adev, cursor);

>>>> -	if (cursor->pfn != ~0ll)

>>>> -		while (amdgpu_vm_pt_descendant(adev, cursor));

>>>> -}

>>>> -

>>>> -/**

>>>> - * for_each_amdgpu_vm_pt_leaf - walk over all leaf PDs/PTs in the

>>>> hierarchy

>>>> - */

>>>> -#define for_each_amdgpu_vm_pt_leaf(adev, vm, start, end, cursor)		\

>>>> -	for (amdgpu_vm_pt_first_leaf((adev), (vm), (start), &(cursor));		\

>>>> -	     (cursor).pfn <= end; amdgpu_vm_pt_next_leaf((adev), &(cursor)))

>>>> -

>>>>      /**

>>>>       * amdgpu_vm_pt_first_dfs - start a deep first search

>>>>       *

>>>> @@ -919,74 +878,51 @@ static void amdgpu_vm_bo_param(struct amdgpu_device *adev, struct amdgpu_vm *vm,

>>>>       * Returns:

>>>>       * 0 on success, errno otherwise.

>>>>       */

>>>> -int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,

>>>> -			struct amdgpu_vm *vm,

>>>> -			uint64_t saddr, uint64_t size)

>>>> +static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,

>>>> +			       struct amdgpu_vm *vm,

>>>> +			       struct amdgpu_vm_pt_cursor *cursor)

>>>>      {

>>>> -	struct amdgpu_vm_pt_cursor cursor;

>>>> +	struct amdgpu_vm_pt *entry = cursor->entry;

>>>> +	struct amdgpu_bo_param bp;

>>>>      	struct amdgpu_bo *pt;

>>>> -	uint64_t eaddr;

>>>>      	int r;

>>>>      

>>>> -	/* validate the parameters */

>>>> -	if (saddr & AMDGPU_GPU_PAGE_MASK || size & AMDGPU_GPU_PAGE_MASK)

>>>> -		return -EINVAL;

>>>> +	if (cursor->level < AMDGPU_VM_PTB && !entry->entries) {

>>>> +		unsigned num_entries;

>>>>      

>>>> -	eaddr = saddr + size - 1;

>>>> -

>>>> -	saddr /= AMDGPU_GPU_PAGE_SIZE;

>>>> -	eaddr /= AMDGPU_GPU_PAGE_SIZE;

>>>> -

>>>> -	if (eaddr >= adev->vm_manager.max_pfn) {

>>>> -		dev_err(adev->dev, "va above limit (0x%08llX >= 0x%08llX)\n",

>>>> -			eaddr, adev->vm_manager.max_pfn);

>>>> -		return -EINVAL;

>>>> +		num_entries = amdgpu_vm_num_entries(adev, cursor->level);

>>>> +		entry->entries = kvmalloc_array(num_entries,

>>>> +						sizeof(*entry->entries),

>>>> +						GFP_KERNEL | __GFP_ZERO);

>>>> +		if (!entry->entries)

>>>> +			return -ENOMEM;

>>>>      	}

>>>>      

>>>> -	for_each_amdgpu_vm_pt_leaf(adev, vm, saddr, eaddr, cursor) {

>>>> -		struct amdgpu_vm_pt *entry = cursor.entry;

>>>> -		struct amdgpu_bo_param bp;

>>>> -

>>>> -		if (cursor.level < AMDGPU_VM_PTB) {

>>>> -			unsigned num_entries;

>>>> -

>>>> -			num_entries = amdgpu_vm_num_entries(adev, cursor.level);

>>>> -			entry->entries = kvmalloc_array(num_entries,

>>>> -							sizeof(*entry->entries),

>>>> -							GFP_KERNEL |

>>>> -							__GFP_ZERO);

>>>> -			if (!entry->entries)

>>>> -				return -ENOMEM;

>>>> -		}

>>>> -

>>>> -

>>>> -		if (entry->base.bo)

>>>> -			continue;

>>>> -

>>>> -		amdgpu_vm_bo_param(adev, vm, cursor.level, &bp);

>>>> -

>>>> -		r = amdgpu_bo_create(adev, &bp, &pt);

>>>> -		if (r)

>>>> -			return r;

>>>> -

>>>> -		if (vm->use_cpu_for_update) {

>>>> -			r = amdgpu_bo_kmap(pt, NULL);

>>>> -			if (r)

>>>> -				goto error_free_pt;

>>>> -		}

>>>> +	if (entry->base.bo)

>>>> +		return 0;

>>>>      

>>>> -		/* Keep a reference to the root directory to avoid

>>>> -		* freeing them up in the wrong order.

>>>> -		*/

>>>> -		pt->parent = amdgpu_bo_ref(cursor.parent->base.bo);

>>>> +	amdgpu_vm_bo_param(adev, vm, cursor->level, &bp);

>>>>      

>>>> -		amdgpu_vm_bo_base_init(&entry->base, vm, pt);

>>>> +	r = amdgpu_bo_create(adev, &bp, &pt);

>>>> +	if (r)

>>>> +		return r;

>>>>      

>>>> -		r = amdgpu_vm_clear_bo(adev, vm, pt);

>>>> +	if (vm->use_cpu_for_update) {

>>>> +		r = amdgpu_bo_kmap(pt, NULL);

>>>>      		if (r)

>>>>      			goto error_free_pt;

>>>>      	}

>>>>      

>>>> +	/* Keep a reference to the root directory to avoid

>>>> +	 * freeing them up in the wrong order.

>>>> +	 */

>>>> +	pt->parent = amdgpu_bo_ref(cursor->parent->base.bo);

>>>> +	amdgpu_vm_bo_base_init(&entry->base, vm, pt);

>>>> +

>>>> +	r = amdgpu_vm_clear_bo(adev, vm, pt);

>>>> +	if (r)

>>>> +		goto error_free_pt;

>>>> +

>>>>      	return 0;

>>>>      

>>>>      error_free_pt:

>>>> @@ -1655,6 +1591,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,

>>>>      	struct amdgpu_vm_pt_cursor cursor;

>>>>      	uint64_t frag_start = start, frag_end;

>>>>      	unsigned int frag;

>>>> +	int r;

>>>>      

>>>>      	/* figure out the initial fragment */

>>>>      	amdgpu_vm_fragment(params, frag_start, end, flags, &frag, &frag_end);

>>>> @@ -1662,12 +1599,15 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,

>>>>      	/* walk over the address space and update the PTs */

>>>>      	amdgpu_vm_pt_start(adev, params->vm, start, &cursor);

>>>>      	while (cursor.pfn < end) {

>>>> -		struct amdgpu_bo *pt = cursor.entry->base.bo;

>>>>      		unsigned shift, parent_shift, mask;

>>>>      		uint64_t incr, entry_end, pe_start;

>>>> +		struct amdgpu_bo *pt;

>>>>      

>>>> -		if (!pt)

>>>> -			return -ENOENT;

>>>> +		r = amdgpu_vm_alloc_pts(params->adev, params->vm, &cursor);

>>>> +		if (r)

>>>> +			return r;

>>>> +

>>>> +		pt = cursor.entry->base.bo;

>>>>      

>>>>      		/* The root level can't be a huge page */

>>>>      		if (cursor.level == adev->vm_manager.root_level) {

>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h

>>>> index 81ff8177f092..116605c038d2 100644

>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h

>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h

>>>> @@ -303,9 +303,6 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm);

>>>>      int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,

>>>>      			      int (*callback)(void *p, struct amdgpu_bo *bo),

>>>>      			      void *param);

>>>> -int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,

>>>> -			struct amdgpu_vm *vm,

>>>> -			uint64_t saddr, uint64_t size);

>>>>      int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync);

>>>>      int amdgpu_vm_update_directories(struct amdgpu_device *adev,

>>>>      				 struct amdgpu_vm *vm);

>> _______________________________________________

>> amd-gfx mailing list

>> amd-gfx@lists.freedesktop.org

>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Kuehling, Felix Feb. 12, 2019, 3:26 p.m.
I pushed my patch series that simplifies eviction fence handling in KFD. 
If you rebase this, it should be OK now.

Regards,
   Felix

On 2019-02-04 7:42 a.m., Christian König wrote:
> Let's start to allocate VM PDs/PTs on demand instead of pre-allocating

> them during mapping.

>

> Signed-off-by: Christian König <christian.koenig@amd.com>

> ---

>   .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  |   6 -

>   drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c       |   9 --

>   drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c       |  10 --

>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c        | 136 +++++-------------

>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h        |   3 -

>   5 files changed, 38 insertions(+), 126 deletions(-)

>

> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c

> index d7b10d79f1de..2176c92f9377 100644

> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c

> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c

> @@ -492,12 +492,6 @@ static int add_bo_to_vm(struct amdgpu_device *adev, struct kgd_mem *mem,

>   					vm->process_info->eviction_fence,

>   					NULL, NULL);

>   

> -	ret = amdgpu_vm_alloc_pts(adev, vm, va, amdgpu_bo_size(bo));

> -	if (ret) {

> -		pr_err("Failed to allocate pts, err=%d\n", ret);

> -		goto err_alloc_pts;

> -	}

> -

>   	ret = vm_validate_pt_pd_bos(vm);

>   	if (ret) {

>   		pr_err("validate_pt_pd_bos() failed\n");

> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c

> index 7e22be7ca68a..54dd02a898b9 100644

> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c

> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c

> @@ -92,15 +92,6 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,

>   		return -ENOMEM;

>   	}

>   

> -	r = amdgpu_vm_alloc_pts(adev, (*bo_va)->base.vm, csa_addr,

> -				size);

> -	if (r) {

> -		DRM_ERROR("failed to allocate pts for static CSA, err=%d\n", r);

> -		amdgpu_vm_bo_rmv(adev, *bo_va);

> -		ttm_eu_backoff_reservation(&ticket, &list);

> -		return r;

> -	}

> -

>   	r = amdgpu_vm_bo_map(adev, *bo_va, csa_addr, 0, size,

>   			     AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE |

>   			     AMDGPU_PTE_EXECUTABLE);

> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c

> index d21dd2f369da..e141e3b13112 100644

> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c

> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c

> @@ -627,11 +627,6 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,

>   

>   	switch (args->operation) {

>   	case AMDGPU_VA_OP_MAP:

> -		r = amdgpu_vm_alloc_pts(adev, bo_va->base.vm, args->va_address,

> -					args->map_size);

> -		if (r)

> -			goto error_backoff;

> -

>   		va_flags = amdgpu_gmc_get_pte_flags(adev, args->flags);

>   		r = amdgpu_vm_bo_map(adev, bo_va, args->va_address,

>   				     args->offset_in_bo, args->map_size,

> @@ -647,11 +642,6 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,

>   						args->map_size);

>   		break;

>   	case AMDGPU_VA_OP_REPLACE:

> -		r = amdgpu_vm_alloc_pts(adev, bo_va->base.vm, args->va_address,

> -					args->map_size);

> -		if (r)

> -			goto error_backoff;

> -

>   		va_flags = amdgpu_gmc_get_pte_flags(adev, args->flags);

>   		r = amdgpu_vm_bo_replace_map(adev, bo_va, args->va_address,

>   					     args->offset_in_bo, args->map_size,

> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c

> index f7d410a5d848..0b8a1aa56c4a 100644

> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c

> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c

> @@ -504,47 +504,6 @@ static void amdgpu_vm_pt_next(struct amdgpu_device *adev,

>   	}

>   }

>   

> -/**

> - * amdgpu_vm_pt_first_leaf - get first leaf PD/PT

> - *

> - * @adev: amdgpu_device pointer

> - * @vm: amdgpu_vm structure

> - * @start: start addr of the walk

> - * @cursor: state to initialize

> - *

> - * Start a walk and go directly to the leaf node.

> - */

> -static void amdgpu_vm_pt_first_leaf(struct amdgpu_device *adev,

> -				    struct amdgpu_vm *vm, uint64_t start,

> -				    struct amdgpu_vm_pt_cursor *cursor)

> -{

> -	amdgpu_vm_pt_start(adev, vm, start, cursor);

> -	while (amdgpu_vm_pt_descendant(adev, cursor));

> -}

> -

> -/**

> - * amdgpu_vm_pt_next_leaf - get next leaf PD/PT

> - *

> - * @adev: amdgpu_device pointer

> - * @cursor: current state

> - *

> - * Walk the PD/PT tree to the next leaf node.

> - */

> -static void amdgpu_vm_pt_next_leaf(struct amdgpu_device *adev,

> -				   struct amdgpu_vm_pt_cursor *cursor)

> -{

> -	amdgpu_vm_pt_next(adev, cursor);

> -	if (cursor->pfn != ~0ll)

> -		while (amdgpu_vm_pt_descendant(adev, cursor));

> -}

> -

> -/**

> - * for_each_amdgpu_vm_pt_leaf - walk over all leaf PDs/PTs in the hierarchy

> - */

> -#define for_each_amdgpu_vm_pt_leaf(adev, vm, start, end, cursor)		\

> -	for (amdgpu_vm_pt_first_leaf((adev), (vm), (start), &(cursor));		\

> -	     (cursor).pfn <= end; amdgpu_vm_pt_next_leaf((adev), &(cursor)))

> -

>   /**

>    * amdgpu_vm_pt_first_dfs - start a deep first search

>    *

> @@ -919,74 +878,51 @@ static void amdgpu_vm_bo_param(struct amdgpu_device *adev, struct amdgpu_vm *vm,

>    * Returns:

>    * 0 on success, errno otherwise.

>    */

> -int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,

> -			struct amdgpu_vm *vm,

> -			uint64_t saddr, uint64_t size)

> +static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,

> +			       struct amdgpu_vm *vm,

> +			       struct amdgpu_vm_pt_cursor *cursor)

>   {

> -	struct amdgpu_vm_pt_cursor cursor;

> +	struct amdgpu_vm_pt *entry = cursor->entry;

> +	struct amdgpu_bo_param bp;

>   	struct amdgpu_bo *pt;

> -	uint64_t eaddr;

>   	int r;

>   

> -	/* validate the parameters */

> -	if (saddr & AMDGPU_GPU_PAGE_MASK || size & AMDGPU_GPU_PAGE_MASK)

> -		return -EINVAL;

> +	if (cursor->level < AMDGPU_VM_PTB && !entry->entries) {

> +		unsigned num_entries;

>   

> -	eaddr = saddr + size - 1;

> -

> -	saddr /= AMDGPU_GPU_PAGE_SIZE;

> -	eaddr /= AMDGPU_GPU_PAGE_SIZE;

> -

> -	if (eaddr >= adev->vm_manager.max_pfn) {

> -		dev_err(adev->dev, "va above limit (0x%08llX >= 0x%08llX)\n",

> -			eaddr, adev->vm_manager.max_pfn);

> -		return -EINVAL;

> +		num_entries = amdgpu_vm_num_entries(adev, cursor->level);

> +		entry->entries = kvmalloc_array(num_entries,

> +						sizeof(*entry->entries),

> +						GFP_KERNEL | __GFP_ZERO);

> +		if (!entry->entries)

> +			return -ENOMEM;

>   	}

>   

> -	for_each_amdgpu_vm_pt_leaf(adev, vm, saddr, eaddr, cursor) {

> -		struct amdgpu_vm_pt *entry = cursor.entry;

> -		struct amdgpu_bo_param bp;

> -

> -		if (cursor.level < AMDGPU_VM_PTB) {

> -			unsigned num_entries;

> -

> -			num_entries = amdgpu_vm_num_entries(adev, cursor.level);

> -			entry->entries = kvmalloc_array(num_entries,

> -							sizeof(*entry->entries),

> -							GFP_KERNEL |

> -							__GFP_ZERO);

> -			if (!entry->entries)

> -				return -ENOMEM;

> -		}

> -

> -

> -		if (entry->base.bo)

> -			continue;

> -

> -		amdgpu_vm_bo_param(adev, vm, cursor.level, &bp);

> -

> -		r = amdgpu_bo_create(adev, &bp, &pt);

> -		if (r)

> -			return r;

> -

> -		if (vm->use_cpu_for_update) {

> -			r = amdgpu_bo_kmap(pt, NULL);

> -			if (r)

> -				goto error_free_pt;

> -		}

> +	if (entry->base.bo)

> +		return 0;

>   

> -		/* Keep a reference to the root directory to avoid

> -		* freeing them up in the wrong order.

> -		*/

> -		pt->parent = amdgpu_bo_ref(cursor.parent->base.bo);

> +	amdgpu_vm_bo_param(adev, vm, cursor->level, &bp);

>   

> -		amdgpu_vm_bo_base_init(&entry->base, vm, pt);

> +	r = amdgpu_bo_create(adev, &bp, &pt);

> +	if (r)

> +		return r;

>   

> -		r = amdgpu_vm_clear_bo(adev, vm, pt);

> +	if (vm->use_cpu_for_update) {

> +		r = amdgpu_bo_kmap(pt, NULL);

>   		if (r)

>   			goto error_free_pt;

>   	}

>   

> +	/* Keep a reference to the root directory to avoid

> +	 * freeing them up in the wrong order.

> +	 */

> +	pt->parent = amdgpu_bo_ref(cursor->parent->base.bo);

> +	amdgpu_vm_bo_base_init(&entry->base, vm, pt);

> +

> +	r = amdgpu_vm_clear_bo(adev, vm, pt);

> +	if (r)

> +		goto error_free_pt;

> +

>   	return 0;

>   

>   error_free_pt:

> @@ -1655,6 +1591,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,

>   	struct amdgpu_vm_pt_cursor cursor;

>   	uint64_t frag_start = start, frag_end;

>   	unsigned int frag;

> +	int r;

>   

>   	/* figure out the initial fragment */

>   	amdgpu_vm_fragment(params, frag_start, end, flags, &frag, &frag_end);

> @@ -1662,12 +1599,15 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,

>   	/* walk over the address space and update the PTs */

>   	amdgpu_vm_pt_start(adev, params->vm, start, &cursor);

>   	while (cursor.pfn < end) {

> -		struct amdgpu_bo *pt = cursor.entry->base.bo;

>   		unsigned shift, parent_shift, mask;

>   		uint64_t incr, entry_end, pe_start;

> +		struct amdgpu_bo *pt;

>   

> -		if (!pt)

> -			return -ENOENT;

> +		r = amdgpu_vm_alloc_pts(params->adev, params->vm, &cursor);

> +		if (r)

> +			return r;

> +

> +		pt = cursor.entry->base.bo;

>   

>   		/* The root level can't be a huge page */

>   		if (cursor.level == adev->vm_manager.root_level) {

> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h

> index 81ff8177f092..116605c038d2 100644

> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h

> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h

> @@ -303,9 +303,6 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm);

>   int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,

>   			      int (*callback)(void *p, struct amdgpu_bo *bo),

>   			      void *param);

> -int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,

> -			struct amdgpu_vm *vm,

> -			uint64_t saddr, uint64_t size);

>   int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync);

>   int amdgpu_vm_update_directories(struct amdgpu_device *adev,

>   				 struct amdgpu_vm *vm);