[2/4] drm/amdgpu: add support for per VM BOs v2

Submitted by Christian König on Aug. 30, 2017, 3 p.m.

Details

Message ID 1504105209-1558-2-git-send-email-deathsimple@vodafone.de
State New
Headers show
Series "Series without cover letter" ( rev: 1 ) in AMD X.Org drivers

Commit Message

Christian König Aug. 30, 2017, 3 p.m.
From: Christian König <christian.koenig@amd.com>

Per VM BOs are handled like VM PDs and PTs. They are always valid and don't
need to be specified in the BO lists.

v2: validate PDs/PTs first

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 79 ++++++++++++++++++++++++----------
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  5 ++-
 3 files changed, 60 insertions(+), 26 deletions(-)

Patch hide | download patch | download mbox

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index f68ac56..48e18cc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -813,7 +813,7 @@  static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
 
 	}
 
-	r = amdgpu_vm_clear_moved(adev, vm, &p->job->sync);
+	r = amdgpu_vm_handle_moved(adev, vm, &p->job->sync);
 
 	if (amdgpu_vm_debug && p->bo_list) {
 		/* Invalidate all BOs to test for userspace bugs */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 4cdfb70..6cd20e7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -189,14 +189,18 @@  int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 			spin_unlock(&glob->lru_lock);
 		}
 
-		if (vm->use_cpu_for_update) {
+		if (bo->tbo.type == ttm_bo_type_kernel &&
+		    vm->use_cpu_for_update) {
 			r = amdgpu_bo_kmap(bo, NULL);
 			if (r)
 				return r;
 		}
 
 		spin_lock(&vm->status_lock);
-		list_move(&bo_base->vm_status, &vm->relocated);
+		if (bo->tbo.type != ttm_bo_type_kernel)
+			list_move(&bo_base->vm_status, &vm->moved);
+		else
+			list_move(&bo_base->vm_status, &vm->relocated);
 	}
 	spin_unlock(&vm->status_lock);
 
@@ -1994,20 +1998,23 @@  int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
 }
 
 /**
- * amdgpu_vm_clear_moved - clear moved BOs in the PT
+ * amdgpu_vm_handle_moved - handle moved BOs in the PT
  *
  * @adev: amdgpu_device pointer
  * @vm: requested vm
+ * @sync: sync object to add fences to
  *
- * Make sure all moved BOs are cleared in the PT.
+ * Make sure all BOs which are moved are updated in the PTs.
  * Returns 0 for success.
  *
- * PTs have to be reserved and mutex must be locked!
+ * PTs have to be reserved!
  */
-int amdgpu_vm_clear_moved(struct amdgpu_device *adev, struct amdgpu_vm *vm,
-			    struct amdgpu_sync *sync)
+int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
+			   struct amdgpu_vm *vm,
+			   struct amdgpu_sync *sync)
 {
 	struct amdgpu_bo_va *bo_va = NULL;
+	bool clear;
 	int r = 0;
 
 	spin_lock(&vm->status_lock);
@@ -2016,7 +2023,10 @@  int amdgpu_vm_clear_moved(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 			struct amdgpu_bo_va, base.vm_status);
 		spin_unlock(&vm->status_lock);
 
-		r = amdgpu_vm_bo_update(adev, bo_va, true);
+		/* Per VM BOs never need to bo cleared in the page tables */
+		clear = bo_va->base.bo->tbo.resv != vm->root.base.bo->tbo.resv;
+
+		r = amdgpu_vm_bo_update(adev, bo_va, clear);
 		if (r)
 			return r;
 
@@ -2068,6 +2078,37 @@  struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
 	return bo_va;
 }
 
+
+/**
+ * amdgpu_vm_bo_insert_mapping - insert a new mapping
+ *
+ * @adev: amdgpu_device pointer
+ * @bo_va: bo_va to store the address
+ * @mapping: the mapping to insert
+ *
+ * Insert a new mapping into all structures.
+ */
+static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev,
+				    struct amdgpu_bo_va *bo_va,
+				    struct amdgpu_bo_va_mapping *mapping)
+{
+	struct amdgpu_vm *vm = bo_va->base.vm;
+	struct amdgpu_bo *bo = bo_va->base.bo;
+
+	list_add(&mapping->list, &bo_va->invalids);
+	amdgpu_vm_it_insert(mapping, &vm->va);
+
+	if (mapping->flags & AMDGPU_PTE_PRT)
+		amdgpu_vm_prt_get(adev);
+
+	if (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv) {
+		spin_lock(&vm->status_lock);
+		list_move(&bo_va->base.vm_status, &vm->moved);
+		spin_unlock(&vm->status_lock);
+	}
+	trace_amdgpu_vm_bo_map(bo_va, mapping);
+}
+
 /**
  * amdgpu_vm_bo_map - map bo inside a vm
  *
@@ -2119,18 +2160,12 @@  int amdgpu_vm_bo_map(struct amdgpu_device *adev,
 	if (!mapping)
 		return -ENOMEM;
 
-	INIT_LIST_HEAD(&mapping->list);
 	mapping->start = saddr;
 	mapping->last = eaddr;
 	mapping->offset = offset;
 	mapping->flags = flags;
 
-	list_add(&mapping->list, &bo_va->invalids);
-	amdgpu_vm_it_insert(mapping, &vm->va);
-
-	if (flags & AMDGPU_PTE_PRT)
-		amdgpu_vm_prt_get(adev);
-	trace_amdgpu_vm_bo_map(bo_va, mapping);
+	amdgpu_vm_bo_insert_map(adev, bo_va, mapping);
 
 	return 0;
 }
@@ -2157,7 +2192,6 @@  int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,
 {
 	struct amdgpu_bo_va_mapping *mapping;
 	struct amdgpu_bo *bo = bo_va->base.bo;
-	struct amdgpu_vm *vm = bo_va->base.vm;
 	uint64_t eaddr;
 	int r;
 
@@ -2191,12 +2225,7 @@  int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,
 	mapping->offset = offset;
 	mapping->flags = flags;
 
-	list_add(&mapping->list, &bo_va->invalids);
-	amdgpu_vm_it_insert(mapping, &vm->va);
-
-	if (flags & AMDGPU_PTE_PRT)
-		amdgpu_vm_prt_get(adev);
-	trace_amdgpu_vm_bo_map(bo_va, mapping);
+	amdgpu_vm_bo_insert_map(adev, bo_va, mapping);
 
 	return 0;
 }
@@ -2411,7 +2440,11 @@  void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
 		bo_base->moved = true;
 		if (evicted && bo->tbo.resv == vm->root.base.bo->tbo.resv) {
 			spin_lock(&bo_base->vm->status_lock);
-			list_move(&bo_base->vm_status, &vm->evicted);
+			if (bo->tbo.type == ttm_bo_type_kernel)
+				list_move(&bo_base->vm_status, &vm->evicted);
+			else
+				list_move_tail(&bo_base->vm_status,
+					       &vm->evicted);
 			spin_unlock(&bo_base->vm->status_lock);
 			continue;
 		}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index c3753af..90b7741 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -249,8 +249,9 @@  int amdgpu_vm_update_directories(struct amdgpu_device *adev,
 int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
 			  struct amdgpu_vm *vm,
 			  struct dma_fence **fence);
-int amdgpu_vm_clear_moved(struct amdgpu_device *adev, struct amdgpu_vm *vm,
-			  struct amdgpu_sync *sync);
+int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
+			   struct amdgpu_vm *vm,
+			   struct amdgpu_sync *sync);
 int amdgpu_vm_bo_update(struct amdgpu_device *adev,
 			struct amdgpu_bo_va *bo_va,
 			bool clear);

Comments

Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>

Some more thoughts inline, but nothing that should be addressed in this
change.

Regards,
  Felix


On 2017-08-30 11:00 AM, Christian König wrote:
> From: Christian König <christian.koenig@amd.com>
>
> Per VM BOs are handled like VM PDs and PTs. They are always valid and don't
> need to be specified in the BO lists.
>
> v2: validate PDs/PTs first
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c |  2 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 79 ++++++++++++++++++++++++----------
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  5 ++-
>  3 files changed, 60 insertions(+), 26 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index f68ac56..48e18cc 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -813,7 +813,7 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
>  
>  	}
>  
> -	r = amdgpu_vm_clear_moved(adev, vm, &p->job->sync);
> +	r = amdgpu_vm_handle_moved(adev, vm, &p->job->sync);
>  
>  	if (amdgpu_vm_debug && p->bo_list) {
>  		/* Invalidate all BOs to test for userspace bugs */
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 4cdfb70..6cd20e7 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -189,14 +189,18 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>  			spin_unlock(&glob->lru_lock);
>  		}
>  
> -		if (vm->use_cpu_for_update) {
> +		if (bo->tbo.type == ttm_bo_type_kernel &&
> +		    vm->use_cpu_for_update) {
>  			r = amdgpu_bo_kmap(bo, NULL);
>  			if (r)
>  				return r;
>  		}
>  
>  		spin_lock(&vm->status_lock);
> -		list_move(&bo_base->vm_status, &vm->relocated);
> +		if (bo->tbo.type != ttm_bo_type_kernel)
> +			list_move(&bo_base->vm_status, &vm->moved);
> +		else
> +			list_move(&bo_base->vm_status, &vm->relocated);
>  	}
>  	spin_unlock(&vm->status_lock);
>  
> @@ -1994,20 +1998,23 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
>  }
>  
>  /**
> - * amdgpu_vm_clear_moved - clear moved BOs in the PT
> + * amdgpu_vm_handle_moved - handle moved BOs in the PT
>   *
>   * @adev: amdgpu_device pointer
>   * @vm: requested vm
> + * @sync: sync object to add fences to
>   *
> - * Make sure all moved BOs are cleared in the PT.
> + * Make sure all BOs which are moved are updated in the PTs.
>   * Returns 0 for success.
>   *
> - * PTs have to be reserved and mutex must be locked!
> + * PTs have to be reserved!
>   */
> -int amdgpu_vm_clear_moved(struct amdgpu_device *adev, struct amdgpu_vm *vm,
> -			    struct amdgpu_sync *sync)
> +int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
> +			   struct amdgpu_vm *vm,
> +			   struct amdgpu_sync *sync)
>  {
>  	struct amdgpu_bo_va *bo_va = NULL;
> +	bool clear;
>  	int r = 0;
>  
>  	spin_lock(&vm->status_lock);
> @@ -2016,7 +2023,10 @@ int amdgpu_vm_clear_moved(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>  			struct amdgpu_bo_va, base.vm_status);
>  		spin_unlock(&vm->status_lock);
>  
> -		r = amdgpu_vm_bo_update(adev, bo_va, true);
> +		/* Per VM BOs never need to bo cleared in the page tables */
> +		clear = bo_va->base.bo->tbo.resv != vm->root.base.bo->tbo.resv;
> +
> +		r = amdgpu_vm_bo_update(adev, bo_va, clear);

Just thinking out loud: Per VM BOs don't need to be cleared because they
have separate lists for evicted and moved (but valid) BOs. If we had a
similar thing for non-VM BOs (but without the automatic validation),
this case could be optimized. Then moved but valid BOs could be updated
in the page table instead of clearing them here and updating the page
table again later.

>  		if (r)
>  			return r;
>  
> @@ -2068,6 +2078,37 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
>  	return bo_va;
>  }
>  
> +
> +/**
> + * amdgpu_vm_bo_insert_mapping - insert a new mapping
> + *
> + * @adev: amdgpu_device pointer
> + * @bo_va: bo_va to store the address
> + * @mapping: the mapping to insert
> + *
> + * Insert a new mapping into all structures.
> + */
> +static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev,
> +				    struct amdgpu_bo_va *bo_va,
> +				    struct amdgpu_bo_va_mapping *mapping)
> +{
> +	struct amdgpu_vm *vm = bo_va->base.vm;
> +	struct amdgpu_bo *bo = bo_va->base.bo;
> +
> +	list_add(&mapping->list, &bo_va->invalids);
> +	amdgpu_vm_it_insert(mapping, &vm->va);
> +
> +	if (mapping->flags & AMDGPU_PTE_PRT)
> +		amdgpu_vm_prt_get(adev);
> +
> +	if (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv) {
> +		spin_lock(&vm->status_lock);
> +		list_move(&bo_va->base.vm_status, &vm->moved);
> +		spin_unlock(&vm->status_lock);
> +	}
> +	trace_amdgpu_vm_bo_map(bo_va, mapping);
> +}
> +
>  /**
>   * amdgpu_vm_bo_map - map bo inside a vm
>   *
> @@ -2119,18 +2160,12 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
>  	if (!mapping)
>  		return -ENOMEM;
>  
> -	INIT_LIST_HEAD(&mapping->list);
>  	mapping->start = saddr;
>  	mapping->last = eaddr;
>  	mapping->offset = offset;
>  	mapping->flags = flags;
>  
> -	list_add(&mapping->list, &bo_va->invalids);
> -	amdgpu_vm_it_insert(mapping, &vm->va);
> -
> -	if (flags & AMDGPU_PTE_PRT)
> -		amdgpu_vm_prt_get(adev);
> -	trace_amdgpu_vm_bo_map(bo_va, mapping);
> +	amdgpu_vm_bo_insert_map(adev, bo_va, mapping);
>  
>  	return 0;
>  }
> @@ -2157,7 +2192,6 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,
>  {
>  	struct amdgpu_bo_va_mapping *mapping;
>  	struct amdgpu_bo *bo = bo_va->base.bo;
> -	struct amdgpu_vm *vm = bo_va->base.vm;
>  	uint64_t eaddr;
>  	int r;
>  
> @@ -2191,12 +2225,7 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,
>  	mapping->offset = offset;
>  	mapping->flags = flags;
>  
> -	list_add(&mapping->list, &bo_va->invalids);
> -	amdgpu_vm_it_insert(mapping, &vm->va);
> -
> -	if (flags & AMDGPU_PTE_PRT)
> -		amdgpu_vm_prt_get(adev);
> -	trace_amdgpu_vm_bo_map(bo_va, mapping);
> +	amdgpu_vm_bo_insert_map(adev, bo_va, mapping);
>  
>  	return 0;
>  }
> @@ -2411,7 +2440,11 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
>  		bo_base->moved = true;
>  		if (evicted && bo->tbo.resv == vm->root.base.bo->tbo.resv) {
>  			spin_lock(&bo_base->vm->status_lock);
> -			list_move(&bo_base->vm_status, &vm->evicted);
> +			if (bo->tbo.type == ttm_bo_type_kernel)
> +				list_move(&bo_base->vm_status, &vm->evicted);
> +			else
> +				list_move_tail(&bo_base->vm_status,
> +					       &vm->evicted);
>  			spin_unlock(&bo_base->vm->status_lock);
>  			continue;
>  		}
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index c3753af..90b7741 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -249,8 +249,9 @@ int amdgpu_vm_update_directories(struct amdgpu_device *adev,
>  int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
>  			  struct amdgpu_vm *vm,
>  			  struct dma_fence **fence);
> -int amdgpu_vm_clear_moved(struct amdgpu_device *adev, struct amdgpu_vm *vm,
> -			  struct amdgpu_sync *sync);
> +int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
> +			   struct amdgpu_vm *vm,
> +			   struct amdgpu_sync *sync);
>  int amdgpu_vm_bo_update(struct amdgpu_device *adev,
>  			struct amdgpu_bo_va *bo_va,
>  			bool clear);
Am 31.08.2017 um 00:50 schrieb Felix Kuehling:
> Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
>
> Some more thoughts inline, but nothing that should be addressed in this
> change.
>
> Regards,
>    Felix
>
>
> On 2017-08-30 11:00 AM, Christian König wrote:
>> From: Christian König <christian.koenig@amd.com>
>>
>> Per VM BOs are handled like VM PDs and PTs. They are always valid and don't
>> need to be specified in the BO lists.
>>
>> v2: validate PDs/PTs first
>>
>> Signed-off-by: Christian König <christian.koenig@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c |  2 +-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 79 ++++++++++++++++++++++++----------
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  5 ++-
>>   3 files changed, 60 insertions(+), 26 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> index f68ac56..48e18cc 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> @@ -813,7 +813,7 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
>>   
>>   	}
>>   
>> -	r = amdgpu_vm_clear_moved(adev, vm, &p->job->sync);
>> +	r = amdgpu_vm_handle_moved(adev, vm, &p->job->sync);
>>   
>>   	if (amdgpu_vm_debug && p->bo_list) {
>>   		/* Invalidate all BOs to test for userspace bugs */
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> index 4cdfb70..6cd20e7 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> @@ -189,14 +189,18 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>>   			spin_unlock(&glob->lru_lock);
>>   		}
>>   
>> -		if (vm->use_cpu_for_update) {
>> +		if (bo->tbo.type == ttm_bo_type_kernel &&
>> +		    vm->use_cpu_for_update) {
>>   			r = amdgpu_bo_kmap(bo, NULL);
>>   			if (r)
>>   				return r;
>>   		}
>>   
>>   		spin_lock(&vm->status_lock);
>> -		list_move(&bo_base->vm_status, &vm->relocated);
>> +		if (bo->tbo.type != ttm_bo_type_kernel)
>> +			list_move(&bo_base->vm_status, &vm->moved);
>> +		else
>> +			list_move(&bo_base->vm_status, &vm->relocated);
>>   	}
>>   	spin_unlock(&vm->status_lock);
>>   
>> @@ -1994,20 +1998,23 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
>>   }
>>   
>>   /**
>> - * amdgpu_vm_clear_moved - clear moved BOs in the PT
>> + * amdgpu_vm_handle_moved - handle moved BOs in the PT
>>    *
>>    * @adev: amdgpu_device pointer
>>    * @vm: requested vm
>> + * @sync: sync object to add fences to
>>    *
>> - * Make sure all moved BOs are cleared in the PT.
>> + * Make sure all BOs which are moved are updated in the PTs.
>>    * Returns 0 for success.
>>    *
>> - * PTs have to be reserved and mutex must be locked!
>> + * PTs have to be reserved!
>>    */
>> -int amdgpu_vm_clear_moved(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>> -			    struct amdgpu_sync *sync)
>> +int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
>> +			   struct amdgpu_vm *vm,
>> +			   struct amdgpu_sync *sync)
>>   {
>>   	struct amdgpu_bo_va *bo_va = NULL;
>> +	bool clear;
>>   	int r = 0;
>>   
>>   	spin_lock(&vm->status_lock);
>> @@ -2016,7 +2023,10 @@ int amdgpu_vm_clear_moved(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>>   			struct amdgpu_bo_va, base.vm_status);
>>   		spin_unlock(&vm->status_lock);
>>   
>> -		r = amdgpu_vm_bo_update(adev, bo_va, true);
>> +		/* Per VM BOs never need to bo cleared in the page tables */
>> +		clear = bo_va->base.bo->tbo.resv != vm->root.base.bo->tbo.resv;
>> +
>> +		r = amdgpu_vm_bo_update(adev, bo_va, clear);
> Just thinking out loud: Per VM BOs don't need to be cleared because they
> have separate lists for evicted and moved (but valid) BOs.

Actually the reason is that we know they are reserved together with the 
page directory.

> If we had a
> similar thing for non-VM BOs (but without the automatic validation),
> this case could be optimized. Then moved but valid BOs could be updated
> in the page table instead of clearing them here and updating the page
> table again later.

Well, that won't work because other BOs can change their location while 
we try to use their location.

But what I'm speculating about for a while now is to use trylock on the 
BOs here. That might not work all the time, but clearly most of the time.

Going to give that a try now,
Christian.

>
>>   		if (r)
>>   			return r;
>>   
>> @@ -2068,6 +2078,37 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
>>   	return bo_va;
>>   }
>>   
>> +
>> +/**
>> + * amdgpu_vm_bo_insert_mapping - insert a new mapping
>> + *
>> + * @adev: amdgpu_device pointer
>> + * @bo_va: bo_va to store the address
>> + * @mapping: the mapping to insert
>> + *
>> + * Insert a new mapping into all structures.
>> + */
>> +static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev,
>> +				    struct amdgpu_bo_va *bo_va,
>> +				    struct amdgpu_bo_va_mapping *mapping)
>> +{
>> +	struct amdgpu_vm *vm = bo_va->base.vm;
>> +	struct amdgpu_bo *bo = bo_va->base.bo;
>> +
>> +	list_add(&mapping->list, &bo_va->invalids);
>> +	amdgpu_vm_it_insert(mapping, &vm->va);
>> +
>> +	if (mapping->flags & AMDGPU_PTE_PRT)
>> +		amdgpu_vm_prt_get(adev);
>> +
>> +	if (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv) {
>> +		spin_lock(&vm->status_lock);
>> +		list_move(&bo_va->base.vm_status, &vm->moved);
>> +		spin_unlock(&vm->status_lock);
>> +	}
>> +	trace_amdgpu_vm_bo_map(bo_va, mapping);
>> +}
>> +
>>   /**
>>    * amdgpu_vm_bo_map - map bo inside a vm
>>    *
>> @@ -2119,18 +2160,12 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
>>   	if (!mapping)
>>   		return -ENOMEM;
>>   
>> -	INIT_LIST_HEAD(&mapping->list);
>>   	mapping->start = saddr;
>>   	mapping->last = eaddr;
>>   	mapping->offset = offset;
>>   	mapping->flags = flags;
>>   
>> -	list_add(&mapping->list, &bo_va->invalids);
>> -	amdgpu_vm_it_insert(mapping, &vm->va);
>> -
>> -	if (flags & AMDGPU_PTE_PRT)
>> -		amdgpu_vm_prt_get(adev);
>> -	trace_amdgpu_vm_bo_map(bo_va, mapping);
>> +	amdgpu_vm_bo_insert_map(adev, bo_va, mapping);
>>   
>>   	return 0;
>>   }
>> @@ -2157,7 +2192,6 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,
>>   {
>>   	struct amdgpu_bo_va_mapping *mapping;
>>   	struct amdgpu_bo *bo = bo_va->base.bo;
>> -	struct amdgpu_vm *vm = bo_va->base.vm;
>>   	uint64_t eaddr;
>>   	int r;
>>   
>> @@ -2191,12 +2225,7 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,
>>   	mapping->offset = offset;
>>   	mapping->flags = flags;
>>   
>> -	list_add(&mapping->list, &bo_va->invalids);
>> -	amdgpu_vm_it_insert(mapping, &vm->va);
>> -
>> -	if (flags & AMDGPU_PTE_PRT)
>> -		amdgpu_vm_prt_get(adev);
>> -	trace_amdgpu_vm_bo_map(bo_va, mapping);
>> +	amdgpu_vm_bo_insert_map(adev, bo_va, mapping);
>>   
>>   	return 0;
>>   }
>> @@ -2411,7 +2440,11 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
>>   		bo_base->moved = true;
>>   		if (evicted && bo->tbo.resv == vm->root.base.bo->tbo.resv) {
>>   			spin_lock(&bo_base->vm->status_lock);
>> -			list_move(&bo_base->vm_status, &vm->evicted);
>> +			if (bo->tbo.type == ttm_bo_type_kernel)
>> +				list_move(&bo_base->vm_status, &vm->evicted);
>> +			else
>> +				list_move_tail(&bo_base->vm_status,
>> +					       &vm->evicted);
>>   			spin_unlock(&bo_base->vm->status_lock);
>>   			continue;
>>   		}
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> index c3753af..90b7741 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> @@ -249,8 +249,9 @@ int amdgpu_vm_update_directories(struct amdgpu_device *adev,
>>   int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
>>   			  struct amdgpu_vm *vm,
>>   			  struct dma_fence **fence);
>> -int amdgpu_vm_clear_moved(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>> -			  struct amdgpu_sync *sync);
>> +int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
>> +			   struct amdgpu_vm *vm,
>> +			   struct amdgpu_sync *sync);
>>   int amdgpu_vm_bo_update(struct amdgpu_device *adev,
>>   			struct amdgpu_bo_va *bo_va,
>>   			bool clear);
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
> > Just thinking out loud: Per VM BOs don't need to be cleared because they
> > have separate lists for evicted and moved (but valid) BOs.
>
> Actually the reason is that we know they are reserved together with the
> page directory.

I mentioned separate lists because that allows us to distinguish valid and invalid BOs. Non-VM BOs are all on the same list. So it's harder to tell, which ones are still valid but have moved, and which ones have been evicted and must be revalidated.

Regards,
  Felix
Am 31.08.2017 um 15:20 schrieb Kuehling, Felix:
>>> Just thinking out loud: Per VM BOs don't need to be cleared because they
>>> have separate lists for evicted and moved (but valid) BOs.
>> Actually the reason is that we know they are reserved together with the
>> page directory.
> I mentioned separate lists because that allows us to distinguish valid and invalid BOs. Non-VM BOs are all on the same list. So it's harder to tell, which ones are still valid but have moved, and which ones have been evicted and must be revalidated.

Actually that's not an issue, even evicted BOs are handled correctly 
here and shouldn't be revalidated just because somebody uses the VM.

It's just that we could as well set the new location instead of clearing 
the VM page tables of BOs which aren't part of the current working set.

Regards,
Christian.

>
> Regards,
>    Felix
>
> ________________________________________
> From: Christian König <deathsimple@vodafone.de>
> Sent: Thursday, August 31, 2017 8:20:58 AM
> To: Kuehling, Felix; amd-gfx@lists.freedesktop.org; Koenig, Christian
> Subject: Re: [PATCH 2/4] drm/amdgpu: add support for per VM BOs v2
>
> Am 31.08.2017 um 00:50 schrieb Felix Kuehling:
>> Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
>>
>> Some more thoughts inline, but nothing that should be addressed in this
>> change.
>>
>> Regards,
>>     Felix
>>
>>
>> On 2017-08-30 11:00 AM, Christian König wrote:
>>> From: Christian König <christian.koenig@amd.com>
>>>
>>> Per VM BOs are handled like VM PDs and PTs. They are always valid and don't
>>> need to be specified in the BO lists.
>>>
>>> v2: validate PDs/PTs first
>>>
>>> Signed-off-by: Christian König <christian.koenig@amd.com>
>>> ---
>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c |  2 +-
>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 79 ++++++++++++++++++++++++----------
>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  5 ++-
>>>    3 files changed, 60 insertions(+), 26 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>> index f68ac56..48e18cc 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>> @@ -813,7 +813,7 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
>>>
>>>       }
>>>
>>> -    r = amdgpu_vm_clear_moved(adev, vm, &p->job->sync);
>>> +    r = amdgpu_vm_handle_moved(adev, vm, &p->job->sync);
>>>
>>>       if (amdgpu_vm_debug && p->bo_list) {
>>>               /* Invalidate all BOs to test for userspace bugs */
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> index 4cdfb70..6cd20e7 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> @@ -189,14 +189,18 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>>>                       spin_unlock(&glob->lru_lock);
>>>               }
>>>
>>> -            if (vm->use_cpu_for_update) {
>>> +            if (bo->tbo.type == ttm_bo_type_kernel &&
>>> +                vm->use_cpu_for_update) {
>>>                       r = amdgpu_bo_kmap(bo, NULL);
>>>                       if (r)
>>>                               return r;
>>>               }
>>>
>>>               spin_lock(&vm->status_lock);
>>> -            list_move(&bo_base->vm_status, &vm->relocated);
>>> +            if (bo->tbo.type != ttm_bo_type_kernel)
>>> +                    list_move(&bo_base->vm_status, &vm->moved);
>>> +            else
>>> +                    list_move(&bo_base->vm_status, &vm->relocated);
>>>       }
>>>       spin_unlock(&vm->status_lock);
>>>
>>> @@ -1994,20 +1998,23 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
>>>    }
>>>
>>>    /**
>>> - * amdgpu_vm_clear_moved - clear moved BOs in the PT
>>> + * amdgpu_vm_handle_moved - handle moved BOs in the PT
>>>     *
>>>     * @adev: amdgpu_device pointer
>>>     * @vm: requested vm
>>> + * @sync: sync object to add fences to
>>>     *
>>> - * Make sure all moved BOs are cleared in the PT.
>>> + * Make sure all BOs which are moved are updated in the PTs.
>>>     * Returns 0 for success.
>>>     *
>>> - * PTs have to be reserved and mutex must be locked!
>>> + * PTs have to be reserved!
>>>     */
>>> -int amdgpu_vm_clear_moved(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>>> -                        struct amdgpu_sync *sync)
>>> +int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
>>> +                       struct amdgpu_vm *vm,
>>> +                       struct amdgpu_sync *sync)
>>>    {
>>>       struct amdgpu_bo_va *bo_va = NULL;
>>> +    bool clear;
>>>       int r = 0;
>>>
>>>       spin_lock(&vm->status_lock);
>>> @@ -2016,7 +2023,10 @@ int amdgpu_vm_clear_moved(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>>>                       struct amdgpu_bo_va, base.vm_status);
>>>               spin_unlock(&vm->status_lock);
>>>
>>> -            r = amdgpu_vm_bo_update(adev, bo_va, true);
>>> +            /* Per VM BOs never need to bo cleared in the page tables */
>>> +            clear = bo_va->base.bo->tbo.resv != vm->root.base.bo->tbo.resv;
>>> +
>>> +            r = amdgpu_vm_bo_update(adev, bo_va, clear);
>> Just thinking out loud: Per VM BOs don't need to be cleared because they
>> have separate lists for evicted and moved (but valid) BOs.
> Actually the reason is that we know they are reserved together with the
> page directory.
>
>> If we had a
>> similar thing for non-VM BOs (but without the automatic validation),
>> this case could be optimized. Then moved but valid BOs could be updated
>> in the page table instead of clearing them here and updating the page
>> table again later.
> Well, that won't work because other BOs can change their location while
> we try to use their location.
>
> But what I'm speculating about for a while now is to use trylock on the
> BOs here. That might not work all the time, but clearly most of the time.
>
> Going to give that a try now,
> Christian.
>
>>>               if (r)
>>>                       return r;
>>>
>>> @@ -2068,6 +2078,37 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
>>>       return bo_va;
>>>    }
>>>
>>> +
>>> +/**
>>> + * amdgpu_vm_bo_insert_mapping - insert a new mapping
>>> + *
>>> + * @adev: amdgpu_device pointer
>>> + * @bo_va: bo_va to store the address
>>> + * @mapping: the mapping to insert
>>> + *
>>> + * Insert a new mapping into all structures.
>>> + */
>>> +static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev,
>>> +                                struct amdgpu_bo_va *bo_va,
>>> +                                struct amdgpu_bo_va_mapping *mapping)
>>> +{
>>> +    struct amdgpu_vm *vm = bo_va->base.vm;
>>> +    struct amdgpu_bo *bo = bo_va->base.bo;
>>> +
>>> +    list_add(&mapping->list, &bo_va->invalids);
>>> +    amdgpu_vm_it_insert(mapping, &vm->va);
>>> +
>>> +    if (mapping->flags & AMDGPU_PTE_PRT)
>>> +            amdgpu_vm_prt_get(adev);
>>> +
>>> +    if (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv) {
>>> +            spin_lock(&vm->status_lock);
>>> +            list_move(&bo_va->base.vm_status, &vm->moved);
>>> +            spin_unlock(&vm->status_lock);
>>> +    }
>>> +    trace_amdgpu_vm_bo_map(bo_va, mapping);
>>> +}
>>> +
>>>    /**
>>>     * amdgpu_vm_bo_map - map bo inside a vm
>>>     *
>>> @@ -2119,18 +2160,12 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
>>>       if (!mapping)
>>>               return -ENOMEM;
>>>
>>> -    INIT_LIST_HEAD(&mapping->list);
>>>       mapping->start = saddr;
>>>       mapping->last = eaddr;
>>>       mapping->offset = offset;
>>>       mapping->flags = flags;
>>>
>>> -    list_add(&mapping->list, &bo_va->invalids);
>>> -    amdgpu_vm_it_insert(mapping, &vm->va);
>>> -
>>> -    if (flags & AMDGPU_PTE_PRT)
>>> -            amdgpu_vm_prt_get(adev);
>>> -    trace_amdgpu_vm_bo_map(bo_va, mapping);
>>> +    amdgpu_vm_bo_insert_map(adev, bo_va, mapping);
>>>
>>>       return 0;
>>>    }
>>> @@ -2157,7 +2192,6 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,
>>>    {
>>>       struct amdgpu_bo_va_mapping *mapping;
>>>       struct amdgpu_bo *bo = bo_va->base.bo;
>>> -    struct amdgpu_vm *vm = bo_va->base.vm;
>>>       uint64_t eaddr;
>>>       int r;
>>>
>>> @@ -2191,12 +2225,7 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,
>>>       mapping->offset = offset;
>>>       mapping->flags = flags;
>>>
>>> -    list_add(&mapping->list, &bo_va->invalids);
>>> -    amdgpu_vm_it_insert(mapping, &vm->va);
>>> -
>>> -    if (flags & AMDGPU_PTE_PRT)
>>> -            amdgpu_vm_prt_get(adev);
>>> -    trace_amdgpu_vm_bo_map(bo_va, mapping);
>>> +    amdgpu_vm_bo_insert_map(adev, bo_va, mapping);
>>>
>>>       return 0;
>>>    }
>>> @@ -2411,7 +2440,11 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
>>>               bo_base->moved = true;
>>>               if (evicted && bo->tbo.resv == vm->root.base.bo->tbo.resv) {
>>>                       spin_lock(&bo_base->vm->status_lock);
>>> -                    list_move(&bo_base->vm_status, &vm->evicted);
>>> +                    if (bo->tbo.type == ttm_bo_type_kernel)
>>> +                            list_move(&bo_base->vm_status, &vm->evicted);
>>> +                    else
>>> +                            list_move_tail(&bo_base->vm_status,
>>> +                                           &vm->evicted);
>>>                       spin_unlock(&bo_base->vm->status_lock);
>>>                       continue;
>>>               }
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>> index c3753af..90b7741 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>> @@ -249,8 +249,9 @@ int amdgpu_vm_update_directories(struct amdgpu_device *adev,
>>>    int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
>>>                         struct amdgpu_vm *vm,
>>>                         struct dma_fence **fence);
>>> -int amdgpu_vm_clear_moved(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>>> -                      struct amdgpu_sync *sync);
>>> +int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
>>> +                       struct amdgpu_vm *vm,
>>> +                       struct amdgpu_sync *sync);
>>>    int amdgpu_vm_bo_update(struct amdgpu_device *adev,
>>>                       struct amdgpu_bo_va *bo_va,
>>>                       bool clear);
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx