[2/2] drm/amdgpu: validate fallback BOs

Submitted by Zhou, David(ChunMing) on March 19, 2018, 10 a.m.

Details

Message ID 20180319100048.8925-2-david1.zhou@amd.com
State New
Headers show
Series "Series without cover letter" ( rev: 1 ) in AMD X.Org drivers

Not browsing as part of any series.

Commit Message

Zhou, David(ChunMing) March 19, 2018, 10 a.m.
issue:
Game F1 performance drops 13% when per vm bo is enabled.

root cause:
if some BOs are fallback to allowed domain, they will never be validated if no eviction happens,
that means they always exist in allowed domain.

Fix:
maintain a per vm allowed domain BOs list, then try to validated them with perferred domain.

Change-Id: I4335470bf867b46ac93c8e2531eac3f8ba9ac2da
Signed-off-by: Chunming Zhou <david1.zhou@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 15 +++++++----
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 49 ++++++++++++++++++++++++++++++++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  7 ++++-
 3 files changed, 63 insertions(+), 8 deletions(-)

Patch hide | download patch | download mbox

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 383bf2d31c92..7509b6bd2047 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -359,7 +359,7 @@  void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes,
 }
 
 static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
-				 struct amdgpu_bo *bo)
+				 struct amdgpu_bo *bo, bool *allowed)
 {
 	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
 	struct ttm_operation_ctx ctx = {
@@ -374,6 +374,8 @@  static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
 	if (bo->pin_count)
 		return 0;
 
+	if (allowed)
+		*allowed = false;
 	/* Don't move this buffer if we have depleted our allowance
 	 * to move it. Don't move anything if the threshold is zero.
 	 */
@@ -396,6 +398,9 @@  static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
 	}
 
 retry:
+	if (domain != bo->preferred_domains && domain == bo->allowed_domains &&
+	    allowed)
+		*allowed = true;
 	amdgpu_ttm_placement_from_domain(bo, domain);
 	r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
 
@@ -479,19 +484,19 @@  static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p,
 	return false;
 }
 
-static int amdgpu_cs_validate(void *param, struct amdgpu_bo *bo)
+static int amdgpu_cs_validate(void *param, struct amdgpu_bo *bo, bool *allowed)
 {
 	struct amdgpu_cs_parser *p = param;
 	int r;
 
 	do {
-		r = amdgpu_cs_bo_validate(p, bo);
+		r = amdgpu_cs_bo_validate(p, bo, allowed);
 	} while (r == -ENOMEM && amdgpu_cs_try_evict(p, bo));
 	if (r)
 		return r;
 
 	if (bo->shadow)
-		r = amdgpu_cs_bo_validate(p, bo->shadow);
+		r = amdgpu_cs_bo_validate(p, bo->shadow, NULL);
 
 	return r;
 }
@@ -528,7 +533,7 @@  static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
 		if (p->evictable == lobj)
 			p->evictable = NULL;
 
-		r = amdgpu_cs_validate(p, bo);
+		r = amdgpu_cs_validate(p, bo, NULL);
 		if (r)
 			return r;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index e9a41dd05345..365e8dca05f5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -186,6 +186,35 @@  void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
 	list_add(&entry->tv.head, validated);
 }
 
+static int amdgpu_vm_try_validate_allowed(struct amdgpu_device *adev,
+					  struct amdgpu_vm *vm,
+					  int (*validate)(void *p,
+							  struct amdgpu_bo *bo,
+							  bool *allowed),
+					  void *param)
+{
+	struct amdgpu_vm_bo_base *bo_base, *tmp;
+	int r;
+	bool allowed;
+
+	spin_lock(&vm->status_lock);
+	list_for_each_entry_safe(bo_base, tmp, &vm->allowed_domain,
+				 allowed_domain_list) {
+		spin_unlock(&vm->status_lock);
+		r = validate(param, bo_base->bo, &allowed);
+		if (r)
+			return r;
+		spin_lock(&vm->status_lock);
+		if (!allowed)
+			list_del_init(&bo_base->allowed_domain_list);
+		if (bo_base->bo->tbo.type != ttm_bo_type_kernel)
+			list_move(&bo_base->vm_status, &vm->moved);
+		else
+			list_move(&bo_base->vm_status, &vm->relocated);
+	}
+	spin_unlock(&vm->status_lock);
+	return 0;
+}
 /**
  * amdgpu_vm_validate_pt_bos - validate the page table BOs
  *
@@ -197,16 +226,19 @@  void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
  * Validate the page table BOs on command submission if neccessary.
  */
 int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
-			      int (*validate)(void *p, struct amdgpu_bo *bo),
+			      int (*validate)(void *p, struct amdgpu_bo *bo,
+					      bool *allowed),
 			      void *param)
 {
 	struct ttm_bo_global *glob = adev->mman.bdev.glob;
+	LIST_HEAD(tmp_allowed);
 	int r;
 
 	spin_lock(&vm->status_lock);
 	while (!list_empty(&vm->evicted)) {
 		struct amdgpu_vm_bo_base *bo_base;
 		struct amdgpu_bo *bo;
+		bool allowed = false;
 
 		bo_base = list_first_entry(&vm->evicted,
 					   struct amdgpu_vm_bo_base,
@@ -216,7 +248,7 @@  int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 		bo = bo_base->bo;
 		BUG_ON(!bo);
 		if (bo->parent) {
-			r = validate(param, bo);
+			r = validate(param, bo, &allowed);
 			if (r)
 				return r;
 
@@ -235,6 +267,10 @@  int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 		}
 
 		spin_lock(&vm->status_lock);
+		if (allowed)
+			list_add_tail(&bo_base->allowed_domain_list,
+				      &tmp_allowed);
+
 		if (bo->tbo.type != ttm_bo_type_kernel)
 			list_move(&bo_base->vm_status, &vm->moved);
 		else
@@ -242,6 +278,12 @@  int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 	}
 	spin_unlock(&vm->status_lock);
 
+	r = amdgpu_vm_try_validate_allowed(adev, vm, validate, param);
+	if (r)
+		return r;
+	spin_lock(&vm->status_lock);
+	list_splice_init(&tmp_allowed, &vm->allowed_domain);
+	spin_unlock(&vm->status_lock);
 	return 0;
 }
 
@@ -1868,6 +1910,7 @@  struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
 	bo_va->base.bo = bo;
 	INIT_LIST_HEAD(&bo_va->base.bo_list);
 	INIT_LIST_HEAD(&bo_va->base.vm_status);
+	INIT_LIST_HEAD(&bo_va->base.allowed_domain_list);
 
 	bo_va->ref_count = 1;
 	INIT_LIST_HEAD(&bo_va->valids);
@@ -2237,6 +2280,7 @@  void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
 
 	spin_lock(&vm->status_lock);
 	list_del(&bo_va->base.vm_status);
+	list_del(&bo_va->base.allowed_domain_list);
 	spin_unlock(&vm->status_lock);
 
 	list_for_each_entry_safe(mapping, next, &bo_va->valids, list) {
@@ -2409,6 +2453,7 @@  int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 	for (i = 0; i < AMDGPU_MAX_VMHUBS; i++)
 		vm->reserved_vmid[i] = NULL;
 	spin_lock_init(&vm->status_lock);
+	INIT_LIST_HEAD(&vm->allowed_domain);
 	INIT_LIST_HEAD(&vm->evicted);
 	INIT_LIST_HEAD(&vm->relocated);
 	INIT_LIST_HEAD(&vm->moved);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index cf2c667ee538..54c39d3ea6d8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -149,6 +149,7 @@  struct amdgpu_vm_bo_base {
 
 	/* protected by spinlock */
 	struct list_head		vm_status;
+	struct list_head		allowed_domain_list;
 
 	/* protected by the BO being reserved */
 	bool				moved;
@@ -177,6 +178,9 @@  struct amdgpu_vm {
 	/* protecting invalidated */
 	spinlock_t		status_lock;
 
+	/* per vm bo is validated to allowed domain */
+	struct list_head	allowed_domain;
+
 	/* BOs who needs a validation */
 	struct list_head	evicted;
 
@@ -266,7 +270,8 @@  void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
 			 struct amdgpu_bo_list_entry *entry);
 bool amdgpu_vm_ready(struct amdgpu_vm *vm);
 int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
-			      int (*callback)(void *p, struct amdgpu_bo *bo),
+			      int (*callback)(void *p, struct amdgpu_bo *bo,
+					      bool *allowed),
 			      void *param);
 int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
 			struct amdgpu_vm *vm,

Comments

Am 19.03.2018 um 11:00 schrieb Chunming Zhou:
> issue:
> Game F1 performance drops 13% when per vm bo is enabled.
>
> root cause:
> if some BOs are fallback to allowed domain, they will never be validated if no eviction happens,
> that means they always exist in allowed domain.
>
> Fix:
> maintain a per vm allowed domain BOs list, then try to validated them with perferred domain.

The idea sounds good, but I find the implementation a bit overkill.

Give me a second to hack something together,
Christian.

>
> Change-Id: I4335470bf867b46ac93c8e2531eac3f8ba9ac2da
> Signed-off-by: Chunming Zhou <david1.zhou@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 15 +++++++----
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 49 ++++++++++++++++++++++++++++++++--
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  7 ++++-
>   3 files changed, 63 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index 383bf2d31c92..7509b6bd2047 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -359,7 +359,7 @@ void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes,
>   }
>   
>   static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
> -				 struct amdgpu_bo *bo)
> +				 struct amdgpu_bo *bo, bool *allowed)
>   {
>   	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
>   	struct ttm_operation_ctx ctx = {
> @@ -374,6 +374,8 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
>   	if (bo->pin_count)
>   		return 0;
>   
> +	if (allowed)
> +		*allowed = false;
>   	/* Don't move this buffer if we have depleted our allowance
>   	 * to move it. Don't move anything if the threshold is zero.
>   	 */
> @@ -396,6 +398,9 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
>   	}
>   
>   retry:
> +	if (domain != bo->preferred_domains && domain == bo->allowed_domains &&
> +	    allowed)
> +		*allowed = true;
>   	amdgpu_ttm_placement_from_domain(bo, domain);
>   	r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
>   
> @@ -479,19 +484,19 @@ static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p,
>   	return false;
>   }
>   
> -static int amdgpu_cs_validate(void *param, struct amdgpu_bo *bo)
> +static int amdgpu_cs_validate(void *param, struct amdgpu_bo *bo, bool *allowed)
>   {
>   	struct amdgpu_cs_parser *p = param;
>   	int r;
>   
>   	do {
> -		r = amdgpu_cs_bo_validate(p, bo);
> +		r = amdgpu_cs_bo_validate(p, bo, allowed);
>   	} while (r == -ENOMEM && amdgpu_cs_try_evict(p, bo));
>   	if (r)
>   		return r;
>   
>   	if (bo->shadow)
> -		r = amdgpu_cs_bo_validate(p, bo->shadow);
> +		r = amdgpu_cs_bo_validate(p, bo->shadow, NULL);
>   
>   	return r;
>   }
> @@ -528,7 +533,7 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
>   		if (p->evictable == lobj)
>   			p->evictable = NULL;
>   
> -		r = amdgpu_cs_validate(p, bo);
> +		r = amdgpu_cs_validate(p, bo, NULL);
>   		if (r)
>   			return r;
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index e9a41dd05345..365e8dca05f5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -186,6 +186,35 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
>   	list_add(&entry->tv.head, validated);
>   }
>   
> +static int amdgpu_vm_try_validate_allowed(struct amdgpu_device *adev,
> +					  struct amdgpu_vm *vm,
> +					  int (*validate)(void *p,
> +							  struct amdgpu_bo *bo,
> +							  bool *allowed),
> +					  void *param)
> +{
> +	struct amdgpu_vm_bo_base *bo_base, *tmp;
> +	int r;
> +	bool allowed;
> +
> +	spin_lock(&vm->status_lock);
> +	list_for_each_entry_safe(bo_base, tmp, &vm->allowed_domain,
> +				 allowed_domain_list) {
> +		spin_unlock(&vm->status_lock);
> +		r = validate(param, bo_base->bo, &allowed);
> +		if (r)
> +			return r;
> +		spin_lock(&vm->status_lock);
> +		if (!allowed)
> +			list_del_init(&bo_base->allowed_domain_list);
> +		if (bo_base->bo->tbo.type != ttm_bo_type_kernel)
> +			list_move(&bo_base->vm_status, &vm->moved);
> +		else
> +			list_move(&bo_base->vm_status, &vm->relocated);
> +	}
> +	spin_unlock(&vm->status_lock);
> +	return 0;
> +}
>   /**
>    * amdgpu_vm_validate_pt_bos - validate the page table BOs
>    *
> @@ -197,16 +226,19 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
>    * Validate the page table BOs on command submission if neccessary.
>    */
>   int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
> -			      int (*validate)(void *p, struct amdgpu_bo *bo),
> +			      int (*validate)(void *p, struct amdgpu_bo *bo,
> +					      bool *allowed),
>   			      void *param)
>   {
>   	struct ttm_bo_global *glob = adev->mman.bdev.glob;
> +	LIST_HEAD(tmp_allowed);
>   	int r;
>   
>   	spin_lock(&vm->status_lock);
>   	while (!list_empty(&vm->evicted)) {
>   		struct amdgpu_vm_bo_base *bo_base;
>   		struct amdgpu_bo *bo;
> +		bool allowed = false;
>   
>   		bo_base = list_first_entry(&vm->evicted,
>   					   struct amdgpu_vm_bo_base,
> @@ -216,7 +248,7 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>   		bo = bo_base->bo;
>   		BUG_ON(!bo);
>   		if (bo->parent) {
> -			r = validate(param, bo);
> +			r = validate(param, bo, &allowed);
>   			if (r)
>   				return r;
>   
> @@ -235,6 +267,10 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>   		}
>   
>   		spin_lock(&vm->status_lock);
> +		if (allowed)
> +			list_add_tail(&bo_base->allowed_domain_list,
> +				      &tmp_allowed);
> +
>   		if (bo->tbo.type != ttm_bo_type_kernel)
>   			list_move(&bo_base->vm_status, &vm->moved);
>   		else
> @@ -242,6 +278,12 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>   	}
>   	spin_unlock(&vm->status_lock);
>   
> +	r = amdgpu_vm_try_validate_allowed(adev, vm, validate, param);
> +	if (r)
> +		return r;
> +	spin_lock(&vm->status_lock);
> +	list_splice_init(&tmp_allowed, &vm->allowed_domain);
> +	spin_unlock(&vm->status_lock);
>   	return 0;
>   }
>   
> @@ -1868,6 +1910,7 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
>   	bo_va->base.bo = bo;
>   	INIT_LIST_HEAD(&bo_va->base.bo_list);
>   	INIT_LIST_HEAD(&bo_va->base.vm_status);
> +	INIT_LIST_HEAD(&bo_va->base.allowed_domain_list);
>   
>   	bo_va->ref_count = 1;
>   	INIT_LIST_HEAD(&bo_va->valids);
> @@ -2237,6 +2280,7 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
>   
>   	spin_lock(&vm->status_lock);
>   	list_del(&bo_va->base.vm_status);
> +	list_del(&bo_va->base.allowed_domain_list);
>   	spin_unlock(&vm->status_lock);
>   
>   	list_for_each_entry_safe(mapping, next, &bo_va->valids, list) {
> @@ -2409,6 +2453,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>   	for (i = 0; i < AMDGPU_MAX_VMHUBS; i++)
>   		vm->reserved_vmid[i] = NULL;
>   	spin_lock_init(&vm->status_lock);
> +	INIT_LIST_HEAD(&vm->allowed_domain);
>   	INIT_LIST_HEAD(&vm->evicted);
>   	INIT_LIST_HEAD(&vm->relocated);
>   	INIT_LIST_HEAD(&vm->moved);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index cf2c667ee538..54c39d3ea6d8 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -149,6 +149,7 @@ struct amdgpu_vm_bo_base {
>   
>   	/* protected by spinlock */
>   	struct list_head		vm_status;
> +	struct list_head		allowed_domain_list;
>   
>   	/* protected by the BO being reserved */
>   	bool				moved;
> @@ -177,6 +178,9 @@ struct amdgpu_vm {
>   	/* protecting invalidated */
>   	spinlock_t		status_lock;
>   
> +	/* per vm bo is validated to allowed domain */
> +	struct list_head	allowed_domain;
> +
>   	/* BOs who needs a validation */
>   	struct list_head	evicted;
>   
> @@ -266,7 +270,8 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
>   			 struct amdgpu_bo_list_entry *entry);
>   bool amdgpu_vm_ready(struct amdgpu_vm *vm);
>   int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
> -			      int (*callback)(void *p, struct amdgpu_bo *bo),
> +			      int (*callback)(void *p, struct amdgpu_bo *bo,
> +					      bool *allowed),
>   			      void *param);
>   int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
>   			struct amdgpu_vm *vm,