[2/5] drm/amd/amdgpu: move eviction counting to amdgpu_bo_move_notify

Submitted by Nicolai Hähnle on Dec. 15, 2016, 5:10 p.m.

Details

Message ID 1481821815-9971-3-git-send-email-nhaehnle@gmail.com
State New
Headers show
Series "drm/ttm, amdgpu: fix crashes due to shadow page table evictions" (rev: 2, 1) in AMD X.Org drivers

Not browsing as part of any series.

Commit Message

Nicolai Hähnle Dec. 15, 2016, 5:10 p.m.
From: Nicolai Hähnle <nicolai.haehnle@amd.com>

This catches evictions of shadow page tables from the GART. Since shadow
page tables are always stored in system memory, amdgpu_bo_move is never
called for them.

This fixes a crash during command submission that occurs when only a shadow
page table and no other BOs were evicted since the last submission.

Fixes: 1baa439fb2f4e586 ("drm/amdgpu: allocate shadow for pd/pt bo V2")
Signed-off-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 4 ++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c    | 4 ----
 2 files changed, 4 insertions(+), 4 deletions(-)

Patch hide | download patch | download mbox

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index c29db99..d94cdef 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -855,20 +855,24 @@  void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
 	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
 	struct amdgpu_bo *abo;
 	struct ttm_mem_reg *old_mem = &bo->mem;
 
 	if (!amdgpu_ttm_bo_is_amdgpu_bo(bo))
 		return;
 
 	abo = container_of(bo, struct amdgpu_bo, tbo);
 	amdgpu_vm_bo_invalidate(adev, abo);
 
+	/* remember the eviction */
+	if (evict)
+		atomic64_inc(&adev->num_evictions);
+
 	/* update statistics */
 	if (!new_mem)
 		return;
 
 	/* move_notify is called before move happens */
 	amdgpu_update_memory_usage(adev, &bo->mem, new_mem);
 
 	trace_amdgpu_ttm_bo_move(abo, new_mem->mem_type, old_mem->mem_type);
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 8f18b8e..80924c2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -460,24 +460,20 @@  static int amdgpu_bo_move(struct ttm_buffer_object *bo,
 	struct ttm_mem_reg *old_mem = &bo->mem;
 	int r;
 
 	/* Can't move a pinned BO */
 	abo = container_of(bo, struct amdgpu_bo, tbo);
 	if (WARN_ON_ONCE(abo->pin_count > 0))
 		return -EINVAL;
 
 	adev = amdgpu_ttm_adev(bo->bdev);
 
-	/* remember the eviction */
-	if (evict)
-		atomic64_inc(&adev->num_evictions);
-
 	if (old_mem->mem_type == TTM_PL_SYSTEM && bo->ttm == NULL) {
 		amdgpu_move_null(bo, new_mem);
 		return 0;
 	}
 	if ((old_mem->mem_type == TTM_PL_TT &&
 	     new_mem->mem_type == TTM_PL_SYSTEM) ||
 	    (old_mem->mem_type == TTM_PL_SYSTEM &&
 	     new_mem->mem_type == TTM_PL_TT)) {
 		/* bind is enough */
 		amdgpu_move_null(bo, new_mem);

Comments

On 2016年12月16日 01:10, Nicolai Hähnle wrote:
> From: Nicolai Hähnle <nicolai.haehnle@amd.com>
>
> This catches evictions of shadow page tables from the GART. Since shadow
> page tables are always stored in system memory, amdgpu_bo_move is never
> called for them.
>
> This fixes a crash during command submission that occurs when only a shadow
> page table and no other BOs were evicted since the last submission.
>
> Fixes: 1baa439fb2f4e586 ("drm/amdgpu: allocate shadow for pd/pt bo V2")
> Signed-off-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
Acked-by: Chunming Zhou <david1.zhou@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 4 ++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c    | 4 ----
>   2 files changed, 4 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> index c29db99..d94cdef 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> @@ -855,20 +855,24 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
>   	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
>   	struct amdgpu_bo *abo;
>   	struct ttm_mem_reg *old_mem = &bo->mem;
>   
>   	if (!amdgpu_ttm_bo_is_amdgpu_bo(bo))
>   		return;
>   
>   	abo = container_of(bo, struct amdgpu_bo, tbo);
>   	amdgpu_vm_bo_invalidate(adev, abo);
>   
> +	/* remember the eviction */
> +	if (evict)
> +		atomic64_inc(&adev->num_evictions);
> +
>   	/* update statistics */
>   	if (!new_mem)
>   		return;
>   
>   	/* move_notify is called before move happens */
>   	amdgpu_update_memory_usage(adev, &bo->mem, new_mem);
>   
>   	trace_amdgpu_ttm_bo_move(abo, new_mem->mem_type, old_mem->mem_type);
>   }
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index 8f18b8e..80924c2 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -460,24 +460,20 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo,
>   	struct ttm_mem_reg *old_mem = &bo->mem;
>   	int r;
>   
>   	/* Can't move a pinned BO */
>   	abo = container_of(bo, struct amdgpu_bo, tbo);
>   	if (WARN_ON_ONCE(abo->pin_count > 0))
>   		return -EINVAL;
>   
>   	adev = amdgpu_ttm_adev(bo->bdev);
>   
> -	/* remember the eviction */
> -	if (evict)
> -		atomic64_inc(&adev->num_evictions);
> -
>   	if (old_mem->mem_type == TTM_PL_SYSTEM && bo->ttm == NULL) {
>   		amdgpu_move_null(bo, new_mem);
>   		return 0;
>   	}
>   	if ((old_mem->mem_type == TTM_PL_TT &&
>   	     new_mem->mem_type == TTM_PL_SYSTEM) ||
>   	    (old_mem->mem_type == TTM_PL_SYSTEM &&
>   	     new_mem->mem_type == TTM_PL_TT)) {
>   		/* bind is enough */
>   		amdgpu_move_null(bo, new_mem);