[4/5] drm/amd/amdgpu: add check that shadow page directory is GPU-accessible

Submitted by Nicolai Hähnle on Dec. 15, 2016, 5:10 p.m.

Details

Message ID 1481821815-9971-5-git-send-email-nhaehnle@gmail.com
State New
Headers show
Series "drm/ttm, amdgpu: fix crashes due to shadow page table evictions" ( rev: 2 1 ) in AMD X.Org drivers

Not browsing as part of any series.

Commit Message

Nicolai Hähnle Dec. 15, 2016, 5:10 p.m.
From: Nicolai Hähnle <nicolai.haehnle@amd.com>

Skip amdgpu_gem_va_update_vm when the shadow page directory is swapped out.
Clean up the check for non-shadow BOs as well using the new helper function.

This fixes a crash with the stack trace:

amdgpu_gem_va_update_vm
-> amdgpu_vm_update_page_directory
 -> amdgpu_ttm_bind
  -> amdgpu_gtt_mgr_alloc

Signed-off-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

Patch hide | download patch | download mbox

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index cd62f6f..4e1eb05 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -489,44 +489,49 @@  static int amdgpu_gem_va_check(void *param, struct amdgpu_bo *bo)
  * vital here, so they are not reported back to userspace.
  */
 static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
 				    struct amdgpu_bo_va *bo_va,
 				    uint32_t operation)
 {
 	struct ttm_validate_buffer tv, *entry;
 	struct amdgpu_bo_list_entry vm_pd;
 	struct ww_acquire_ctx ticket;
 	struct list_head list, duplicates;
-	unsigned domain;
 	int r;
 
 	INIT_LIST_HEAD(&list);
 	INIT_LIST_HEAD(&duplicates);
 
 	tv.bo = &bo_va->bo->tbo;
 	tv.shared = true;
 	list_add(&tv.head, &list);
 
 	amdgpu_vm_get_pd_bo(bo_va->vm, &list, &vm_pd);
 
 	/* Provide duplicates to avoid -EALREADY */
 	r = ttm_eu_reserve_buffers(&ticket, &list, true, &duplicates);
 	if (r)
 		goto error_print;
 
 	list_for_each_entry(entry, &list, head) {
-		domain = amdgpu_mem_type_to_domain(entry->bo->mem.mem_type);
+		struct amdgpu_bo *bo =
+			container_of(entry->bo, struct amdgpu_bo, tbo);
+
 		/* if anything is swapped out don't swap it in here,
 		   just abort and wait for the next CS */
-		if (domain == AMDGPU_GEM_DOMAIN_CPU)
+		if (!amdgpu_bo_gpu_accessible(bo))
+			goto error_unreserve;
+
+		if (bo->shadow && !amdgpu_bo_gpu_accessible(bo->shadow))
 			goto error_unreserve;
 	}
+
 	r = amdgpu_vm_validate_pt_bos(adev, bo_va->vm, amdgpu_gem_va_check,
 				      NULL);
 	if (r)
 		goto error_unreserve;
 
 	r = amdgpu_vm_update_page_directory(adev, bo_va->vm);
 	if (r)
 		goto error_unreserve;
 
 	r = amdgpu_vm_clear_freed(adev, bo_va->vm);

Comments

On 2016年12月16日 01:10, Nicolai Hähnle wrote:
> From: Nicolai Hähnle <nicolai.haehnle@amd.com>
>
> Skip amdgpu_gem_va_update_vm when the shadow page directory is swapped out.
> Clean up the check for non-shadow BOs as well using the new helper function.
>
> This fixes a crash with the stack trace:
>
> amdgpu_gem_va_update_vm
> -> amdgpu_vm_update_page_directory
>   -> amdgpu_ttm_bind
>    -> amdgpu_gtt_mgr_alloc
>
> Signed-off-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
Reviewed-by: Chunming Zhou <david1.zhou@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 11 ++++++++---
>   1 file changed, 8 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> index cd62f6f..4e1eb05 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> @@ -489,44 +489,49 @@ static int amdgpu_gem_va_check(void *param, struct amdgpu_bo *bo)
>    * vital here, so they are not reported back to userspace.
>    */
>   static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
>   				    struct amdgpu_bo_va *bo_va,
>   				    uint32_t operation)
>   {
>   	struct ttm_validate_buffer tv, *entry;
>   	struct amdgpu_bo_list_entry vm_pd;
>   	struct ww_acquire_ctx ticket;
>   	struct list_head list, duplicates;
> -	unsigned domain;
>   	int r;
>   
>   	INIT_LIST_HEAD(&list);
>   	INIT_LIST_HEAD(&duplicates);
>   
>   	tv.bo = &bo_va->bo->tbo;
>   	tv.shared = true;
>   	list_add(&tv.head, &list);
>   
>   	amdgpu_vm_get_pd_bo(bo_va->vm, &list, &vm_pd);
>   
>   	/* Provide duplicates to avoid -EALREADY */
>   	r = ttm_eu_reserve_buffers(&ticket, &list, true, &duplicates);
>   	if (r)
>   		goto error_print;
>   
>   	list_for_each_entry(entry, &list, head) {
> -		domain = amdgpu_mem_type_to_domain(entry->bo->mem.mem_type);
> +		struct amdgpu_bo *bo =
> +			container_of(entry->bo, struct amdgpu_bo, tbo);
> +
>   		/* if anything is swapped out don't swap it in here,
>   		   just abort and wait for the next CS */
> -		if (domain == AMDGPU_GEM_DOMAIN_CPU)
> +		if (!amdgpu_bo_gpu_accessible(bo))
> +			goto error_unreserve;
> +
> +		if (bo->shadow && !amdgpu_bo_gpu_accessible(bo->shadow))
>   			goto error_unreserve;
>   	}
> +
>   	r = amdgpu_vm_validate_pt_bos(adev, bo_va->vm, amdgpu_gem_va_check,
>   				      NULL);
>   	if (r)
>   		goto error_unreserve;
>   
>   	r = amdgpu_vm_update_page_directory(adev, bo_va->vm);
>   	if (r)
>   		goto error_unreserve;
>   
>   	r = amdgpu_vm_clear_freed(adev, bo_va->vm);