[2/2] drm/amd/amdgpu: skip amdgpu_gem_va_update_vm when shadow page directory is swapped out

Submitted by Nicolai Hähnle on Dec. 12, 2016, 3:04 p.m.

Details

Message ID 1481555054-27405-2-git-send-email-nhaehnle@gmail.com
State New
Headers show
Series "Series without cover letter" ( rev: 1 ) in AMD X.Org drivers

Not browsing as part of any series.

Commit Message

Nicolai Hähnle Dec. 12, 2016, 3:04 p.m.
From: Nicolai Hähnle <nicolai.haehnle@amd.com>

This fixes a crash with the following stack trace:

 amdgpu_gtt_mgr_alloc
 amdgpu_ttm_bind
 amdgpu_vm_update_page_directory
 amdgpu_gem_va_update_vm

Signed-off-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

Patch hide | download patch | download mbox

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index fbfbe5a..ffb3e70 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -495,20 +495,21 @@  static int amdgpu_gem_va_check(void *param, struct amdgpu_bo *bo)
  *
  * Update the bo_va directly after setting it's address. Errors are not
  * vital here, so they are not reported back to userspace.
  */
 static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
 				    struct amdgpu_bo_va *bo_va,
 				    uint32_t operation)
 {
 	struct ttm_validate_buffer tv, *entry;
 	struct amdgpu_bo_list_entry vm_pd;
+	struct amdgpu_bo *pd_shadow;
 	struct ww_acquire_ctx ticket;
 	struct list_head list, duplicates;
 	unsigned domain;
 	int r;
 
 	INIT_LIST_HEAD(&list);
 	INIT_LIST_HEAD(&duplicates);
 
 	tv.bo = &bo_va->bo->tbo;
 	tv.shared = true;
@@ -521,20 +522,30 @@  static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
 	if (r)
 		goto error_print;
 
 	list_for_each_entry(entry, &list, head) {
 		domain = amdgpu_mem_type_to_domain(entry->bo->mem.mem_type);
 		/* if anything is swapped out don't swap it in here,
 		   just abort and wait for the next CS */
 		if (domain == AMDGPU_GEM_DOMAIN_CPU)
 			goto error_unreserve;
 	}
+
+	/* Also abort if the page directory shadow has been swapped out. */
+	pd_shadow = bo_va->vm->page_directory->shadow;
+	if (pd_shadow) {
+		domain = amdgpu_mem_type_to_domain(pd_shadow->tbo.mem.mem_type);
+
+		if (domain == AMDGPU_GEM_DOMAIN_CPU)
+			goto error_unreserve;
+	}
+
 	r = amdgpu_vm_validate_pt_bos(adev, bo_va->vm, amdgpu_gem_va_check,
 				      NULL);
 	if (r)
 		goto error_unreserve;
 
 	r = amdgpu_vm_update_page_directory(adev, bo_va->vm);
 	if (r)
 		goto error_unreserve;
 
 	r = amdgpu_vm_clear_freed(adev, bo_va->vm);

Comments

Am 12.12.2016 um 16:04 schrieb Nicolai Hähnle:
> From: Nicolai Hähnle <nicolai.haehnle@amd.com>
>
> This fixes a crash with the following stack trace:
>
>   amdgpu_gtt_mgr_alloc
>   amdgpu_ttm_bind
>   amdgpu_vm_update_page_directory
>   amdgpu_gem_va_update_vm
>
> Signed-off-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 11 +++++++++++
>   1 file changed, 11 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> index fbfbe5a..ffb3e70 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> @@ -495,20 +495,21 @@ static int amdgpu_gem_va_check(void *param, struct amdgpu_bo *bo)
>    *
>    * Update the bo_va directly after setting it's address. Errors are not
>    * vital here, so they are not reported back to userspace.
>    */
>   static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
>   				    struct amdgpu_bo_va *bo_va,
>   				    uint32_t operation)
>   {
>   	struct ttm_validate_buffer tv, *entry;
>   	struct amdgpu_bo_list_entry vm_pd;
> +	struct amdgpu_bo *pd_shadow;
>   	struct ww_acquire_ctx ticket;
>   	struct list_head list, duplicates;
>   	unsigned domain;
>   	int r;
>   
>   	INIT_LIST_HEAD(&list);
>   	INIT_LIST_HEAD(&duplicates);
>   
>   	tv.bo = &bo_va->bo->tbo;
>   	tv.shared = true;
> @@ -521,20 +522,30 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
>   	if (r)
>   		goto error_print;
>   
>   	list_for_each_entry(entry, &list, head) {
>   		domain = amdgpu_mem_type_to_domain(entry->bo->mem.mem_type);
>   		/* if anything is swapped out don't swap it in here,
>   		   just abort and wait for the next CS */
>   		if (domain == AMDGPU_GEM_DOMAIN_CPU)
>   			goto error_unreserve;

I would just do the check in this loop instead of adding a separate check 
specialized for the page directory down below.

We have only two entries in the list anyway, and I would actually suggest 
adding a helper such as amdgpu_bo_swapped_out() or 
amdgpu_bo_gpu_accessible() to amdgpu_object.h.

Regards,
Christian.

>   	}
> +
> +	/* Also abort if the page directory shadow has been swapped out. */
> +	pd_shadow = bo_va->vm->page_directory->shadow;
> +	if (pd_shadow) {
> +		domain = amdgpu_mem_type_to_domain(pd_shadow->tbo.mem.mem_type);
> +
> +		if (domain == AMDGPU_GEM_DOMAIN_CPU)
> +			goto error_unreserve;
> +	}
> +
>   	r = amdgpu_vm_validate_pt_bos(adev, bo_va->vm, amdgpu_gem_va_check,
>   				      NULL);
>   	if (r)
>   		goto error_unreserve;
>   
>   	r = amdgpu_vm_update_page_directory(adev, bo_va->vm);
>   	if (r)
>   		goto error_unreserve;
>   
>   	r = amdgpu_vm_clear_freed(adev, bo_va->vm);