[2/2] drm/amdgpu: bind GTT on demand

Submitted by Christian König on Sept. 6, 2016, 9:41 a.m.

Details

Message ID 1473154866-2448-2-git-send-email-deathsimple@vodafone.de
State New
Series "Series without cover letter" ( rev: 1 ) in AMD X.Org drivers


Commit Message

Christian König Sept. 6, 2016, 9:41 a.m.
From: Christian König <christian.koenig@amd.com>

We don't really need the GTT table any more most of the time. So bind it
only on demand.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c     | 34 ++++++++++++++++++++++++++++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c |  7 ++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c    | 33 ++++++++++++++++++++++++++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h    |  3 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c    |  4 ++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c    |  6 +++++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c     |  3 ++-
 8 files changed, 84 insertions(+), 7 deletions(-)


diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 9d39fa8..28d4a67 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -2521,6 +2521,7 @@  static inline void amdgpu_acpi_fini(struct amdgpu_device *adev) { }
 struct amdgpu_bo_va_mapping *
 amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
 		       uint64_t addr, struct amdgpu_bo **bo);
+int amdgpu_cs_sysvm_access_required(struct amdgpu_cs_parser *parser);
 
 #if defined(CONFIG_DRM_AMD_DAL)
 int amdgpu_dm_display_resume(struct amdgpu_device *adev );
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 20a1962..e069978 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -640,8 +640,12 @@  static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 		}
 	}
 
-	if (p->uf_entry.robj)
-		p->job->uf_addr += amdgpu_bo_gpu_offset(p->uf_entry.robj);
+	if (!r && p->uf_entry.robj) {
+		struct amdgpu_bo *uf = p->uf_entry.robj;
+
+		r = amdgpu_ttm_bind(uf->tbo.ttm, &uf->tbo.mem);
+		p->job->uf_addr += amdgpu_bo_gpu_offset(uf);
+	}
 
 error_validate:
 	if (r) {
@@ -1156,3 +1160,29 @@  amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
 
 	return NULL;
 }
+
+/**
+ * amdgpu_cs_sysvm_access_required - make BOs accessible by the system VM
+ *
+ * @parser: command submission parser context
+ *
+ * Helper for UVD/VCE VM emulation, make sure BOs are accessible by the system VM.
+ */
+int amdgpu_cs_sysvm_access_required(struct amdgpu_cs_parser *parser)
+{
+	unsigned i;
+	int r;
+
+	if (!parser->bo_list)
+		return 0;
+
+	for (i = 0; i < parser->bo_list->num_entries; i++) {
+		struct amdgpu_bo *bo = parser->bo_list->array[i].robj;
+
+		r = amdgpu_ttm_bind(bo->tbo.ttm, &bo->tbo.mem);
+		if (unlikely(r))
+			return r;
+	}
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index b17734e..dc73f11 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -646,6 +646,11 @@  int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
 		dev_err(bo->adev->dev, "%p pin failed\n", bo);
 		goto error;
 	}
+	r = amdgpu_ttm_bind(bo->tbo.ttm, &bo->tbo.mem);
+	if (unlikely(r)) {
+		dev_err(bo->adev->dev, "%p bind failed\n", bo);
+		goto error;
+	}
 
 	bo->pin_count = 1;
 	if (gpu_addr != NULL)
@@ -918,6 +923,8 @@  void amdgpu_bo_fence(struct amdgpu_bo *bo, struct fence *fence,
 u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo)
 {
 	WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_SYSTEM);
+	WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_TT &&
+		     !amdgpu_ttm_is_bound(bo->tbo.ttm));
 	WARN_ON_ONCE(!ww_mutex_is_locked(&bo->tbo.resv->lock) &&
 		     !bo->pin_count);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 4337b3a..c294aa9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -256,8 +256,12 @@  static int amdgpu_move_blit(struct ttm_buffer_object *bo,
 	new_start = (u64)new_mem->start << PAGE_SHIFT;
 
 	switch (old_mem->mem_type) {
-	case TTM_PL_VRAM:
 	case TTM_PL_TT:
+		r = amdgpu_ttm_bind(bo->ttm, old_mem);
+		if (r)
+			return r;
+
+	case TTM_PL_VRAM:
 		old_start += bo->bdev->man[old_mem->mem_type].gpu_offset;
 		break;
 	default:
@@ -265,8 +269,12 @@  static int amdgpu_move_blit(struct ttm_buffer_object *bo,
 		return -EINVAL;
 	}
 	switch (new_mem->mem_type) {
-	case TTM_PL_VRAM:
 	case TTM_PL_TT:
+		r = amdgpu_ttm_bind(bo->ttm, new_mem);
+		if (r)
+			return r;
+
+	case TTM_PL_VRAM:
 		new_start += bo->bdev->man[new_mem->mem_type].gpu_offset;
 		break;
 	default:
@@ -639,7 +647,6 @@  static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm,
 				   struct ttm_mem_reg *bo_mem)
 {
 	struct amdgpu_ttm_tt *gtt = (void*)ttm;
-	uint32_t flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm, bo_mem);
 	int r;
 
 	if (gtt->userptr) {
@@ -660,6 +667,26 @@  static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm,
 	    bo_mem->mem_type == AMDGPU_PL_OA)
 		return -EINVAL;
 
+	return 0;
+}
+
+bool amdgpu_ttm_is_bound(struct ttm_tt *ttm)
+{
+	struct amdgpu_ttm_tt *gtt = (void *)ttm;
+
+	return gtt && !list_empty(&gtt->list);
+}
+
+int amdgpu_ttm_bind(struct ttm_tt *ttm, struct ttm_mem_reg *bo_mem)
+{
+	struct amdgpu_ttm_tt *gtt = (void *)ttm;
+	uint32_t flags;
+	int r;
+
+	if (!ttm || amdgpu_ttm_is_bound(ttm))
+		return 0;
+
+	flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm, bo_mem);
 	r = amdgpu_gart_bind(gtt->adev, gtt->offset, ttm->num_pages,
 		ttm->pages, gtt->ttm.dma_address, flags);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index 72f6bfc..214bae9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -77,4 +77,7 @@  int amdgpu_fill_buffer(struct amdgpu_bo *bo,
 			struct fence **fence);
 
 int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma);
+bool amdgpu_ttm_is_bound(struct ttm_tt *ttm);
+int amdgpu_ttm_bind(struct ttm_tt *ttm, struct ttm_mem_reg *bo_mem);
+
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
index 5888e8a..fab7bb1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -891,6 +891,10 @@  int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, uint32_t ib_idx)
 		return -EINVAL;
 	}
 
+	r = amdgpu_cs_sysvm_access_required(parser);
+	if (r)
+		return r;
+
 	ctx.parser = parser;
 	ctx.buf_sizes = buf_sizes;
 	ctx.ib_idx = ib_idx;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
index 9b71d6c..b0c6702 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
@@ -634,7 +634,11 @@  int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx)
 	uint32_t allocated = 0;
 	uint32_t tmp, handle = 0;
 	uint32_t *size = &tmp;
-	int i, r = 0, idx = 0;
+	int i, r, idx = 0;
+
+	r = amdgpu_cs_sysvm_access_required(p);
+	if (r)
+		return r;
 
 	while (idx < ib->length_dw) {
 		uint32_t len = amdgpu_get_ib_value(p, ib_idx, idx);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 7660f82..a549abd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1162,7 +1162,8 @@  int amdgpu_vm_bo_update(struct amdgpu_device *adev,
 	}
 
 	flags = amdgpu_ttm_tt_pte_flags(adev, bo_va->bo->tbo.ttm, mem);
-	gtt_flags = (adev == bo_va->bo->adev) ? flags : 0;
+	gtt_flags = (amdgpu_ttm_is_bound(bo_va->bo->tbo.ttm) &&
+		adev == bo_va->bo->adev) ? flags : 0;
 
 	spin_lock(&vm->status_lock);
 	if (!list_empty(&bo_va->vm_status))

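As a quick orientation for the helper introduced above, here is a hypothetical parse_cs callback (function name invented for illustration; it simply mirrors the amdgpu_uvd.c/amdgpu_vce.c hunks) showing where amdgpu_cs_sysvm_access_required() is meant to sit:

static int example_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx)
{
        int r;

        /* Make every BO in p->bo_list GART-accessible (no-op without a
         * bo_list or when the BOs are already bound). */
        r = amdgpu_cs_sysvm_access_required(p);
        if (r)
                return r;

        /* ... walk the IB and patch command-stream addresses, as UVD/VCE do ... */
        return 0;
}
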
Comments

On Sept. 6, 2016, 5:41 p.m., Christian König wrote:
> From: Christian König <christian.koenig@amd.com>
>
> We don't really need the GTT table any more most of the time.
Why? I thought a GTT BO always needs to be bound when the GPU is trying to
access it, doesn't it?

Regards,
David Zhou
On Sept. 6, 2016, 11:53 a.m., zhoucm1 wrote:
>
>
> On Sept. 6, 2016, 5:41 p.m., Christian König wrote:
>> From: Christian König <christian.koenig@amd.com>
>>
>> We don't really need the GTT table any more most of the time.
> Why? I thought a GTT BO always needs to be bound when the GPU is trying
> to access it, doesn't it?

We only need it to be bound when we try to access it from the system VM 
(UVD/VCE, ring buffers, fences etc...).

The VMs for each process can work without it and it is just overhead to 
bind it all the time.

Regards,
Christian.
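
As a rough sketch of that idea (the helper name below is invented for illustration, but it uses only the functions this series adds), a system-VM user binds lazily right before it needs the GPU address:

static int example_need_sysvm_address(struct amdgpu_bo *bo, u64 *gpu_addr)
{
        int r;

        /* Lazy bind: returns 0 immediately if the ttm_tt is already in the GART. */
        r = amdgpu_ttm_bind(bo->tbo.ttm, &bo->tbo.mem);
        if (r)
                return r;

        /* Safe w.r.t. the new TTM_PL_TT warning in amdgpu_bo_gpu_offset();
         * the BO must still be reserved or pinned, as before. */
        *gpu_addr = amdgpu_bo_gpu_offset(bo);
        return 0;
}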


On Sept. 6, 2016, 6:48 p.m., Christian König wrote:
> On Sept. 6, 2016, 11:53 a.m., zhoucm1 wrote:
>>
>>
>> On Sept. 6, 2016, 5:41 p.m., Christian König wrote:
>>> From: Christian König <christian.koenig@amd.com>
>>>
>>> We don't really need the GTT table any more most of the time.
>> Why? I thought a GTT BO always needs to be bound when the GPU is trying
>> to access it, doesn't it?
>
> We only need it to be bound when we try to access it from the system 
> VM (UVD/VCE, ring buffers, fences etc...).
>
> The VMs for each process can work without it and it is just overhead 
> to bind it all the time.
Yes, I see what you mean; copying PTEs from the GART to the VM confused me.
I thought binding the GTT table was a must, which is wrong.
Going through the VM code again: when we don't bind the GTT at all for the
per-process VM, the PTEs can be created by amdgpu_vm_map_gart since there is
no GTT PTE source, am I right now?

Regards,
David Zhou

On Sept. 7, 2016, 9:18 a.m., zhoucm1 wrote:
>
>
> On Sept. 6, 2016, 6:48 p.m., Christian König wrote:
>> Am 06.09.2016 um 11:53 schrieb zhoucm1:
>>>
>>>
>> On Sept. 6, 2016, 5:41 p.m., Christian König wrote:
>>>> From: Christian König <christian.koenig@amd.com>
>>>>
>>>> We don't really need the GTT table any more most of the time.
>>> Why? I thought a GTT BO always needs to be bound when the GPU is
>>> trying to access it, doesn't it?
>>
>> We only need it to be bound when we try to access it from the system 
>> VM (UVD/VCE, ring buffers, fences etc...).
>>
>> The VMs for each process can work without it and it is just overhead 
>> to bind it all the time.
> Yes, I see what you mean; copying PTEs from the GART to the VM confused
> me. I thought binding the GTT table was a must, which is wrong.
> Going through the VM code again: when we don't bind the GTT at all for
> the per-process VM, the PTEs can be created by amdgpu_vm_map_gart since
> there is no GTT PTE source, am I right now?

Yes, exactly. I still need to measure the performance impact of that;
adding the copy from the GTT made quite a difference when I introduced it.

I hope that doing the write when we update the VM is still faster than
doing the GTT write.

Regards,
Christian.

> -----Original Message-----
> From: amd-gfx [mailto:amd-gfx-bounces@lists.freedesktop.org] On Behalf
> Of Christian König
> Sent: Tuesday, September 06, 2016 5:41 AM
> To: amd-gfx@lists.freedesktop.org
> Subject: [PATCH 2/2] drm/amdgpu: bind GTT on demand
>
> From: Christian König <christian.koenig@amd.com>
>
> We don't really need the GTT table any more most of the time. So bind it
> only on demand.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>

For the series:
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>


Yeah, you save yourself the GTT update, which always uses the CPU.
Instead you write the page table entries to IBs using the CPU. It will be
interesting to see whether that's a net speed-up or a slow-down, if the
difference is significant at all.

The patch looks good to me. Thanks for tackling this. I didn't review it
too thoroughly, and if you forgot to map to GART somewhere important, I
would probably have missed it.

Acked-by: Felix Kuehling <Felix.Kuehling@amd.com>

Once we pull this change into the KFD branch, we can probably revert our
ridiculous GART size without losing the ability to map lots of system
memory to GPUVM. :)

Regards,
  Felix
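
For reference, the trade-off Felix describes hinges on the amdgpu_vm_bo_update() hunk above. Here is an annotated restatement (sketch only; the reading that a non-zero gtt_flags is what enables copying PTEs straight from the GART, with amdgpu_vm_map_gart as the CPU fallback, is an interpretation of the discussion, not something spelled out in the patch itself):

        flags = amdgpu_ttm_tt_pte_flags(adev, bo_va->bo->tbo.ttm, mem);

        /*
         * Only report GTT flags -- and therefore allow the "copy PTEs from
         * the GART" path -- when the ttm_tt is actually bound; an unbound
         * GTT BO has its PTEs generated on the CPU instead.
         */
        gtt_flags = (amdgpu_ttm_is_bound(bo_va->bo->tbo.ttm) &&
                     adev == bo_va->bo->adev) ? flags : 0;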

On 16-09-07 04:18 AM, Christian König wrote:
> Am 07.09.2016 um 09:18 schrieb zhoucm1:
>>
>>
>> On 2016年09月06日 18:48, Christian König wrote:
>>> Am 06.09.2016 um 11:53 schrieb zhoucm1:
>>>>
>>>>
>>>> On 2016年09月06日 17:41, Christian König wrote:
>>>>> From: Christian König <christian.koenig@amd.com>
>>>>>
>>>>> We don't really need the GTT table any more most of the time.
>>>> Why? I thought GTT bo is always needed to be bound when GPU is
>>>> trying to access it, doesn't it?
>>>
>>> We only need it to be bound when we try to access it from the system
>>> VM (UVD/VCE, ring buffers, fences etc...).
>>>
>>> The VMs for each process can work without it and it is just overhead
>>> to bind it all the time.
>> Yes, I got your mean, copying pte from GART to VM confused me, I
>> thought binding GTT table is a must, that's wrong.
>> Go though vm code again, pte can be created by amdgpu_vm_map_gart if
>> no GTT pte source when we don't bind it at all under VM process, am I
>> right now?
>
> Yes, exactly. I still need to measure the performance impact of that;
> adding the copy from the GTT made quite a difference when I introduced
> it.
>
> I hope that doing the write when we update the VM is still faster
> than doing the GTT write.
>
> Regards,
> Christian.
>
>>
>> Regards,
>> David Zhou
>>
>>>
>>> Regards,
>>> Christian.
>>>
>>>
>>>>
>>>> Regards,
>>>> David Zhou
>>>>>   So bind it
>>>>> only on demand.
>>>>>
>>>>> Signed-off-by: Christian König <christian.koenig@amd.com>
>>>>> ---
>>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  1 +
>>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c     | 34
>>>>> ++++++++++++++++++++++++++++--
>>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_object.c |  7 ++++++
>>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c    | 33
>>>>> ++++++++++++++++++++++++++---
>>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h    |  3 +++
>>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c    |  4 ++++
>>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c    |  6 +++++-
>>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c     |  3 ++-
>>>>>   8 files changed, 84 insertions(+), 7 deletions(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>>> index 9d39fa8..28d4a67 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>>> @@ -2521,6 +2521,7 @@ static inline void amdgpu_acpi_fini(struct
>>>>> amdgpu_device *adev) { }
>>>>>   struct amdgpu_bo_va_mapping *
>>>>>   amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
>>>>>                  uint64_t addr, struct amdgpu_bo **bo);
>>>>> +int amdgpu_cs_sysvm_access_required(struct amdgpu_cs_parser
>>>>> *parser);
>>>>>     #if defined(CONFIG_DRM_AMD_DAL)
>>>>>   int amdgpu_dm_display_resume(struct amdgpu_device *adev );
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>>>> index 20a1962..e069978 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>>>> @@ -640,8 +640,12 @@ static int amdgpu_cs_parser_bos(struct
>>>>> amdgpu_cs_parser *p,
>>>>>           }
>>>>>       }
>>>>>   -    if (p->uf_entry.robj)
>>>>> -        p->job->uf_addr += amdgpu_bo_gpu_offset(p->uf_entry.robj);
>>>>> +    if (!r && p->uf_entry.robj) {
>>>>> +        struct amdgpu_bo *uf = p->uf_entry.robj;
>>>>> +
>>>>> +        r = amdgpu_ttm_bind(uf->tbo.ttm, &uf->tbo.mem);
>>>>> +        p->job->uf_addr += amdgpu_bo_gpu_offset(uf);
>>>>> +    }
>>>>>     error_validate:
>>>>>       if (r) {
>>>>> @@ -1156,3 +1160,29 @@ amdgpu_cs_find_mapping(struct
>>>>> amdgpu_cs_parser *parser,
>>>>>         return NULL;
>>>>>   }
>>>>> +
>>>>> +/**
>>>>> + * amdgpu_cs_sysvm_access_required - make BOs accessible by the
>>>>> system VM
>>>>> + *
>>>>> + * @parser: command submission parser context
>>>>> + *
>>>>> + * Helper for UVD/VCE VM emulation, make sure BOs are accessible
>>>>> by the system VM.
>>>>> + */
>>>>> +int amdgpu_cs_sysvm_access_required(struct amdgpu_cs_parser *parser)
>>>>> +{
>>>>> +    unsigned i;
>>>>> +    int r;
>>>>> +
>>>>> +    if (!parser->bo_list)
>>>>> +        return 0;
>>>>> +
>>>>> +    for (i = 0; i < parser->bo_list->num_entries; i++) {
>>>>> +        struct amdgpu_bo *bo = parser->bo_list->array[i].robj;
>>>>> +
>>>>> +        r = amdgpu_ttm_bind(bo->tbo.ttm, &bo->tbo.mem);
>>>>> +        if (unlikely(r))
>>>>> +            return r;
>>>>> +    }
>>>>> +
>>>>> +    return 0;
>>>>> +}
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>>>>> index b17734e..dc73f11 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>>>>> @@ -646,6 +646,11 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo
>>>>> *bo, u32 domain,
>>>>>           dev_err(bo->adev->dev, "%p pin failed\n", bo);
>>>>>           goto error;
>>>>>       }
>>>>> +    r = amdgpu_ttm_bind(bo->tbo.ttm, &bo->tbo.mem);
>>>>> +    if (unlikely(r)) {
>>>>> +        dev_err(bo->adev->dev, "%p bind failed\n", bo);
>>>>> +        goto error;
>>>>> +    }
>>>>>         bo->pin_count = 1;
>>>>>       if (gpu_addr != NULL)
>>>>> @@ -918,6 +923,8 @@ void amdgpu_bo_fence(struct amdgpu_bo *bo,
>>>>> struct fence *fence,
>>>>>   u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo)
>>>>>   {
>>>>>       WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_SYSTEM);
>>>>> +    WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_TT &&
>>>>> +             !amdgpu_ttm_is_bound(bo->tbo.ttm));
>>>>> WARN_ON_ONCE(!ww_mutex_is_locked(&bo->tbo.resv->lock) &&
>>>>>                !bo->pin_count);
>>>>>   diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>>>> index 4337b3a..c294aa9 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>>>> @@ -256,8 +256,12 @@ static int amdgpu_move_blit(struct
>>>>> ttm_buffer_object *bo,
>>>>>       new_start = (u64)new_mem->start << PAGE_SHIFT;
>>>>>         switch (old_mem->mem_type) {
>>>>> -    case TTM_PL_VRAM:
>>>>>       case TTM_PL_TT:
>>>>> +        r = amdgpu_ttm_bind(bo->ttm, old_mem);
>>>>> +        if (r)
>>>>> +            return r;
>>>>> +
>>>>> +    case TTM_PL_VRAM:
>>>>>           old_start += bo->bdev->man[old_mem->mem_type].gpu_offset;
>>>>>           break;
>>>>>       default:
>>>>> @@ -265,8 +269,12 @@ static int amdgpu_move_blit(struct
>>>>> ttm_buffer_object *bo,
>>>>>           return -EINVAL;
>>>>>       }
>>>>>       switch (new_mem->mem_type) {
>>>>> -    case TTM_PL_VRAM:
>>>>>       case TTM_PL_TT:
>>>>> +        r = amdgpu_ttm_bind(bo->ttm, new_mem);
>>>>> +        if (r)
>>>>> +            return r;
>>>>> +
>>>>> +    case TTM_PL_VRAM:
>>>>>           new_start += bo->bdev->man[new_mem->mem_type].gpu_offset;
>>>>>           break;
>>>>>       default:
>>>>> @@ -639,7 +647,6 @@ static int amdgpu_ttm_backend_bind(struct
>>>>> ttm_tt *ttm,
>>>>>                      struct ttm_mem_reg *bo_mem)
>>>>>   {
>>>>>       struct amdgpu_ttm_tt *gtt = (void*)ttm;
>>>>> -    uint32_t flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm,
>>>>> bo_mem);
>>>>>       int r;
>>>>>         if (gtt->userptr) {
>>>>> @@ -660,6 +667,26 @@ static int amdgpu_ttm_backend_bind(struct
>>>>> ttm_tt *ttm,
>>>>>           bo_mem->mem_type == AMDGPU_PL_OA)
>>>>>           return -EINVAL;
>>>>>   +    return 0;
>>>>> +}
>>>>> +
>>>>> +bool amdgpu_ttm_is_bound(struct ttm_tt *ttm)
>>>>> +{
>>>>> +    struct amdgpu_ttm_tt *gtt = (void *)ttm;
>>>>> +
>>>>> +    return gtt && !list_empty(&gtt->list);
>>>>> +}
>>>>> +
>>>>> +int amdgpu_ttm_bind(struct ttm_tt *ttm, struct ttm_mem_reg *bo_mem)
>>>>> +{
>>>>> +    struct amdgpu_ttm_tt *gtt = (void *)ttm;
>>>>> +    uint32_t flags;
>>>>> +    int r;
>>>>> +
>>>>> +    if (!ttm || amdgpu_ttm_is_bound(ttm))
>>>>> +        return 0;
>>>>> +
>>>>> +    flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm, bo_mem);
>>>>>       r = amdgpu_gart_bind(gtt->adev, gtt->offset, ttm->num_pages,
>>>>>           ttm->pages, gtt->ttm.dma_address, flags);
>>>>>   diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>>>>> index 72f6bfc..214bae9 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>>>>> @@ -77,4 +77,7 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
>>>>>               struct fence **fence);
>>>>>     int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma);
>>>>> +bool amdgpu_ttm_is_bound(struct ttm_tt *ttm);
>>>>> +int amdgpu_ttm_bind(struct ttm_tt *ttm, struct ttm_mem_reg *bo_mem);
>>>>> +
>>>>>   #endif
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
>>>>> index 5888e8a..fab7bb1 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
>>>>> @@ -891,6 +891,10 @@ int amdgpu_uvd_ring_parse_cs(struct
>>>>> amdgpu_cs_parser *parser, uint32_t ib_idx)
>>>>>           return -EINVAL;
>>>>>       }
>>>>>   +    r = amdgpu_cs_sysvm_access_required(parser);
>>>>> +    if (r)
>>>>> +        return r;
>>>>> +
>>>>>       ctx.parser = parser;
>>>>>       ctx.buf_sizes = buf_sizes;
>>>>>       ctx.ib_idx = ib_idx;
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
>>>>> index 9b71d6c..b0c6702 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
>>>>> @@ -634,7 +634,11 @@ int amdgpu_vce_ring_parse_cs(struct
>>>>> amdgpu_cs_parser *p, uint32_t ib_idx)
>>>>>       uint32_t allocated = 0;
>>>>>       uint32_t tmp, handle = 0;
>>>>>       uint32_t *size = &tmp;
>>>>> -    int i, r = 0, idx = 0;
>>>>> +    int i, r, idx = 0;
>>>>> +
>>>>> +    r = amdgpu_cs_sysvm_access_required(p);
>>>>> +    if (r)
>>>>> +        return r;
>>>>>         while (idx < ib->length_dw) {
>>>>>           uint32_t len = amdgpu_get_ib_value(p, ib_idx, idx);
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>> index 7660f82..a549abd 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>> @@ -1162,7 +1162,8 @@ int amdgpu_vm_bo_update(struct amdgpu_device
>>>>> *adev,
>>>>>       }
>>>>>         flags = amdgpu_ttm_tt_pte_flags(adev, bo_va->bo->tbo.ttm,
>>>>> mem);
>>>>> -    gtt_flags = (adev == bo_va->bo->adev) ? flags : 0;
>>>>> +    gtt_flags = (amdgpu_ttm_is_bound(bo_va->bo->tbo.ttm) &&
>>>>> +        adev == bo_va->bo->adev) ? flags : 0;
>>>>>         spin_lock(&vm->status_lock);
>>>>>       if (!list_empty(&bo_va->vm_status))
>>>>
>>>
>>> _______________________________________________
>>> amd-gfx mailing list
>>> amd-gfx@lists.freedesktop.org
>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>>
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
> The patch looks good to me. Thanks for tackling this. I didn't review it
> too thoroughly, and if you forgot to map to GART somewhere important, I
> would probably have missed it.

Yeah that's also my biggest concern, but that should at least raise a 
WARN_ON().
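
As a rough, self-contained illustration of that safety net (toy types and
names such as toy_bo_gpu_offset(), not the actual amdgpu code), the idea
is simply to complain whenever a GPU offset is taken from a GTT placement
that was never bound, so a forgotten bind shows up in the log:

/* Toy sketch, not driver code: warn when a GPU offset is requested for a
 * GTT placement that was never bound, mirroring the WARN_ON idea above. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

enum toy_placement { TOY_PL_SYSTEM, TOY_PL_TT, TOY_PL_VRAM };

struct toy_bo {
	enum toy_placement mem_type;
	bool bound;	/* stand-in for amdgpu_ttm_is_bound() */
	uint64_t start;	/* page offset inside the placement */
};

static uint64_t toy_bo_gpu_offset(const struct toy_bo *bo)
{
	/* A GTT BO without a GART binding has no valid GPU address yet. */
	if (bo->mem_type == TOY_PL_TT && !bo->bound)
		fprintf(stderr, "WARN: GPU offset of an unbound GTT BO\n");
	return bo->start << 12;	/* 4 KiB pages assumed */
}

int main(void)
{
	struct toy_bo bo = { .mem_type = TOY_PL_TT, .bound = false, .start = 42 };

	printf("offset = 0x%llx\n",
	       (unsigned long long)toy_bo_gpu_offset(&bo));
	return 0;
}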

> Acked-by: Felix Kuehling <Felix.Kuehling@amd.com>
Thanks.

> Once we pull this change into the KFD branch, we can probably revert our
> ridiculous GART size without losing the ability to map lots of system
> memory to GPUVM. :)
Wait a second with that. You also need the second part of this work,
where I stop assigning address space to all those buffers which aren't
mapped.

But I'm still working on that.

Regards,
Christian.

Am 07.09.2016 um 18:07 schrieb Felix Kuehling:
> Yeah, you save yourself the GTT update, which always uses the CPU.
> Instead you write the page table entries to IBs using the CPU. Will be
> interesting to see if that's a net speed-up or slow-down, if the
> difference is significant at all.
>
> The patch looks good to me. Thanks for tackling this. I didn't review it
> too thoroughly, and if you forgot to map to GART somewhere important, I
> would probably have missed it.
>
> Acked-by: Felix Kuehling <Felix.Kuehling@amd.com>
>
> Once we pull this change into the KFD branch, we can probably revert our
> ridiculous GART size without losing the ability to map lots of system
> memory to GPUVM. :)
>
> Regards,
>    Felix
>
> On 16-09-07 04:18 AM, Christian König wrote:
>> Am 07.09.2016 um 09:18 schrieb zhoucm1:
>>>
>>> On 2016年09月06日 18:48, Christian König wrote:
>>>> Am 06.09.2016 um 11:53 schrieb zhoucm1:
>>>>>
>>>>> On 2016年09月06日 17:41, Christian König wrote:
>>>>>> From: Christian König <christian.koenig@amd.com>
>>>>>>
>>>>>> We don't really need the GTT table any more most of the time.
>>>>> Why? I thought a GTT BO always needs to be bound when the GPU is
>>>>> trying to access it, doesn't it?
>>>> We only need it to be bound when we try to access it from the system
>>>> VM (UVD/VCE, ring buffers, fences etc...).
>>>>
>>>> The VMs for each process can work without it and it is just overhead
>>>> to bind it all the time.
>>> Yes, I see what you mean now. Copying PTEs from the GART into the VM
>>> confused me; I thought binding the GTT table was a must, but that's wrong.
>>> Going through the vm code again: when we don't bind it at all, the PTEs
>>> for the per-process VM can be created by amdgpu_vm_map_gart instead of
>>> being copied from a GTT PTE source, am I right?
>> Yes, exactly. I still need to measure the performance impact of that;
>> adding the copy from the GTT made quite a difference when I introduced
>> it.
>>
>> I hope that doing the write when we update the VM is still faster
>> than doing the GTT write.
>>
>> Regards,
>> Christian.
>>
>>> Regards,
>>> David Zhou
>>>
>>>> Regards,
>>>> Christian.
>>>>
>>>>
>>>>> Regards,
>>>>> David Zhou
>>>>>>    So bind it
>>>>>> only on demand.
>>>>>>
>>>>>> Signed-off-by: Christian König <christian.koenig@amd.com>
>>>>>> ---
>>>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  1 +
>>>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c     | 34
>>>>>> ++++++++++++++++++++++++++++--
>>>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_object.c |  7 ++++++
>>>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c    | 33
>>>>>> ++++++++++++++++++++++++++---
>>>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h    |  3 +++
>>>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c    |  4 ++++
>>>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c    |  6 +++++-
>>>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c     |  3 ++-
>>>>>>    8 files changed, 84 insertions(+), 7 deletions(-)
>>>>>>
>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>>>> index 9d39fa8..28d4a67 100644
>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>>>> @@ -2521,6 +2521,7 @@ static inline void amdgpu_acpi_fini(struct
>>>>>> amdgpu_device *adev) { }
>>>>>>    struct amdgpu_bo_va_mapping *
>>>>>>    amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
>>>>>>                   uint64_t addr, struct amdgpu_bo **bo);
>>>>>> +int amdgpu_cs_sysvm_access_required(struct amdgpu_cs_parser
>>>>>> *parser);
>>>>>>      #if defined(CONFIG_DRM_AMD_DAL)
>>>>>>    int amdgpu_dm_display_resume(struct amdgpu_device *adev );
>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>>>>> index 20a1962..e069978 100644
>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>>>>> @@ -640,8 +640,12 @@ static int amdgpu_cs_parser_bos(struct
>>>>>> amdgpu_cs_parser *p,
>>>>>>            }
>>>>>>        }
>>>>>>    -    if (p->uf_entry.robj)
>>>>>> -        p->job->uf_addr += amdgpu_bo_gpu_offset(p->uf_entry.robj);
>>>>>> +    if (!r && p->uf_entry.robj) {
>>>>>> +        struct amdgpu_bo *uf = p->uf_entry.robj;
>>>>>> +
>>>>>> +        r = amdgpu_ttm_bind(uf->tbo.ttm, &uf->tbo.mem);
>>>>>> +        p->job->uf_addr += amdgpu_bo_gpu_offset(uf);
>>>>>> +    }
>>>>>>      error_validate:
>>>>>>        if (r) {
>>>>>> @@ -1156,3 +1160,29 @@ amdgpu_cs_find_mapping(struct
>>>>>> amdgpu_cs_parser *parser,
>>>>>>          return NULL;
>>>>>>    }
>>>>>> +
>>>>>> +/**
>>>>>> + * amdgpu_cs_sysvm_access_required - make BOs accessible by the
>>>>>> system VM
>>>>>> + *
>>>>>> + * @parser: command submission parser context
>>>>>> + *
>>>>>> + * Helper for UVD/VCE VM emulation, make sure BOs are accessible
>>>>>> by the system VM.
>>>>>> + */
>>>>>> +int amdgpu_cs_sysvm_access_required(struct amdgpu_cs_parser *parser)
>>>>>> +{
>>>>>> +    unsigned i;
>>>>>> +    int r;
>>>>>> +
>>>>>> +    if (!parser->bo_list)
>>>>>> +        return 0;
>>>>>> +
>>>>>> +    for (i = 0; i < parser->bo_list->num_entries; i++) {
>>>>>> +        struct amdgpu_bo *bo = parser->bo_list->array[i].robj;
>>>>>> +
>>>>>> +        r = amdgpu_ttm_bind(bo->tbo.ttm, &bo->tbo.mem);
>>>>>> +        if (unlikely(r))
>>>>>> +            return r;
>>>>>> +    }
>>>>>> +
>>>>>> +    return 0;
>>>>>> +}
>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>>>>>> index b17734e..dc73f11 100644
>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>>>>>> @@ -646,6 +646,11 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo
>>>>>> *bo, u32 domain,
>>>>>>            dev_err(bo->adev->dev, "%p pin failed\n", bo);
>>>>>>            goto error;
>>>>>>        }
>>>>>> +    r = amdgpu_ttm_bind(bo->tbo.ttm, &bo->tbo.mem);
>>>>>> +    if (unlikely(r)) {
>>>>>> +        dev_err(bo->adev->dev, "%p bind failed\n", bo);
>>>>>> +        goto error;
>>>>>> +    }
>>>>>>          bo->pin_count = 1;
>>>>>>        if (gpu_addr != NULL)
>>>>>> @@ -918,6 +923,8 @@ void amdgpu_bo_fence(struct amdgpu_bo *bo,
>>>>>> struct fence *fence,
>>>>>>    u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo)
>>>>>>    {
>>>>>>        WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_SYSTEM);
>>>>>> +    WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_TT &&
>>>>>> +             !amdgpu_ttm_is_bound(bo->tbo.ttm));
>>>>>> WARN_ON_ONCE(!ww_mutex_is_locked(&bo->tbo.resv->lock) &&
>>>>>>                 !bo->pin_count);
>>>>>>    diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>>>>> index 4337b3a..c294aa9 100644
>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>>>>> @@ -256,8 +256,12 @@ static int amdgpu_move_blit(struct
>>>>>> ttm_buffer_object *bo,
>>>>>>        new_start = (u64)new_mem->start << PAGE_SHIFT;
>>>>>>          switch (old_mem->mem_type) {
>>>>>> -    case TTM_PL_VRAM:
>>>>>>        case TTM_PL_TT:
>>>>>> +        r = amdgpu_ttm_bind(bo->ttm, old_mem);
>>>>>> +        if (r)
>>>>>> +            return r;
>>>>>> +
>>>>>> +    case TTM_PL_VRAM:
>>>>>>            old_start += bo->bdev->man[old_mem->mem_type].gpu_offset;
>>>>>>            break;
>>>>>>        default:
>>>>>> @@ -265,8 +269,12 @@ static int amdgpu_move_blit(struct
>>>>>> ttm_buffer_object *bo,
>>>>>>            return -EINVAL;
>>>>>>        }
>>>>>>        switch (new_mem->mem_type) {
>>>>>> -    case TTM_PL_VRAM:
>>>>>>        case TTM_PL_TT:
>>>>>> +        r = amdgpu_ttm_bind(bo->ttm, new_mem);
>>>>>> +        if (r)
>>>>>> +            return r;
>>>>>> +
>>>>>> +    case TTM_PL_VRAM:
>>>>>>            new_start += bo->bdev->man[new_mem->mem_type].gpu_offset;
>>>>>>            break;
>>>>>>        default:
>>>>>> @@ -639,7 +647,6 @@ static int amdgpu_ttm_backend_bind(struct
>>>>>> ttm_tt *ttm,
>>>>>>                       struct ttm_mem_reg *bo_mem)
>>>>>>    {
>>>>>>        struct amdgpu_ttm_tt *gtt = (void*)ttm;
>>>>>> -    uint32_t flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm,
>>>>>> bo_mem);
>>>>>>        int r;
>>>>>>          if (gtt->userptr) {
>>>>>> @@ -660,6 +667,26 @@ static int amdgpu_ttm_backend_bind(struct
>>>>>> ttm_tt *ttm,
>>>>>>            bo_mem->mem_type == AMDGPU_PL_OA)
>>>>>>            return -EINVAL;
>>>>>>    +    return 0;
>>>>>> +}
>>>>>> +
>>>>>> +bool amdgpu_ttm_is_bound(struct ttm_tt *ttm)
>>>>>> +{
>>>>>> +    struct amdgpu_ttm_tt *gtt = (void *)ttm;
>>>>>> +
>>>>>> +    return gtt && !list_empty(&gtt->list);
>>>>>> +}
>>>>>> +
>>>>>> +int amdgpu_ttm_bind(struct ttm_tt *ttm, struct ttm_mem_reg *bo_mem)
>>>>>> +{
>>>>>> +    struct amdgpu_ttm_tt *gtt = (void *)ttm;
>>>>>> +    uint32_t flags;
>>>>>> +    int r;
>>>>>> +
>>>>>> +    if (!ttm || amdgpu_ttm_is_bound(ttm))
>>>>>> +        return 0;
>>>>>> +
>>>>>> +    flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm, bo_mem);
>>>>>>        r = amdgpu_gart_bind(gtt->adev, gtt->offset, ttm->num_pages,
>>>>>>            ttm->pages, gtt->ttm.dma_address, flags);
>>>>>>    diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>>>>>> index 72f6bfc..214bae9 100644
>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>>>>>> @@ -77,4 +77,7 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
>>>>>>                struct fence **fence);
>>>>>>      int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma);
>>>>>> +bool amdgpu_ttm_is_bound(struct ttm_tt *ttm);
>>>>>> +int amdgpu_ttm_bind(struct ttm_tt *ttm, struct ttm_mem_reg *bo_mem);
>>>>>> +
>>>>>>    #endif
>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
>>>>>> index 5888e8a..fab7bb1 100644
>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
>>>>>> @@ -891,6 +891,10 @@ int amdgpu_uvd_ring_parse_cs(struct
>>>>>> amdgpu_cs_parser *parser, uint32_t ib_idx)
>>>>>>            return -EINVAL;
>>>>>>        }
>>>>>>    +    r = amdgpu_cs_sysvm_access_required(parser);
>>>>>> +    if (r)
>>>>>> +        return r;
>>>>>> +
>>>>>>        ctx.parser = parser;
>>>>>>        ctx.buf_sizes = buf_sizes;
>>>>>>        ctx.ib_idx = ib_idx;
>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
>>>>>> index 9b71d6c..b0c6702 100644
>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
>>>>>> @@ -634,7 +634,11 @@ int amdgpu_vce_ring_parse_cs(struct
>>>>>> amdgpu_cs_parser *p, uint32_t ib_idx)
>>>>>>        uint32_t allocated = 0;
>>>>>>        uint32_t tmp, handle = 0;
>>>>>>        uint32_t *size = &tmp;
>>>>>> -    int i, r = 0, idx = 0;
>>>>>> +    int i, r, idx = 0;
>>>>>> +
>>>>>> +    r = amdgpu_cs_sysvm_access_required(p);
>>>>>> +    if (r)
>>>>>> +        return r;
>>>>>>          while (idx < ib->length_dw) {
>>>>>>            uint32_t len = amdgpu_get_ib_value(p, ib_idx, idx);
>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>>> index 7660f82..a549abd 100644
>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>>> @@ -1162,7 +1162,8 @@ int amdgpu_vm_bo_update(struct amdgpu_device
>>>>>> *adev,
>>>>>>        }
>>>>>>          flags = amdgpu_ttm_tt_pte_flags(adev, bo_va->bo->tbo.ttm,
>>>>>> mem);
>>>>>> -    gtt_flags = (adev == bo_va->bo->adev) ? flags : 0;
>>>>>> +    gtt_flags = (amdgpu_ttm_is_bound(bo_va->bo->tbo.ttm) &&
>>>>>> +        adev == bo_va->bo->adev) ? flags : 0;
>>>>>>          spin_lock(&vm->status_lock);
>>>>>>        if (!list_empty(&bo_va->vm_status))
>>>> _______________________________________________
>>>> amd-gfx mailing list
>>>> amd-gfx@lists.freedesktop.org
>>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
This breaks Tonga such that it hangs. Reproducible quickly with:

R600_DEBUG=testdma glxgears

It's a randomized test that runs forever. It should hang within 2 seconds.
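
Conceptually the test just keeps submitting randomized copies and checking
them against a CPU reference until something goes wrong. A toy userspace
sketch of that pattern (not the actual Mesa testdma code; fake_dma_copy()
and all sizes are made up) looks like this:

/*
 * Toy illustration of a randomized copy-and-verify stress loop -- NOT the
 * real R600_DEBUG=testdma implementation, which submits SDMA packets.
 */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define BUF_SIZE 4096

/* Stand-in for a GPU DMA copy. */
static void fake_dma_copy(uint8_t *dst, const uint8_t *src, size_t n)
{
	memcpy(dst, src, n);
}

int main(void)
{
	static uint8_t src[BUF_SIZE], dst[BUF_SIZE], ref[BUF_SIZE];
	unsigned long iter;

	srand(1234);
	for (iter = 0; ; iter++) {	/* runs forever, like testdma */
		size_t off = (size_t)rand() % (BUF_SIZE / 2);
		size_t len = (size_t)rand() % (BUF_SIZE - off);
		size_t i;

		for (i = 0; i < len; i++)
			src[off + i] = (uint8_t)rand();

		fake_dma_copy(dst + off, src + off, len);	/* "GPU" path */
		memcpy(ref + off, src + off, len);		/* CPU reference */

		if (memcmp(dst, ref, BUF_SIZE) != 0) {
			fprintf(stderr, "mismatch at iteration %lu\n", iter);
			return 1;
		}
		if (iter % 100000 == 0)
			printf("iteration %lu OK\n", iter);
	}
}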

Marek

On Tue, Sep 6, 2016 at 11:41 AM, Christian König
<deathsimple@vodafone.de> wrote:
> From: Christian König <christian.koenig@amd.com>
>
> We don't really need the GTT table any more most of the time. So bind it
> only on demand.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  1 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c     | 34 ++++++++++++++++++++++++++++--
>  drivers/gpu/drm/amd/amdgpu/amdgpu_object.c |  7 ++++++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c    | 33 ++++++++++++++++++++++++++---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h    |  3 +++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c    |  4 ++++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c    |  6 +++++-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c     |  3 ++-
>  8 files changed, 84 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 9d39fa8..28d4a67 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -2521,6 +2521,7 @@ static inline void amdgpu_acpi_fini(struct amdgpu_device *adev) { }
>  struct amdgpu_bo_va_mapping *
>  amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
>                        uint64_t addr, struct amdgpu_bo **bo);
> +int amdgpu_cs_sysvm_access_required(struct amdgpu_cs_parser *parser);
>
>  #if defined(CONFIG_DRM_AMD_DAL)
>  int amdgpu_dm_display_resume(struct amdgpu_device *adev );
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index 20a1962..e069978 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -640,8 +640,12 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
>                 }
>         }
>
> -       if (p->uf_entry.robj)
> -               p->job->uf_addr += amdgpu_bo_gpu_offset(p->uf_entry.robj);
> +       if (!r && p->uf_entry.robj) {
> +               struct amdgpu_bo *uf = p->uf_entry.robj;
> +
> +               r = amdgpu_ttm_bind(uf->tbo.ttm, &uf->tbo.mem);
> +               p->job->uf_addr += amdgpu_bo_gpu_offset(uf);
> +       }
>
>  error_validate:
>         if (r) {
> @@ -1156,3 +1160,29 @@ amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
>
>         return NULL;
>  }
> +
> +/**
> + * amdgpu_cs_sysvm_access_required - make BOs accessible by the system VM
> + *
> + * @parser: command submission parser context
> + *
> + * Helper for UVD/VCE VM emulation, make sure BOs are accessible by the system VM.
> + */
> +int amdgpu_cs_sysvm_access_required(struct amdgpu_cs_parser *parser)
> +{
> +       unsigned i;
> +       int r;
> +
> +       if (!parser->bo_list)
> +               return 0;
> +
> +       for (i = 0; i < parser->bo_list->num_entries; i++) {
> +               struct amdgpu_bo *bo = parser->bo_list->array[i].robj;
> +
> +               r = amdgpu_ttm_bind(bo->tbo.ttm, &bo->tbo.mem);
> +               if (unlikely(r))
> +                       return r;
> +       }
> +
> +       return 0;
> +}
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> index b17734e..dc73f11 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> @@ -646,6 +646,11 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
>                 dev_err(bo->adev->dev, "%p pin failed\n", bo);
>                 goto error;
>         }
> +       r = amdgpu_ttm_bind(bo->tbo.ttm, &bo->tbo.mem);
> +       if (unlikely(r)) {
> +               dev_err(bo->adev->dev, "%p bind failed\n", bo);
> +               goto error;
> +       }
>
>         bo->pin_count = 1;
>         if (gpu_addr != NULL)
> @@ -918,6 +923,8 @@ void amdgpu_bo_fence(struct amdgpu_bo *bo, struct fence *fence,
>  u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo)
>  {
>         WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_SYSTEM);
> +       WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_TT &&
> +                    !amdgpu_ttm_is_bound(bo->tbo.ttm));
>         WARN_ON_ONCE(!ww_mutex_is_locked(&bo->tbo.resv->lock) &&
>                      !bo->pin_count);
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index 4337b3a..c294aa9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -256,8 +256,12 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
>         new_start = (u64)new_mem->start << PAGE_SHIFT;
>
>         switch (old_mem->mem_type) {
> -       case TTM_PL_VRAM:
>         case TTM_PL_TT:
> +               r = amdgpu_ttm_bind(bo->ttm, old_mem);
> +               if (r)
> +                       return r;
> +
> +       case TTM_PL_VRAM:
>                 old_start += bo->bdev->man[old_mem->mem_type].gpu_offset;
>                 break;
>         default:
> @@ -265,8 +269,12 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
>                 return -EINVAL;
>         }
>         switch (new_mem->mem_type) {
> -       case TTM_PL_VRAM:
>         case TTM_PL_TT:
> +               r = amdgpu_ttm_bind(bo->ttm, new_mem);
> +               if (r)
> +                       return r;
> +
> +       case TTM_PL_VRAM:
>                 new_start += bo->bdev->man[new_mem->mem_type].gpu_offset;
>                 break;
>         default:
> @@ -639,7 +647,6 @@ static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm,
>                                    struct ttm_mem_reg *bo_mem)
>  {
>         struct amdgpu_ttm_tt *gtt = (void*)ttm;
> -       uint32_t flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm, bo_mem);
>         int r;
>
>         if (gtt->userptr) {
> @@ -660,6 +667,26 @@ static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm,
>             bo_mem->mem_type == AMDGPU_PL_OA)
>                 return -EINVAL;
>
> +       return 0;
> +}
> +
> +bool amdgpu_ttm_is_bound(struct ttm_tt *ttm)
> +{
> +       struct amdgpu_ttm_tt *gtt = (void *)ttm;
> +
> +       return gtt && !list_empty(&gtt->list);
> +}
> +
> +int amdgpu_ttm_bind(struct ttm_tt *ttm, struct ttm_mem_reg *bo_mem)
> +{
> +       struct amdgpu_ttm_tt *gtt = (void *)ttm;
> +       uint32_t flags;
> +       int r;
> +
> +       if (!ttm || amdgpu_ttm_is_bound(ttm))
> +               return 0;
> +
> +       flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm, bo_mem);
>         r = amdgpu_gart_bind(gtt->adev, gtt->offset, ttm->num_pages,
>                 ttm->pages, gtt->ttm.dma_address, flags);
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> index 72f6bfc..214bae9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> @@ -77,4 +77,7 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
>                         struct fence **fence);
>
>  int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma);
> +bool amdgpu_ttm_is_bound(struct ttm_tt *ttm);
> +int amdgpu_ttm_bind(struct ttm_tt *ttm, struct ttm_mem_reg *bo_mem);
> +
>  #endif
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
> index 5888e8a..fab7bb1 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
> @@ -891,6 +891,10 @@ int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, uint32_t ib_idx)
>                 return -EINVAL;
>         }
>
> +       r = amdgpu_cs_sysvm_access_required(parser);
> +       if (r)
> +               return r;
> +
>         ctx.parser = parser;
>         ctx.buf_sizes = buf_sizes;
>         ctx.ib_idx = ib_idx;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
> index 9b71d6c..b0c6702 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
> @@ -634,7 +634,11 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx)
>         uint32_t allocated = 0;
>         uint32_t tmp, handle = 0;
>         uint32_t *size = &tmp;
> -       int i, r = 0, idx = 0;
> +       int i, r, idx = 0;
> +
> +       r = amdgpu_cs_sysvm_access_required(p);
> +       if (r)
> +               return r;
>
>         while (idx < ib->length_dw) {
>                 uint32_t len = amdgpu_get_ib_value(p, ib_idx, idx);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 7660f82..a549abd 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -1162,7 +1162,8 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
>         }
>
>         flags = amdgpu_ttm_tt_pte_flags(adev, bo_va->bo->tbo.ttm, mem);
> -       gtt_flags = (adev == bo_va->bo->adev) ? flags : 0;
> +       gtt_flags = (amdgpu_ttm_is_bound(bo_va->bo->tbo.ttm) &&
> +               adev == bo_va->bo->adev) ? flags : 0;
>
>         spin_lock(&vm->status_lock);
>         if (!list_empty(&bo_va->vm_status))
> --
> 2.5.0
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Thanks for the note, going to take a look tomorrow.

Probably just another case I've missed.

Christian.

Am 17.09.2016 um 23:14 schrieb Marek Olšák:
> This breaks Tonga such that it hangs. Reproducible quickly with:
>
> R600_DEBUG=testdma glxgears
>
> It's a randomized test that runs forever. It should hang within 2 seconds.
>
> Marek
>
> On Tue, Sep 6, 2016 at 11:41 AM, Christian König
> <deathsimple@vodafone.de> wrote:
>> From: Christian König <christian.koenig@amd.com>
>>
>> We don't really need the GTT table any more most of the time. So bind it
>> only on demand.
>>
>> Signed-off-by: Christian König <christian.koenig@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  1 +
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c     | 34 ++++++++++++++++++++++++++++--
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_object.c |  7 ++++++
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c    | 33 ++++++++++++++++++++++++++---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h    |  3 +++
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c    |  4 ++++
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c    |  6 +++++-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c     |  3 ++-
>>   8 files changed, 84 insertions(+), 7 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> index 9d39fa8..28d4a67 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> @@ -2521,6 +2521,7 @@ static inline void amdgpu_acpi_fini(struct amdgpu_device *adev) { }
>>   struct amdgpu_bo_va_mapping *
>>   amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
>>                         uint64_t addr, struct amdgpu_bo **bo);
>> +int amdgpu_cs_sysvm_access_required(struct amdgpu_cs_parser *parser);
>>
>>   #if defined(CONFIG_DRM_AMD_DAL)
>>   int amdgpu_dm_display_resume(struct amdgpu_device *adev );
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> index 20a1962..e069978 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> @@ -640,8 +640,12 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
>>                  }
>>          }
>>
>> -       if (p->uf_entry.robj)
>> -               p->job->uf_addr += amdgpu_bo_gpu_offset(p->uf_entry.robj);
>> +       if (!r && p->uf_entry.robj) {
>> +               struct amdgpu_bo *uf = p->uf_entry.robj;
>> +
>> +               r = amdgpu_ttm_bind(uf->tbo.ttm, &uf->tbo.mem);
>> +               p->job->uf_addr += amdgpu_bo_gpu_offset(uf);
>> +       }
>>
>>   error_validate:
>>          if (r) {
>> @@ -1156,3 +1160,29 @@ amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
>>
>>          return NULL;
>>   }
>> +
>> +/**
>> + * amdgpu_cs_sysvm_access_required - make BOs accessible by the system VM
>> + *
>> + * @parser: command submission parser context
>> + *
>> + * Helper for UVD/VCE VM emulation, make sure BOs are accessible by the system VM.
>> + */
>> +int amdgpu_cs_sysvm_access_required(struct amdgpu_cs_parser *parser)
>> +{
>> +       unsigned i;
>> +       int r;
>> +
>> +       if (!parser->bo_list)
>> +               return 0;
>> +
>> +       for (i = 0; i < parser->bo_list->num_entries; i++) {
>> +               struct amdgpu_bo *bo = parser->bo_list->array[i].robj;
>> +
>> +               r = amdgpu_ttm_bind(bo->tbo.ttm, &bo->tbo.mem);
>> +               if (unlikely(r))
>> +                       return r;
>> +       }
>> +
>> +       return 0;
>> +}
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>> index b17734e..dc73f11 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>> @@ -646,6 +646,11 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
>>                  dev_err(bo->adev->dev, "%p pin failed\n", bo);
>>                  goto error;
>>          }
>> +       r = amdgpu_ttm_bind(bo->tbo.ttm, &bo->tbo.mem);
>> +       if (unlikely(r)) {
>> +               dev_err(bo->adev->dev, "%p bind failed\n", bo);
>> +               goto error;
>> +       }
>>
>>          bo->pin_count = 1;
>>          if (gpu_addr != NULL)
>> @@ -918,6 +923,8 @@ void amdgpu_bo_fence(struct amdgpu_bo *bo, struct fence *fence,
>>   u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo)
>>   {
>>          WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_SYSTEM);
>> +       WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_TT &&
>> +                    !amdgpu_ttm_is_bound(bo->tbo.ttm));
>>          WARN_ON_ONCE(!ww_mutex_is_locked(&bo->tbo.resv->lock) &&
>>                       !bo->pin_count);
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> index 4337b3a..c294aa9 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> @@ -256,8 +256,12 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
>>          new_start = (u64)new_mem->start << PAGE_SHIFT;
>>
>>          switch (old_mem->mem_type) {
>> -       case TTM_PL_VRAM:
>>          case TTM_PL_TT:
>> +               r = amdgpu_ttm_bind(bo->ttm, old_mem);
>> +               if (r)
>> +                       return r;
>> +
>> +       case TTM_PL_VRAM:
>>                  old_start += bo->bdev->man[old_mem->mem_type].gpu_offset;
>>                  break;
>>          default:
>> @@ -265,8 +269,12 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
>>                  return -EINVAL;
>>          }
>>          switch (new_mem->mem_type) {
>> -       case TTM_PL_VRAM:
>>          case TTM_PL_TT:
>> +               r = amdgpu_ttm_bind(bo->ttm, new_mem);
>> +               if (r)
>> +                       return r;
>> +
>> +       case TTM_PL_VRAM:
>>                  new_start += bo->bdev->man[new_mem->mem_type].gpu_offset;
>>                  break;
>>          default:
>> @@ -639,7 +647,6 @@ static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm,
>>                                     struct ttm_mem_reg *bo_mem)
>>   {
>>          struct amdgpu_ttm_tt *gtt = (void*)ttm;
>> -       uint32_t flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm, bo_mem);
>>          int r;
>>
>>          if (gtt->userptr) {
>> @@ -660,6 +667,26 @@ static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm,
>>              bo_mem->mem_type == AMDGPU_PL_OA)
>>                  return -EINVAL;
>>
>> +       return 0;
>> +}
>> +
>> +bool amdgpu_ttm_is_bound(struct ttm_tt *ttm)
>> +{
>> +       struct amdgpu_ttm_tt *gtt = (void *)ttm;
>> +
>> +       return gtt && !list_empty(&gtt->list);
>> +}
>> +
>> +int amdgpu_ttm_bind(struct ttm_tt *ttm, struct ttm_mem_reg *bo_mem)
>> +{
>> +       struct amdgpu_ttm_tt *gtt = (void *)ttm;
>> +       uint32_t flags;
>> +       int r;
>> +
>> +       if (!ttm || amdgpu_ttm_is_bound(ttm))
>> +               return 0;
>> +
>> +       flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm, bo_mem);
>>          r = amdgpu_gart_bind(gtt->adev, gtt->offset, ttm->num_pages,
>>                  ttm->pages, gtt->ttm.dma_address, flags);
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>> index 72f6bfc..214bae9 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>> @@ -77,4 +77,7 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
>>                          struct fence **fence);
>>
>>   int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma);
>> +bool amdgpu_ttm_is_bound(struct ttm_tt *ttm);
>> +int amdgpu_ttm_bind(struct ttm_tt *ttm, struct ttm_mem_reg *bo_mem);
>> +
>>   #endif
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
>> index 5888e8a..fab7bb1 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
>> @@ -891,6 +891,10 @@ int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, uint32_t ib_idx)
>>                  return -EINVAL;
>>          }
>>
>> +       r = amdgpu_cs_sysvm_access_required(parser);
>> +       if (r)
>> +               return r;
>> +
>>          ctx.parser = parser;
>>          ctx.buf_sizes = buf_sizes;
>>          ctx.ib_idx = ib_idx;
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
>> index 9b71d6c..b0c6702 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
>> @@ -634,7 +634,11 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx)
>>          uint32_t allocated = 0;
>>          uint32_t tmp, handle = 0;
>>          uint32_t *size = &tmp;
>> -       int i, r = 0, idx = 0;
>> +       int i, r, idx = 0;
>> +
>> +       r = amdgpu_cs_sysvm_access_required(p);
>> +       if (r)
>> +               return r;
>>
>>          while (idx < ib->length_dw) {
>>                  uint32_t len = amdgpu_get_ib_value(p, ib_idx, idx);
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> index 7660f82..a549abd 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> @@ -1162,7 +1162,8 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
>>          }
>>
>>          flags = amdgpu_ttm_tt_pte_flags(adev, bo_va->bo->tbo.ttm, mem);
>> -       gtt_flags = (adev == bo_va->bo->adev) ? flags : 0;
>> +       gtt_flags = (amdgpu_ttm_is_bound(bo_va->bo->tbo.ttm) &&
>> +               adev == bo_va->bo->adev) ? flags : 0;
>>
>>          spin_lock(&vm->status_lock);
>>          if (!list_empty(&bo_va->vm_status))
>> --
>> 2.5.0
>>
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Marek Olšák wrote:
> This breaks Tonga such that it hangs. Reproducible quickly with:
>
> R600_DEBUG=testdma glxgears
>
> It's a randomized test that runs forever. It should hang within 2 seconds.

So what is the status of this now?

R600_DEBUG=testdma glxgears isn't a test I've run on my r9285 tonga
until I, by luck, saw this.

On agd5f 4.9-wip at post time it did hang/reboot after 2 seconds.

On every update since it has still hung, though now it takes longer.

Today's update, tested both on head and reset onto the drm-next tag,
still hangs.

Going back over older 4.9-wips, one built on 26th August is the first
that doesn't hang.
Am 22.09.2016 um 23:54 schrieb Andy Furniss:
> Marek Olšák wrote:
>> This breaks Tonga such that it hangs. Reproducible quickly with:
>>
>> R600_DEBUG=testdma glxgears
>>
>> It's a randomized test that runs forever. It should hang within 2 
>> seconds.
>
> So what is the status of this now?

The status is that I've been hammering my head on the desk for two weeks
now trying to figure out what the heck is going wrong here.

What I've found so far is that a VM update command doesn't have the
result it should have, but I absolutely don't understand what is happening here.

The good news is that it's 100% reproducible and I have already found and
fixed two bugs.

If you have any ideas or more testing results on what is going wrong here,
please let me know.

If we can't fix this before the 4.9 release, we are just going to revert
the patch causing this.

Regards,
Christian.

>
> R600_DEBUG=testdma glxgears isn't a test I've run on my r9285 tonga
> until I, by luck, saw this.
>
> On agd5f 4.9-wip at post time it did hang/reboot after 2 seconds.
>
> On every update since it's still hung though now it takes longer.
>
> Todays update tested both on head and reset on to the drm-next tag
> still hang.
>
> Going back over older 4.9-wips it's one built on 26th August that is
> the first not to hang.
>
>
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Christian König wrote:
> Am 22.09.2016 um 23:54 schrieb Andy Furniss:
>> Marek Olšák wrote:
>>> This breaks Tonga such that it hangs. Reproducible quickly with:
>>>
>>> R600_DEBUG=testdma glxgears
>>>
>>> It's a randomized test that runs forever. It should hang within 2
>>> seconds.
>>
>> So what is the status of this now?
>
> The status is that I'm hammering my head on the desk for two weeks now
> what the heck is going wrong here.

Oh, OK, I was just checking no one thought it was fixed really.

>
> What I've found so far is that a VM update command doesn't have the
> result it should have, but I absolutely don't understand what happens here.
>
> The good news is that it's 100% reproducible and I already found and
> fixed two bugs.
>
> If you have any idea or more testing results what is going wrong here
> please let me know.

I guess you have already tried it, or it doesn't help, but

R600_DEBUG=testdma,check_vm glxgears

does catch a VM fault and writes a dump to ddebug_dumps.
Initially that saves me from a lockup, though I will still lock up on repeated runs.

>
> If we can't fix this before the 4.9 release we are just going to revert
> the patch causing this.
>
> Regards,
> Christian.
>
>>
>> R600_DEBUG=testdma glxgears isn't a test I've run on my r9285 tonga
>> until I, by luck, saw this.
>>
>> On agd5f 4.9-wip at post time it did hang/reboot after 2 seconds.
>>
>> On every update since it's still hung though now it takes longer.
>>
>> Todays update tested both on head and reset on to the drm-next tag
>> still hang.
>>
>> Going back over older 4.9-wips it's one built on 26th August that is
>> the first not to hang.
>>
>>
>>
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>
>
>
On Friday, September 23, 2016 4:55:33 PM EDT Andy Furniss wrote:
> Christian König wrote:
> > Am 22.09.2016 um 23:54 schrieb Andy Furniss:
> >> Marek Olšák wrote:
> >>> This breaks Tonga such that it hangs. Reproducible quickly with:
> >>> 
> >>> R600_DEBUG=testdma glxgears
> >>> 
> >>> It's a randomized test that runs forever. It should hang within 2
> >>> seconds.
> >> 
> >> So what is the status of this now?
> > 
> > The status is that I'm hammering my head on the desk for two weeks now
> > what the heck is going wrong here.
> 
> Oh, OK, I was just checking no one thought it was fixed really.
> 
> > What I've found so far is that a VM update command doesn't have the
> > result it should have, but I absolutely don't understand what happens
> > here.
> > 
> > The good news is that it's 100% reproducible and I already found and
> > fixed two bugs.
> > 
> > If you have any idea or more testing results what is going wrong here
> > please let me know.
> 
> I guess, you already did it anyway or it doesn't help, but
> 
> R600_DEBUG=testdma,check_vm glxgears
> 
> Does catch a vmfault and output to ddebug_dumps.
> It initially saves me from locking, though I will lock on repeated runs.
> 
> > If we can't fix this before the 4.9 release we are just going to revert
> > the patch causing this.
> > 
> > Regards,
> > Christian.
> > 

This is directly related to my thread "[amdgpu][CIK] drm-next-4.9-wip -
piglit test - max-texture-size - GPU deadlock".

Using the Linus master kernel without drm-next-4.9-wip I cannot wedge the
GPU; the amdgpu kernel driver correctly returns errors if I run out of
memory instead of crashing the GPU.

As CIK doesn't have SDMA enabled in Mesa, I cannot reproduce Andy's test
case, but max-texture-size can trigger it.

Thanks,
Shawn

> >> R600_DEBUG=testdma glxgears isn't a test I've run on my r9285 tonga
> >> until I, by luck, saw this.
> >> 
> >> On agd5f 4.9-wip at post time it did hang/reboot after 2 seconds.
> >> 
> >> On every update since it's still hung though now it takes longer.
> >> 
> >> Todays update tested both on head and reset on to the drm-next tag
> >> still hang.
> >> 
> >> Going back over older 4.9-wips it's one built on 26th August that is
> >> the first not to hang.
> >> 
> >> 
> >> 
> >> _______________________________________________
> >> amd-gfx mailing list
> >> amd-gfx@lists.freedesktop.org
> >> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
> 
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
With current drm-next-4.9-wip plus "drm/amdgpu: fix addr handling in
amdgpu_vm_bo_update_mapping", testdma is now working OK for my R9 285 -
it just did 35k tests while I was simultaneously running valley/heaven
and messing with powerplay/cpufreq, and all is good: no lockups, no VM
faults.


Andy Furniss wrote:
> Christian König wrote:
>> Am 22.09.2016 um 23:54 schrieb Andy Furniss:
>>> Marek Olšák wrote:
>>>> This breaks Tonga such that it hangs. Reproducible quickly with:
>>>>
>>>> R600_DEBUG=testdma glxgears
>>>>
>>>> It's a randomized test that runs forever. It should hang within 2
>>>> seconds.
>>>
>>> So what is the status of this now?
>>
>> The status is that I'm hammering my head on the desk for two weeks now
>> what the heck is going wrong here.
>
> Oh, OK, I was just checking no one thought it was fixed really.
>
>>
>> What I've found so far is that a VM update command doesn't have the
>> result it should have, but I absolutely don't understand what happens
>> here.
>>
>> The good news is that it's 100% reproducible and I already found and
>> fixed two bugs.
>>
>> If you have any idea or more testing results what is going wrong here
>> please let me know.
>
> I guess, you already did it anyway or it doesn't help, but
>
> R600_DEBUG=testdma,check_vm glxgears
>
> Does catch a vmfault and output to ddebug_dumps.
> It initially saves me from locking, though I will lock on repeated runs.
>
>>
>> If we can't fix this before the 4.9 release we are just going to revert
>> the patch causing this.
>>
>> Regards,
>> Christian.
>>
>>>
>>> R600_DEBUG=testdma glxgears isn't a test I've run on my r9285 tonga
>>> until I, by luck, saw this.
>>>
>>> On agd5f 4.9-wip at post time it did hang/reboot after 2 seconds.
>>>
>>> On every update since it's still hung though now it takes longer.
>>>
>>> Todays update tested both on head and reset on to the drm-next tag
>>> still hang.
>>>
>>> Going back over older 4.9-wips it's one built on 26th August that is
>>> the first not to hang.
>>>
>>>
>>>
>>> _______________________________________________
>>> amd-gfx mailing list
>>> amd-gfx@lists.freedesktop.org
>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>>
>>
>>
>