[3/3] drm/amdgpu: remove amdgpu_cs_try_evict

Submitted by Christian König on Sept. 2, 2019, 10:52 a.m.

Details

Message ID 20190902105219.2813-3-christian.koenig@amd.com
State New
Headers show
Series "Series without cover letter" ( rev: 2 1 ) in AMD X.Org drivers

Not browsing as part of any series.

Commit Message

Christian König Sept. 2, 2019, 10:52 a.m.
Trying to evict things from the current working set doesn't work that
well anymore because of per VM BOs.

Rely on reserving VRAM for page tables to avoid contention.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h    |  1 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 71 +-------------------------
 2 files changed, 1 insertion(+), 71 deletions(-)

Patch hide | download patch | download mbox

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index a236213f8e8e..d1995156733e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -478,7 +478,6 @@  struct amdgpu_cs_parser {
 	uint64_t			bytes_moved_vis_threshold;
 	uint64_t			bytes_moved;
 	uint64_t			bytes_moved_vis;
-	struct amdgpu_bo_list_entry	*evictable;
 
 	/* user fence */
 	struct amdgpu_bo_list_entry	uf_entry;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index fd95b586b590..03182d968d3d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -447,75 +447,12 @@  static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
 	return r;
 }
 
-/* Last resort, try to evict something from the current working set */
-static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p,
-				struct amdgpu_bo *validated)
-{
-	uint32_t domain = validated->allowed_domains;
-	struct ttm_operation_ctx ctx = { true, false };
-	int r;
-
-	if (!p->evictable)
-		return false;
-
-	for (;&p->evictable->tv.head != &p->validated;
-	     p->evictable = list_prev_entry(p->evictable, tv.head)) {
-
-		struct amdgpu_bo_list_entry *candidate = p->evictable;
-		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(candidate->tv.bo);
-		struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
-		bool update_bytes_moved_vis;
-		uint32_t other;
-
-		/* If we reached our current BO we can forget it */
-		if (bo == validated)
-			break;
-
-		/* We can't move pinned BOs here */
-		if (bo->pin_count)
-			continue;
-
-		other = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type);
-
-		/* Check if this BO is in one of the domains we need space for */
-		if (!(other & domain))
-			continue;
-
-		/* Check if we can move this BO somewhere else */
-		other = bo->allowed_domains & ~domain;
-		if (!other)
-			continue;
-
-		/* Good we can try to move this BO somewhere else */
-		update_bytes_moved_vis =
-				!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
-				amdgpu_bo_in_cpu_visible_vram(bo);
-		amdgpu_bo_placement_from_domain(bo, other);
-		r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
-		p->bytes_moved += ctx.bytes_moved;
-		if (update_bytes_moved_vis)
-			p->bytes_moved_vis += ctx.bytes_moved;
-
-		if (unlikely(r))
-			break;
-
-		p->evictable = list_prev_entry(p->evictable, tv.head);
-		list_move(&candidate->tv.head, &p->validated);
-
-		return true;
-	}
-
-	return false;
-}
-
 static int amdgpu_cs_validate(void *param, struct amdgpu_bo *bo)
 {
 	struct amdgpu_cs_parser *p = param;
 	int r;
 
-	do {
-		r = amdgpu_cs_bo_validate(p, bo);
-	} while (r == -ENOMEM && amdgpu_cs_try_evict(p, bo));
+	r = amdgpu_cs_bo_validate(p, bo);
 	if (r)
 		return r;
 
@@ -554,9 +491,6 @@  static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
 			binding_userptr = true;
 		}
 
-		if (p->evictable == lobj)
-			p->evictable = NULL;
-
 		r = amdgpu_cs_validate(p, bo);
 		if (r)
 			return r;
@@ -659,9 +593,6 @@  static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 					  &p->bytes_moved_vis_threshold);
 	p->bytes_moved = 0;
 	p->bytes_moved_vis = 0;
-	p->evictable = list_last_entry(&p->validated,
-				       struct amdgpu_bo_list_entry,
-				       tv.head);
 
 	r = amdgpu_vm_validate_pt_bos(p->adev, &fpriv->vm,
 				      amdgpu_cs_validate, p);

Comments

Hey Christian,

Can you go into details a bit more on the how and why this doesn't
work well anymore?  (such as its relationship with per VM BOs?)  I am
curious to learn more because I was reading into this chunk of code
earlier.  Is this something that the Shrinker API can help with?

Regards,
Kenny

On Mon, Sep 2, 2019 at 6:52 AM Christian König
<ckoenig.leichtzumerken@gmail.com> wrote:
>
> Trying to evict things from the current working set doesn't work that
> well anymore because of per VM BOs.
>
> Rely on reserving VRAM for page tables to avoid contention.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu.h    |  1 -
>  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 71 +-------------------------
>  2 files changed, 1 insertion(+), 71 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index a236213f8e8e..d1995156733e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -478,7 +478,6 @@ struct amdgpu_cs_parser {
>         uint64_t                        bytes_moved_vis_threshold;
>         uint64_t                        bytes_moved;
>         uint64_t                        bytes_moved_vis;
> -       struct amdgpu_bo_list_entry     *evictable;
>
>         /* user fence */
>         struct amdgpu_bo_list_entry     uf_entry;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index fd95b586b590..03182d968d3d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -447,75 +447,12 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
>         return r;
>  }
>
> -/* Last resort, try to evict something from the current working set */
> -static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p,
> -                               struct amdgpu_bo *validated)
> -{
> -       uint32_t domain = validated->allowed_domains;
> -       struct ttm_operation_ctx ctx = { true, false };
> -       int r;
> -
> -       if (!p->evictable)
> -               return false;
> -
> -       for (;&p->evictable->tv.head != &p->validated;
> -            p->evictable = list_prev_entry(p->evictable, tv.head)) {
> -
> -               struct amdgpu_bo_list_entry *candidate = p->evictable;
> -               struct amdgpu_bo *bo = ttm_to_amdgpu_bo(candidate->tv.bo);
> -               struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
> -               bool update_bytes_moved_vis;
> -               uint32_t other;
> -
> -               /* If we reached our current BO we can forget it */
> -               if (bo == validated)
> -                       break;
> -
> -               /* We can't move pinned BOs here */
> -               if (bo->pin_count)
> -                       continue;
> -
> -               other = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type);
> -
> -               /* Check if this BO is in one of the domains we need space for */
> -               if (!(other & domain))
> -                       continue;
> -
> -               /* Check if we can move this BO somewhere else */
> -               other = bo->allowed_domains & ~domain;
> -               if (!other)
> -                       continue;
> -
> -               /* Good we can try to move this BO somewhere else */
> -               update_bytes_moved_vis =
> -                               !amdgpu_gmc_vram_full_visible(&adev->gmc) &&
> -                               amdgpu_bo_in_cpu_visible_vram(bo);
> -               amdgpu_bo_placement_from_domain(bo, other);
> -               r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
> -               p->bytes_moved += ctx.bytes_moved;
> -               if (update_bytes_moved_vis)
> -                       p->bytes_moved_vis += ctx.bytes_moved;
> -
> -               if (unlikely(r))
> -                       break;
> -
> -               p->evictable = list_prev_entry(p->evictable, tv.head);
> -               list_move(&candidate->tv.head, &p->validated);
> -
> -               return true;
> -       }
> -
> -       return false;
> -}
> -
>  static int amdgpu_cs_validate(void *param, struct amdgpu_bo *bo)
>  {
>         struct amdgpu_cs_parser *p = param;
>         int r;
>
> -       do {
> -               r = amdgpu_cs_bo_validate(p, bo);
> -       } while (r == -ENOMEM && amdgpu_cs_try_evict(p, bo));
> +       r = amdgpu_cs_bo_validate(p, bo);
>         if (r)
>                 return r;
>
> @@ -554,9 +491,6 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
>                         binding_userptr = true;
>                 }
>
> -               if (p->evictable == lobj)
> -                       p->evictable = NULL;
> -
>                 r = amdgpu_cs_validate(p, bo);
>                 if (r)
>                         return r;
> @@ -659,9 +593,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
>                                           &p->bytes_moved_vis_threshold);
>         p->bytes_moved = 0;
>         p->bytes_moved_vis = 0;
> -       p->evictable = list_last_entry(&p->validated,
> -                                      struct amdgpu_bo_list_entry,
> -                                      tv.head);
>
>         r = amdgpu_vm_validate_pt_bos(p->adev, &fpriv->vm,
>                                       amdgpu_cs_validate, p);
> --
> 2.17.1
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Hi Kenny,

When we do a CS we have a certain set of buffers which the submission is 
working with and are locked down while we prepare the submission.

This working set contains of the buffers in the BO list as well as the 
one in the VM plus one or two for CSA and user fences etc..

Now what can happen is that we find that we need to allocate some page 
tables during the CS and when a lot of BOs are locked down allocating a 
page table can fail because we can't evict other BOs.

What this code tries todo is to evict stuff from the BO list to make 
room for VM BOs, but since now much more BOs are bound to the VM this 
doesn't work any more.


The root of the problem is that it is really tricky to figure out how 
much memory you need for the page tables in the first place. See for a 
BO in VRAM we usually need only one PTE for each 2MB, but for a BO in 
system memory we need one PTE for each 4K of memory.

So what can happen is that you evict something from VRAM because you 
need room and that eviction in turn makes you need even more room.....

It can take a while until this reaches a stable point, so this patch set 
here switched from a dynamic approach to just assuming the worst and 
reserving some memory for page tables.

Regards,
Christian.

Am 02.09.19 um 16:07 schrieb Kenny Ho:
> Hey Christian,
>
> Can you go into details a bit more on the how and why this doesn't
> work well anymore?  (such as its relationship with per VM BOs?)  I am
> curious to learn more because I was reading into this chunk of code
> earlier.  Is this something that the Shrinker API can help with?
>
> Regards,
> Kenny
>
> On Mon, Sep 2, 2019 at 6:52 AM Christian König
> <ckoenig.leichtzumerken@gmail.com> wrote:
>> Trying to evict things from the current working set doesn't work that
>> well anymore because of per VM BOs.
>>
>> Rely on reserving VRAM for page tables to avoid contention.
>>
>> Signed-off-by: Christian König <christian.koenig@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu.h    |  1 -
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 71 +-------------------------
>>   2 files changed, 1 insertion(+), 71 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> index a236213f8e8e..d1995156733e 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> @@ -478,7 +478,6 @@ struct amdgpu_cs_parser {
>>          uint64_t                        bytes_moved_vis_threshold;
>>          uint64_t                        bytes_moved;
>>          uint64_t                        bytes_moved_vis;
>> -       struct amdgpu_bo_list_entry     *evictable;
>>
>>          /* user fence */
>>          struct amdgpu_bo_list_entry     uf_entry;
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> index fd95b586b590..03182d968d3d 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> @@ -447,75 +447,12 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
>>          return r;
>>   }
>>
>> -/* Last resort, try to evict something from the current working set */
>> -static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p,
>> -                               struct amdgpu_bo *validated)
>> -{
>> -       uint32_t domain = validated->allowed_domains;
>> -       struct ttm_operation_ctx ctx = { true, false };
>> -       int r;
>> -
>> -       if (!p->evictable)
>> -               return false;
>> -
>> -       for (;&p->evictable->tv.head != &p->validated;
>> -            p->evictable = list_prev_entry(p->evictable, tv.head)) {
>> -
>> -               struct amdgpu_bo_list_entry *candidate = p->evictable;
>> -               struct amdgpu_bo *bo = ttm_to_amdgpu_bo(candidate->tv.bo);
>> -               struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
>> -               bool update_bytes_moved_vis;
>> -               uint32_t other;
>> -
>> -               /* If we reached our current BO we can forget it */
>> -               if (bo == validated)
>> -                       break;
>> -
>> -               /* We can't move pinned BOs here */
>> -               if (bo->pin_count)
>> -                       continue;
>> -
>> -               other = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type);
>> -
>> -               /* Check if this BO is in one of the domains we need space for */
>> -               if (!(other & domain))
>> -                       continue;
>> -
>> -               /* Check if we can move this BO somewhere else */
>> -               other = bo->allowed_domains & ~domain;
>> -               if (!other)
>> -                       continue;
>> -
>> -               /* Good we can try to move this BO somewhere else */
>> -               update_bytes_moved_vis =
>> -                               !amdgpu_gmc_vram_full_visible(&adev->gmc) &&
>> -                               amdgpu_bo_in_cpu_visible_vram(bo);
>> -               amdgpu_bo_placement_from_domain(bo, other);
>> -               r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
>> -               p->bytes_moved += ctx.bytes_moved;
>> -               if (update_bytes_moved_vis)
>> -                       p->bytes_moved_vis += ctx.bytes_moved;
>> -
>> -               if (unlikely(r))
>> -                       break;
>> -
>> -               p->evictable = list_prev_entry(p->evictable, tv.head);
>> -               list_move(&candidate->tv.head, &p->validated);
>> -
>> -               return true;
>> -       }
>> -
>> -       return false;
>> -}
>> -
>>   static int amdgpu_cs_validate(void *param, struct amdgpu_bo *bo)
>>   {
>>          struct amdgpu_cs_parser *p = param;
>>          int r;
>>
>> -       do {
>> -               r = amdgpu_cs_bo_validate(p, bo);
>> -       } while (r == -ENOMEM && amdgpu_cs_try_evict(p, bo));
>> +       r = amdgpu_cs_bo_validate(p, bo);
>>          if (r)
>>                  return r;
>>
>> @@ -554,9 +491,6 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
>>                          binding_userptr = true;
>>                  }
>>
>> -               if (p->evictable == lobj)
>> -                       p->evictable = NULL;
>> -
>>                  r = amdgpu_cs_validate(p, bo);
>>                  if (r)
>>                          return r;
>> @@ -659,9 +593,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
>>                                            &p->bytes_moved_vis_threshold);
>>          p->bytes_moved = 0;
>>          p->bytes_moved_vis = 0;
>> -       p->evictable = list_last_entry(&p->validated,
>> -                                      struct amdgpu_bo_list_entry,
>> -                                      tv.head);
>>
>>          r = amdgpu_vm_validate_pt_bos(p->adev, &fpriv->vm,
>>                                        amdgpu_cs_validate, p);
>> --
>> 2.17.1
>>
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Ah ok, thanks for the explanation.  About the last bit, what is the
reason behind the differences in page size?  (I assume that's what you
meant by PTE?  Or is that something else?)

Regards,
Kenny

On Mon, Sep 2, 2019 at 10:31 AM Christian König
<ckoenig.leichtzumerken@gmail.com> wrote:
>
> Hi Kenny,
>
> When we do a CS we have a certain set of buffers which the submission is
> working with and are locked down while we prepare the submission.
>
> This working set contains of the buffers in the BO list as well as the
> one in the VM plus one or two for CSA and user fences etc..
>
> Now what can happen is that we find that we need to allocate some page
> tables during the CS and when a lot of BOs are locked down allocating a
> page table can fail because we can't evict other BOs.
>
> What this code tries todo is to evict stuff from the BO list to make
> room for VM BOs, but since now much more BOs are bound to the VM this
> doesn't work any more.
>
>
> The root of the problem is that it is really tricky to figure out how
> much memory you need for the page tables in the first place. See for a
> BO in VRAM we usually need only one PTE for each 2MB, but for a BO in
> system memory we need one PTE for each 4K of memory.
>
> So what can happen is that you evict something from VRAM because you
> need room and that eviction in turn makes you need even more room.....
>
> It can take a while until this reaches a stable point, so this patch set
> here switched from a dynamic approach to just assuming the worst and
> reserving some memory for page tables.
>
> Regards,
> Christian.
>
> Am 02.09.19 um 16:07 schrieb Kenny Ho:
> > Hey Christian,
> >
> > Can you go into details a bit more on the how and why this doesn't
> > work well anymore?  (such as its relationship with per VM BOs?)  I am
> > curious to learn more because I was reading into this chunk of code
> > earlier.  Is this something that the Shrinker API can help with?
> >
> > Regards,
> > Kenny
> >
> > On Mon, Sep 2, 2019 at 6:52 AM Christian König
> > <ckoenig.leichtzumerken@gmail.com> wrote:
> >> Trying to evict things from the current working set doesn't work that
> >> well anymore because of per VM BOs.
> >>
> >> Rely on reserving VRAM for page tables to avoid contention.
> >>
> >> Signed-off-by: Christian König <christian.koenig@amd.com>
> >> ---
> >>   drivers/gpu/drm/amd/amdgpu/amdgpu.h    |  1 -
> >>   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 71 +-------------------------
> >>   2 files changed, 1 insertion(+), 71 deletions(-)
> >>
> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> >> index a236213f8e8e..d1995156733e 100644
> >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> >> @@ -478,7 +478,6 @@ struct amdgpu_cs_parser {
> >>          uint64_t                        bytes_moved_vis_threshold;
> >>          uint64_t                        bytes_moved;
> >>          uint64_t                        bytes_moved_vis;
> >> -       struct amdgpu_bo_list_entry     *evictable;
> >>
> >>          /* user fence */
> >>          struct amdgpu_bo_list_entry     uf_entry;
> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> >> index fd95b586b590..03182d968d3d 100644
> >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> >> @@ -447,75 +447,12 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
> >>          return r;
> >>   }
> >>
> >> -/* Last resort, try to evict something from the current working set */
> >> -static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p,
> >> -                               struct amdgpu_bo *validated)
> >> -{
> >> -       uint32_t domain = validated->allowed_domains;
> >> -       struct ttm_operation_ctx ctx = { true, false };
> >> -       int r;
> >> -
> >> -       if (!p->evictable)
> >> -               return false;
> >> -
> >> -       for (;&p->evictable->tv.head != &p->validated;
> >> -            p->evictable = list_prev_entry(p->evictable, tv.head)) {
> >> -
> >> -               struct amdgpu_bo_list_entry *candidate = p->evictable;
> >> -               struct amdgpu_bo *bo = ttm_to_amdgpu_bo(candidate->tv.bo);
> >> -               struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
> >> -               bool update_bytes_moved_vis;
> >> -               uint32_t other;
> >> -
> >> -               /* If we reached our current BO we can forget it */
> >> -               if (bo == validated)
> >> -                       break;
> >> -
> >> -               /* We can't move pinned BOs here */
> >> -               if (bo->pin_count)
> >> -                       continue;
> >> -
> >> -               other = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type);
> >> -
> >> -               /* Check if this BO is in one of the domains we need space for */
> >> -               if (!(other & domain))
> >> -                       continue;
> >> -
> >> -               /* Check if we can move this BO somewhere else */
> >> -               other = bo->allowed_domains & ~domain;
> >> -               if (!other)
> >> -                       continue;
> >> -
> >> -               /* Good we can try to move this BO somewhere else */
> >> -               update_bytes_moved_vis =
> >> -                               !amdgpu_gmc_vram_full_visible(&adev->gmc) &&
> >> -                               amdgpu_bo_in_cpu_visible_vram(bo);
> >> -               amdgpu_bo_placement_from_domain(bo, other);
> >> -               r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
> >> -               p->bytes_moved += ctx.bytes_moved;
> >> -               if (update_bytes_moved_vis)
> >> -                       p->bytes_moved_vis += ctx.bytes_moved;
> >> -
> >> -               if (unlikely(r))
> >> -                       break;
> >> -
> >> -               p->evictable = list_prev_entry(p->evictable, tv.head);
> >> -               list_move(&candidate->tv.head, &p->validated);
> >> -
> >> -               return true;
> >> -       }
> >> -
> >> -       return false;
> >> -}
> >> -
> >>   static int amdgpu_cs_validate(void *param, struct amdgpu_bo *bo)
> >>   {
> >>          struct amdgpu_cs_parser *p = param;
> >>          int r;
> >>
> >> -       do {
> >> -               r = amdgpu_cs_bo_validate(p, bo);
> >> -       } while (r == -ENOMEM && amdgpu_cs_try_evict(p, bo));
> >> +       r = amdgpu_cs_bo_validate(p, bo);
> >>          if (r)
> >>                  return r;
> >>
> >> @@ -554,9 +491,6 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
> >>                          binding_userptr = true;
> >>                  }
> >>
> >> -               if (p->evictable == lobj)
> >> -                       p->evictable = NULL;
> >> -
> >>                  r = amdgpu_cs_validate(p, bo);
> >>                  if (r)
> >>                          return r;
> >> @@ -659,9 +593,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
> >>                                            &p->bytes_moved_vis_threshold);
> >>          p->bytes_moved = 0;
> >>          p->bytes_moved_vis = 0;
> >> -       p->evictable = list_last_entry(&p->validated,
> >> -                                      struct amdgpu_bo_list_entry,
> >> -                                      tv.head);
> >>
> >>          r = amdgpu_vm_validate_pt_bos(p->adev, &fpriv->vm,
> >>                                        amdgpu_cs_validate, p);
> >> --
> >> 2.17.1
> >>
> >> _______________________________________________
> >> amd-gfx mailing list
> >> amd-gfx@lists.freedesktop.org
> >> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>
PTE means Page Table Entry.

Starting with Vega10 we have multi level page tables and when you 
continuously allocate 2MB (or even 1GB) you can set a bit in the 
hierarchy that the walker should stop and take the address and flags for 
the whole 2MB block.

The result is that with 4K pages you need an additional level in the 
hierarchy compared to 2MB pages and so more memory.

Regards,
Christian.

Am 02.09.19 um 16:47 schrieb Kenny Ho:
> Ah ok, thanks for the explanation.  About the last bit, what is the
> reason behind the differences in page size?  (I assume that's what you
> meant by PTE?  Or is that something else?)
>
> Regards,
> Kenny
>
> On Mon, Sep 2, 2019 at 10:31 AM Christian König
> <ckoenig.leichtzumerken@gmail.com> wrote:
>> Hi Kenny,
>>
>> When we do a CS we have a certain set of buffers which the submission is
>> working with and are locked down while we prepare the submission.
>>
>> This working set contains of the buffers in the BO list as well as the
>> one in the VM plus one or two for CSA and user fences etc..
>>
>> Now what can happen is that we find that we need to allocate some page
>> tables during the CS and when a lot of BOs are locked down allocating a
>> page table can fail because we can't evict other BOs.
>>
>> What this code tries todo is to evict stuff from the BO list to make
>> room for VM BOs, but since now much more BOs are bound to the VM this
>> doesn't work any more.
>>
>>
>> The root of the problem is that it is really tricky to figure out how
>> much memory you need for the page tables in the first place. See for a
>> BO in VRAM we usually need only one PTE for each 2MB, but for a BO in
>> system memory we need one PTE for each 4K of memory.
>>
>> So what can happen is that you evict something from VRAM because you
>> need room and that eviction in turn makes you need even more room.....
>>
>> It can take a while until this reaches a stable point, so this patch set
>> here switched from a dynamic approach to just assuming the worst and
>> reserving some memory for page tables.
>>
>> Regards,
>> Christian.
>>
>> Am 02.09.19 um 16:07 schrieb Kenny Ho:
>>> Hey Christian,
>>>
>>> Can you go into details a bit more on the how and why this doesn't
>>> work well anymore?  (such as its relationship with per VM BOs?)  I am
>>> curious to learn more because I was reading into this chunk of code
>>> earlier.  Is this something that the Shrinker API can help with?
>>>
>>> Regards,
>>> Kenny
>>>
>>> On Mon, Sep 2, 2019 at 6:52 AM Christian König
>>> <ckoenig.leichtzumerken@gmail.com> wrote:
>>>> Trying to evict things from the current working set doesn't work that
>>>> well anymore because of per VM BOs.
>>>>
>>>> Rely on reserving VRAM for page tables to avoid contention.
>>>>
>>>> Signed-off-by: Christian König <christian.koenig@amd.com>
>>>> ---
>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu.h    |  1 -
>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 71 +-------------------------
>>>>    2 files changed, 1 insertion(+), 71 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>> index a236213f8e8e..d1995156733e 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>> @@ -478,7 +478,6 @@ struct amdgpu_cs_parser {
>>>>           uint64_t                        bytes_moved_vis_threshold;
>>>>           uint64_t                        bytes_moved;
>>>>           uint64_t                        bytes_moved_vis;
>>>> -       struct amdgpu_bo_list_entry     *evictable;
>>>>
>>>>           /* user fence */
>>>>           struct amdgpu_bo_list_entry     uf_entry;
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>>> index fd95b586b590..03182d968d3d 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>>> @@ -447,75 +447,12 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
>>>>           return r;
>>>>    }
>>>>
>>>> -/* Last resort, try to evict something from the current working set */
>>>> -static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p,
>>>> -                               struct amdgpu_bo *validated)
>>>> -{
>>>> -       uint32_t domain = validated->allowed_domains;
>>>> -       struct ttm_operation_ctx ctx = { true, false };
>>>> -       int r;
>>>> -
>>>> -       if (!p->evictable)
>>>> -               return false;
>>>> -
>>>> -       for (;&p->evictable->tv.head != &p->validated;
>>>> -            p->evictable = list_prev_entry(p->evictable, tv.head)) {
>>>> -
>>>> -               struct amdgpu_bo_list_entry *candidate = p->evictable;
>>>> -               struct amdgpu_bo *bo = ttm_to_amdgpu_bo(candidate->tv.bo);
>>>> -               struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
>>>> -               bool update_bytes_moved_vis;
>>>> -               uint32_t other;
>>>> -
>>>> -               /* If we reached our current BO we can forget it */
>>>> -               if (bo == validated)
>>>> -                       break;
>>>> -
>>>> -               /* We can't move pinned BOs here */
>>>> -               if (bo->pin_count)
>>>> -                       continue;
>>>> -
>>>> -               other = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type);
>>>> -
>>>> -               /* Check if this BO is in one of the domains we need space for */
>>>> -               if (!(other & domain))
>>>> -                       continue;
>>>> -
>>>> -               /* Check if we can move this BO somewhere else */
>>>> -               other = bo->allowed_domains & ~domain;
>>>> -               if (!other)
>>>> -                       continue;
>>>> -
>>>> -               /* Good we can try to move this BO somewhere else */
>>>> -               update_bytes_moved_vis =
>>>> -                               !amdgpu_gmc_vram_full_visible(&adev->gmc) &&
>>>> -                               amdgpu_bo_in_cpu_visible_vram(bo);
>>>> -               amdgpu_bo_placement_from_domain(bo, other);
>>>> -               r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
>>>> -               p->bytes_moved += ctx.bytes_moved;
>>>> -               if (update_bytes_moved_vis)
>>>> -                       p->bytes_moved_vis += ctx.bytes_moved;
>>>> -
>>>> -               if (unlikely(r))
>>>> -                       break;
>>>> -
>>>> -               p->evictable = list_prev_entry(p->evictable, tv.head);
>>>> -               list_move(&candidate->tv.head, &p->validated);
>>>> -
>>>> -               return true;
>>>> -       }
>>>> -
>>>> -       return false;
>>>> -}
>>>> -
>>>>    static int amdgpu_cs_validate(void *param, struct amdgpu_bo *bo)
>>>>    {
>>>>           struct amdgpu_cs_parser *p = param;
>>>>           int r;
>>>>
>>>> -       do {
>>>> -               r = amdgpu_cs_bo_validate(p, bo);
>>>> -       } while (r == -ENOMEM && amdgpu_cs_try_evict(p, bo));
>>>> +       r = amdgpu_cs_bo_validate(p, bo);
>>>>           if (r)
>>>>                   return r;
>>>>
>>>> @@ -554,9 +491,6 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
>>>>                           binding_userptr = true;
>>>>                   }
>>>>
>>>> -               if (p->evictable == lobj)
>>>> -                       p->evictable = NULL;
>>>> -
>>>>                   r = amdgpu_cs_validate(p, bo);
>>>>                   if (r)
>>>>                           return r;
>>>> @@ -659,9 +593,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
>>>>                                             &p->bytes_moved_vis_threshold);
>>>>           p->bytes_moved = 0;
>>>>           p->bytes_moved_vis = 0;
>>>> -       p->evictable = list_last_entry(&p->validated,
>>>> -                                      struct amdgpu_bo_list_entry,
>>>> -                                      tv.head);
>>>>
>>>>           r = amdgpu_vm_validate_pt_bos(p->adev, &fpriv->vm,
>>>>                                         amdgpu_cs_validate, p);
>>>> --
>>>> 2.17.1
>>>>
>>>> _______________________________________________
>>>> amd-gfx mailing list
>>>> amd-gfx@lists.freedesktop.org
>>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx