drm/ttm: Fix the memory delay free issue

Submitted by Deng, Emily on July 10, 2019, 9:29 a.m.

Details

Message ID 1562750971-1628-1-git-send-email-Emily.Deng@amd.com
State New
Headers show
Series "drm/ttm: Fix the memory delay free issue" ( rev: 1 ) in AMD X.Org drivers

Not browsing as part of any series.

Commit Message

Deng, Emily July 10, 2019, 9:29 a.m.
For vulkan cts allocation test cases, they will create a series of bos, and then free
them. As it has lots of alloction test cases with the same vm, as per vm
bo feature enable, all of those bos' resv are the same. But the bo free is quite slow,
as they use the same resv object, for every time, free a bo,
it will check the resv whether signal, if it signal, then will free it. But
as the test cases will continue to create bo, and the resv fence is increasing. So the
free is more slower than creating. It will cause memory exhausting.

Method:
When the resv signal, release all the bos which are use the same
resv object.

Signed-off-by: Emily Deng <Emily.Deng@amd.com>
---
 drivers/gpu/drm/ttm/ttm_bo.c | 29 ++++++++++++++++++++++++-----
 1 file changed, 24 insertions(+), 5 deletions(-)

Patch hide | download patch | download mbox

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index f9a3d4c..57ec59b 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -543,6 +543,7 @@  static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
 {
 	struct ttm_bo_global *glob = bo->bdev->glob;
 	struct reservation_object *resv;
+	struct ttm_buffer_object *resv_bo, *resv_bo_next;
 	int ret;
 
 	if (unlikely(list_empty(&bo->ddestroy)))
@@ -566,10 +567,14 @@  static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
 							   interruptible,
 							   30 * HZ);
 
-		if (lret < 0)
+		if (lret < 0) {
+			kref_put(&bo->list_kref, ttm_bo_release_list);
 			return lret;
-		else if (lret == 0)
+		}
+		else if (lret == 0) {
+			kref_put(&bo->list_kref, ttm_bo_release_list);
 			return -EBUSY;
+		}
 
 		spin_lock(&glob->lru_lock);
 		if (unlock_resv && !kcl_reservation_object_trylock(bo->resv)) {
@@ -582,6 +587,7 @@  static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
 			 * here.
 			 */
 			spin_unlock(&glob->lru_lock);
+			kref_put(&bo->list_kref, ttm_bo_release_list);
 			return 0;
 		}
 		ret = 0;
@@ -591,15 +597,29 @@  static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
 		if (unlock_resv)
 			kcl_reservation_object_unlock(bo->resv);
 		spin_unlock(&glob->lru_lock);
+		kref_put(&bo->list_kref, ttm_bo_release_list);
 		return ret;
 	}
 
 	ttm_bo_del_from_lru(bo);
 	list_del_init(&bo->ddestroy);
 	kref_put(&bo->list_kref, ttm_bo_ref_bug);
-
 	spin_unlock(&glob->lru_lock);
 	ttm_bo_cleanup_memtype_use(bo);
+	kref_put(&bo->list_kref, ttm_bo_release_list);
+
+	spin_lock(&glob->lru_lock);
+	list_for_each_entry_safe(resv_bo, resv_bo_next, &bo->bdev->ddestroy, ddestroy) {
+		if (resv_bo->resv == bo->resv) {
+			ttm_bo_del_from_lru(resv_bo);
+			list_del_init(&resv_bo->ddestroy);
+			spin_unlock(&glob->lru_lock);
+			ttm_bo_cleanup_memtype_use(resv_bo);
+			kref_put(&resv_bo->list_kref, ttm_bo_release_list);
+			spin_lock(&glob->lru_lock);
+		}
+	}
+	spin_unlock(&glob->lru_lock);
 
 	if (unlock_resv)
 		kcl_reservation_object_unlock(bo->resv);
@@ -639,9 +659,8 @@  static bool ttm_bo_delayed_delete(struct ttm_bo_device *bdev, bool remove_all)
 			ttm_bo_cleanup_refs(bo, false, !remove_all, true);
 		} else {
 			spin_unlock(&glob->lru_lock);
+			kref_put(&bo->list_kref, ttm_bo_release_list);
 		}
-
-		kref_put(&bo->list_kref, ttm_bo_release_list);
 		spin_lock(&glob->lru_lock);
 	}
 	list_splice_tail(&removed, &bdev->ddestroy);

Comments

It doesn't make sense that freeing BO still uses per-vm resv.

I remember when BO is in release list, its resv will be from per-vm resv 
copy. Could you check it?

-David

在 2019/7/10 17:29, Emily Deng 写道:
> For vulkan cts allocation test cases, they will create a series of bos, and then free

> them. As it has lots of alloction test cases with the same vm, as per vm

> bo feature enable, all of those bos' resv are the same. But the bo free is quite slow,

> as they use the same resv object, for every time, free a bo,

> it will check the resv whether signal, if it signal, then will free it. But

> as the test cases will continue to create bo, and the resv fence is increasing. So the

> free is more slower than creating. It will cause memory exhausting.

>

> Method:

> When the resv signal, release all the bos which are use the same

> resv object.

>

> Signed-off-by: Emily Deng <Emily.Deng@amd.com>

> ---

>   drivers/gpu/drm/ttm/ttm_bo.c | 29 ++++++++++++++++++++++++-----

>   1 file changed, 24 insertions(+), 5 deletions(-)

>

> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c

> index f9a3d4c..57ec59b 100644

> --- a/drivers/gpu/drm/ttm/ttm_bo.c

> +++ b/drivers/gpu/drm/ttm/ttm_bo.c

> @@ -543,6 +543,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,

>   {

>   	struct ttm_bo_global *glob = bo->bdev->glob;

>   	struct reservation_object *resv;

> +	struct ttm_buffer_object *resv_bo, *resv_bo_next;

>   	int ret;

>   

>   	if (unlikely(list_empty(&bo->ddestroy)))

> @@ -566,10 +567,14 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,

>   							   interruptible,

>   							   30 * HZ);

>   

> -		if (lret < 0)

> +		if (lret < 0) {

> +			kref_put(&bo->list_kref, ttm_bo_release_list);

>   			return lret;

> -		else if (lret == 0)

> +		}

> +		else if (lret == 0) {

> +			kref_put(&bo->list_kref, ttm_bo_release_list);

>   			return -EBUSY;

> +		}

>   

>   		spin_lock(&glob->lru_lock);

>   		if (unlock_resv && !kcl_reservation_object_trylock(bo->resv)) {

> @@ -582,6 +587,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,

>   			 * here.

>   			 */

>   			spin_unlock(&glob->lru_lock);

> +			kref_put(&bo->list_kref, ttm_bo_release_list);

>   			return 0;

>   		}

>   		ret = 0;

> @@ -591,15 +597,29 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,

>   		if (unlock_resv)

>   			kcl_reservation_object_unlock(bo->resv);

>   		spin_unlock(&glob->lru_lock);

> +		kref_put(&bo->list_kref, ttm_bo_release_list);

>   		return ret;

>   	}

>   

>   	ttm_bo_del_from_lru(bo);

>   	list_del_init(&bo->ddestroy);

>   	kref_put(&bo->list_kref, ttm_bo_ref_bug);

> -

>   	spin_unlock(&glob->lru_lock);

>   	ttm_bo_cleanup_memtype_use(bo);

> +	kref_put(&bo->list_kref, ttm_bo_release_list);

> +

> +	spin_lock(&glob->lru_lock);

> +	list_for_each_entry_safe(resv_bo, resv_bo_next, &bo->bdev->ddestroy, ddestroy) {

> +		if (resv_bo->resv == bo->resv) {

> +			ttm_bo_del_from_lru(resv_bo);

> +			list_del_init(&resv_bo->ddestroy);

> +			spin_unlock(&glob->lru_lock);

> +			ttm_bo_cleanup_memtype_use(resv_bo);

> +			kref_put(&resv_bo->list_kref, ttm_bo_release_list);

> +			spin_lock(&glob->lru_lock);

> +		}

> +	}

> +	spin_unlock(&glob->lru_lock);

>   

>   	if (unlock_resv)

>   		kcl_reservation_object_unlock(bo->resv);

> @@ -639,9 +659,8 @@ static bool ttm_bo_delayed_delete(struct ttm_bo_device *bdev, bool remove_all)

>   			ttm_bo_cleanup_refs(bo, false, !remove_all, true);

>   		} else {

>   			spin_unlock(&glob->lru_lock);

> +			kref_put(&bo->list_kref, ttm_bo_release_list);

>   		}

> -

> -		kref_put(&bo->list_kref, ttm_bo_release_list);

>   		spin_lock(&glob->lru_lock);

>   	}

>   	list_splice_tail(&removed, &bdev->ddestroy);
Hi David,
     You are right, it will copy per-vm resv.
     But currently, it still has the delay free issue which non per vm bo doesn't has. Maybe it already has new fences append to this resv object before copy.

Hi Christian,
    Do you have any suggestion about this? For per vm bo, it seems always delay to free the ttm bo.

Best wishes
Emily Deng
>-----Original Message-----

>From: Zhou, David(ChunMing) <David1.Zhou@amd.com>

>Sent: Wednesday, July 10, 2019 9:28 PM

>To: Deng, Emily <Emily.Deng@amd.com>; amd-gfx@lists.freedesktop.org

>Subject: Re: [PATCH] drm/ttm: Fix the memory delay free issue

>

>It doesn't make sense that freeing BO still uses per-vm resv.

>

>I remember when BO is in release list, its resv will be from per-vm resv copy.

>Could you check it?

>

>-David

>

>在 2019/7/10 17:29, Emily Deng 写道:

>> For vulkan cts allocation test cases, they will create a series of

>> bos, and then free them. As it has lots of alloction test cases with

>> the same vm, as per vm bo feature enable, all of those bos' resv are

>> the same. But the bo free is quite slow, as they use the same resv

>> object, for every time, free a bo, it will check the resv whether

>> signal, if it signal, then will free it. But as the test cases will

>> continue to create bo, and the resv fence is increasing. So the free is more

>slower than creating. It will cause memory exhausting.

>>

>> Method:

>> When the resv signal, release all the bos which are use the same resv

>> object.

>>

>> Signed-off-by: Emily Deng <Emily.Deng@amd.com>

>> ---

>>   drivers/gpu/drm/ttm/ttm_bo.c | 29 ++++++++++++++++++++++++-----

>>   1 file changed, 24 insertions(+), 5 deletions(-)

>>

>> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c

>> b/drivers/gpu/drm/ttm/ttm_bo.c index f9a3d4c..57ec59b 100644

>> --- a/drivers/gpu/drm/ttm/ttm_bo.c

>> +++ b/drivers/gpu/drm/ttm/ttm_bo.c

>> @@ -543,6 +543,7 @@ static int ttm_bo_cleanup_refs(struct

>ttm_buffer_object *bo,

>>   {

>>   	struct ttm_bo_global *glob = bo->bdev->glob;

>>   	struct reservation_object *resv;

>> +	struct ttm_buffer_object *resv_bo, *resv_bo_next;

>>   	int ret;

>>

>>   	if (unlikely(list_empty(&bo->ddestroy)))

>> @@ -566,10 +567,14 @@ static int ttm_bo_cleanup_refs(struct

>ttm_buffer_object *bo,

>>   							   interruptible,

>>   							   30 * HZ);

>>

>> -		if (lret < 0)

>> +		if (lret < 0) {

>> +			kref_put(&bo->list_kref, ttm_bo_release_list);

>>   			return lret;

>> -		else if (lret == 0)

>> +		}

>> +		else if (lret == 0) {

>> +			kref_put(&bo->list_kref, ttm_bo_release_list);

>>   			return -EBUSY;

>> +		}

>>

>>   		spin_lock(&glob->lru_lock);

>>   		if (unlock_resv && !kcl_reservation_object_trylock(bo->resv))

>{ @@

>> -582,6 +587,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object

>*bo,

>>   			 * here.

>>   			 */

>>   			spin_unlock(&glob->lru_lock);

>> +			kref_put(&bo->list_kref, ttm_bo_release_list);

>>   			return 0;

>>   		}

>>   		ret = 0;

>> @@ -591,15 +597,29 @@ static int ttm_bo_cleanup_refs(struct

>ttm_buffer_object *bo,

>>   		if (unlock_resv)

>>   			kcl_reservation_object_unlock(bo->resv);

>>   		spin_unlock(&glob->lru_lock);

>> +		kref_put(&bo->list_kref, ttm_bo_release_list);

>>   		return ret;

>>   	}

>>

>>   	ttm_bo_del_from_lru(bo);

>>   	list_del_init(&bo->ddestroy);

>>   	kref_put(&bo->list_kref, ttm_bo_ref_bug);

>> -

>>   	spin_unlock(&glob->lru_lock);

>>   	ttm_bo_cleanup_memtype_use(bo);

>> +	kref_put(&bo->list_kref, ttm_bo_release_list);

>> +

>> +	spin_lock(&glob->lru_lock);

>> +	list_for_each_entry_safe(resv_bo, resv_bo_next, &bo->bdev-

>>ddestroy, ddestroy) {

>> +		if (resv_bo->resv == bo->resv) {

>> +			ttm_bo_del_from_lru(resv_bo);

>> +			list_del_init(&resv_bo->ddestroy);

>> +			spin_unlock(&glob->lru_lock);

>> +			ttm_bo_cleanup_memtype_use(resv_bo);

>> +			kref_put(&resv_bo->list_kref, ttm_bo_release_list);

>> +			spin_lock(&glob->lru_lock);

>> +		}

>> +	}

>> +	spin_unlock(&glob->lru_lock);

>>

>>   	if (unlock_resv)

>>   		kcl_reservation_object_unlock(bo->resv);

>> @@ -639,9 +659,8 @@ static bool ttm_bo_delayed_delete(struct

>ttm_bo_device *bdev, bool remove_all)

>>   			ttm_bo_cleanup_refs(bo, false, !remove_all, true);

>>   		} else {

>>   			spin_unlock(&glob->lru_lock);

>> +			kref_put(&bo->list_kref, ttm_bo_release_list);

>>   		}

>> -

>> -		kref_put(&bo->list_kref, ttm_bo_release_list);

>>   		spin_lock(&glob->lru_lock);

>>   	}

>>   	list_splice_tail(&removed, &bdev->ddestroy);
Hi guys,

> Do you have any suggestion about this? For per vm bo, it seems always delay to free the ttm bo.

Yeah, and that is correct behavior.

Since we don't know who is using a per-vm BO we need to wait for all 
command submissions in flight when it is freed.

For this we copy the current state of the shared reservation object into 
the private one in ttm_bo_individualize_resv.

Regards,
Christian.

Am 15.07.19 um 08:49 schrieb Deng, Emily:
> Hi David,

>       You are right, it will copy per-vm resv.

>       But currently, it still has the delay free issue which non per vm bo doesn't has. Maybe it already has new fences append to this resv object before copy.

>

> Hi Christian,

>      Do you have any suggestion about this? For per vm bo, it seems always delay to free the ttm bo.

>

> Best wishes

> Emily Deng

>> -----Original Message-----

>> From: Zhou, David(ChunMing) <David1.Zhou@amd.com>

>> Sent: Wednesday, July 10, 2019 9:28 PM

>> To: Deng, Emily <Emily.Deng@amd.com>; amd-gfx@lists.freedesktop.org

>> Subject: Re: [PATCH] drm/ttm: Fix the memory delay free issue

>>

>> It doesn't make sense that freeing BO still uses per-vm resv.

>>

>> I remember when BO is in release list, its resv will be from per-vm resv copy.

>> Could you check it?

>>

>> -David

>>

>> 在 2019/7/10 17:29, Emily Deng 写道:

>>> For vulkan cts allocation test cases, they will create a series of

>>> bos, and then free them. As it has lots of alloction test cases with

>>> the same vm, as per vm bo feature enable, all of those bos' resv are

>>> the same. But the bo free is quite slow, as they use the same resv

>>> object, for every time, free a bo, it will check the resv whether

>>> signal, if it signal, then will free it. But as the test cases will

>>> continue to create bo, and the resv fence is increasing. So the free is more

>> slower than creating. It will cause memory exhausting.

>>> Method:

>>> When the resv signal, release all the bos which are use the same resv

>>> object.

>>>

>>> Signed-off-by: Emily Deng <Emily.Deng@amd.com>

>>> ---

>>>    drivers/gpu/drm/ttm/ttm_bo.c | 29 ++++++++++++++++++++++++-----

>>>    1 file changed, 24 insertions(+), 5 deletions(-)

>>>

>>> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c

>>> b/drivers/gpu/drm/ttm/ttm_bo.c index f9a3d4c..57ec59b 100644

>>> --- a/drivers/gpu/drm/ttm/ttm_bo.c

>>> +++ b/drivers/gpu/drm/ttm/ttm_bo.c

>>> @@ -543,6 +543,7 @@ static int ttm_bo_cleanup_refs(struct

>> ttm_buffer_object *bo,

>>>    {

>>>    	struct ttm_bo_global *glob = bo->bdev->glob;

>>>    	struct reservation_object *resv;

>>> +	struct ttm_buffer_object *resv_bo, *resv_bo_next;

>>>    	int ret;

>>>

>>>    	if (unlikely(list_empty(&bo->ddestroy)))

>>> @@ -566,10 +567,14 @@ static int ttm_bo_cleanup_refs(struct

>> ttm_buffer_object *bo,

>>>    							   interruptible,

>>>    							   30 * HZ);

>>>

>>> -		if (lret < 0)

>>> +		if (lret < 0) {

>>> +			kref_put(&bo->list_kref, ttm_bo_release_list);

>>>    			return lret;

>>> -		else if (lret == 0)

>>> +		}

>>> +		else if (lret == 0) {

>>> +			kref_put(&bo->list_kref, ttm_bo_release_list);

>>>    			return -EBUSY;

>>> +		}

>>>

>>>    		spin_lock(&glob->lru_lock);

>>>    		if (unlock_resv && !kcl_reservation_object_trylock(bo->resv))

>> { @@

>>> -582,6 +587,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object

>> *bo,

>>>    			 * here.

>>>    			 */

>>>    			spin_unlock(&glob->lru_lock);

>>> +			kref_put(&bo->list_kref, ttm_bo_release_list);

>>>    			return 0;

>>>    		}

>>>    		ret = 0;

>>> @@ -591,15 +597,29 @@ static int ttm_bo_cleanup_refs(struct

>> ttm_buffer_object *bo,

>>>    		if (unlock_resv)

>>>    			kcl_reservation_object_unlock(bo->resv);

>>>    		spin_unlock(&glob->lru_lock);

>>> +		kref_put(&bo->list_kref, ttm_bo_release_list);

>>>    		return ret;

>>>    	}

>>>

>>>    	ttm_bo_del_from_lru(bo);

>>>    	list_del_init(&bo->ddestroy);

>>>    	kref_put(&bo->list_kref, ttm_bo_ref_bug);

>>> -

>>>    	spin_unlock(&glob->lru_lock);

>>>    	ttm_bo_cleanup_memtype_use(bo);

>>> +	kref_put(&bo->list_kref, ttm_bo_release_list);

>>> +

>>> +	spin_lock(&glob->lru_lock);

>>> +	list_for_each_entry_safe(resv_bo, resv_bo_next, &bo->bdev-

>>> ddestroy, ddestroy) {

>>> +		if (resv_bo->resv == bo->resv) {

>>> +			ttm_bo_del_from_lru(resv_bo);

>>> +			list_del_init(&resv_bo->ddestroy);

>>> +			spin_unlock(&glob->lru_lock);

>>> +			ttm_bo_cleanup_memtype_use(resv_bo);

>>> +			kref_put(&resv_bo->list_kref, ttm_bo_release_list);

>>> +			spin_lock(&glob->lru_lock);

>>> +		}

>>> +	}

>>> +	spin_unlock(&glob->lru_lock);

>>>

>>>    	if (unlock_resv)

>>>    		kcl_reservation_object_unlock(bo->resv);

>>> @@ -639,9 +659,8 @@ static bool ttm_bo_delayed_delete(struct

>> ttm_bo_device *bdev, bool remove_all)

>>>    			ttm_bo_cleanup_refs(bo, false, !remove_all, true);

>>>    		} else {

>>>    			spin_unlock(&glob->lru_lock);

>>> +			kref_put(&bo->list_kref, ttm_bo_release_list);

>>>    		}

>>> -

>>> -		kref_put(&bo->list_kref, ttm_bo_release_list);

>>>    		spin_lock(&glob->lru_lock);

>>>    	}

>>>    	list_splice_tail(&removed, &bdev->ddestroy);
Hi Christian,
    As has this behavior, when test vulkan cts allocation test, it will exhausting the memory, and cause out of memory. Do you think we don't need to fix it?

Best wishes
Emily Deng
>-----Original Message-----

>From: Koenig, Christian <Christian.Koenig@amd.com>

>Sent: Monday, July 15, 2019 5:31 PM

>To: Deng, Emily <Emily.Deng@amd.com>; Zhou, David(ChunMing)

><David1.Zhou@amd.com>

>Cc: amd-gfx@lists.freedesktop.org

>Subject: Re: [PATCH] drm/ttm: Fix the memory delay free issue

>

>Hi guys,

>

>> Do you have any suggestion about this? For per vm bo, it seems always

>delay to free the ttm bo.

>Yeah, and that is correct behavior.

>

>Since we don't know who is using a per-vm BO we need to wait for all

>command submissions in flight when it is freed.

>

>For this we copy the current state of the shared reservation object into the

>private one in ttm_bo_individualize_resv.

>

>Regards,

>Christian.

>

>Am 15.07.19 um 08:49 schrieb Deng, Emily:

>> Hi David,

>>       You are right, it will copy per-vm resv.

>>       But currently, it still has the delay free issue which non per vm bo doesn't

>has. Maybe it already has new fences append to this resv object before copy.

>>

>> Hi Christian,

>>      Do you have any suggestion about this? For per vm bo, it seems always

>delay to free the ttm bo.

>>

>> Best wishes

>> Emily Deng

>>> -----Original Message-----

>>> From: Zhou, David(ChunMing) <David1.Zhou@amd.com>

>>> Sent: Wednesday, July 10, 2019 9:28 PM

>>> To: Deng, Emily <Emily.Deng@amd.com>; amd-gfx@lists.freedesktop.org

>>> Subject: Re: [PATCH] drm/ttm: Fix the memory delay free issue

>>>

>>> It doesn't make sense that freeing BO still uses per-vm resv.

>>>

>>> I remember when BO is in release list, its resv will be from per-vm resv copy.

>>> Could you check it?

>>>

>>> -David

>>>

>>> 在 2019/7/10 17:29, Emily Deng 写道:

>>>> For vulkan cts allocation test cases, they will create a series of

>>>> bos, and then free them. As it has lots of alloction test cases with

>>>> the same vm, as per vm bo feature enable, all of those bos' resv are

>>>> the same. But the bo free is quite slow, as they use the same resv

>>>> object, for every time, free a bo, it will check the resv whether

>>>> signal, if it signal, then will free it. But as the test cases will

>>>> continue to create bo, and the resv fence is increasing. So the free

>>>> is more

>>> slower than creating. It will cause memory exhausting.

>>>> Method:

>>>> When the resv signal, release all the bos which are use the same

>>>> resv object.

>>>>

>>>> Signed-off-by: Emily Deng <Emily.Deng@amd.com>

>>>> ---

>>>>    drivers/gpu/drm/ttm/ttm_bo.c | 29 ++++++++++++++++++++++++-----

>>>>    1 file changed, 24 insertions(+), 5 deletions(-)

>>>>

>>>> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c

>>>> b/drivers/gpu/drm/ttm/ttm_bo.c index f9a3d4c..57ec59b 100644

>>>> --- a/drivers/gpu/drm/ttm/ttm_bo.c

>>>> +++ b/drivers/gpu/drm/ttm/ttm_bo.c

>>>> @@ -543,6 +543,7 @@ static int ttm_bo_cleanup_refs(struct

>>> ttm_buffer_object *bo,

>>>>    {

>>>>    	struct ttm_bo_global *glob = bo->bdev->glob;

>>>>    	struct reservation_object *resv;

>>>> +	struct ttm_buffer_object *resv_bo, *resv_bo_next;

>>>>    	int ret;

>>>>

>>>>    	if (unlikely(list_empty(&bo->ddestroy)))

>>>> @@ -566,10 +567,14 @@ static int ttm_bo_cleanup_refs(struct

>>> ttm_buffer_object *bo,

>>>>    							   interruptible,

>>>>    							   30 * HZ);

>>>>

>>>> -		if (lret < 0)

>>>> +		if (lret < 0) {

>>>> +			kref_put(&bo->list_kref, ttm_bo_release_list);

>>>>    			return lret;

>>>> -		else if (lret == 0)

>>>> +		}

>>>> +		else if (lret == 0) {

>>>> +			kref_put(&bo->list_kref, ttm_bo_release_list);

>>>>    			return -EBUSY;

>>>> +		}

>>>>

>>>>    		spin_lock(&glob->lru_lock);

>>>>    		if (unlock_resv && !kcl_reservation_object_trylock(bo->resv))

>>> { @@

>>>> -582,6 +587,7 @@ static int ttm_bo_cleanup_refs(struct

>>>> ttm_buffer_object

>>> *bo,

>>>>    			 * here.

>>>>    			 */

>>>>    			spin_unlock(&glob->lru_lock);

>>>> +			kref_put(&bo->list_kref, ttm_bo_release_list);

>>>>    			return 0;

>>>>    		}

>>>>    		ret = 0;

>>>> @@ -591,15 +597,29 @@ static int ttm_bo_cleanup_refs(struct

>>> ttm_buffer_object *bo,

>>>>    		if (unlock_resv)

>>>>    			kcl_reservation_object_unlock(bo->resv);

>>>>    		spin_unlock(&glob->lru_lock);

>>>> +		kref_put(&bo->list_kref, ttm_bo_release_list);

>>>>    		return ret;

>>>>    	}

>>>>

>>>>    	ttm_bo_del_from_lru(bo);

>>>>    	list_del_init(&bo->ddestroy);

>>>>    	kref_put(&bo->list_kref, ttm_bo_ref_bug);

>>>> -

>>>>    	spin_unlock(&glob->lru_lock);

>>>>    	ttm_bo_cleanup_memtype_use(bo);

>>>> +	kref_put(&bo->list_kref, ttm_bo_release_list);

>>>> +

>>>> +	spin_lock(&glob->lru_lock);

>>>> +	list_for_each_entry_safe(resv_bo, resv_bo_next, &bo->bdev-

>>>> ddestroy, ddestroy) {

>>>> +		if (resv_bo->resv == bo->resv) {

>>>> +			ttm_bo_del_from_lru(resv_bo);

>>>> +			list_del_init(&resv_bo->ddestroy);

>>>> +			spin_unlock(&glob->lru_lock);

>>>> +			ttm_bo_cleanup_memtype_use(resv_bo);

>>>> +			kref_put(&resv_bo->list_kref, ttm_bo_release_list);

>>>> +			spin_lock(&glob->lru_lock);

>>>> +		}

>>>> +	}

>>>> +	spin_unlock(&glob->lru_lock);

>>>>

>>>>    	if (unlock_resv)

>>>>    		kcl_reservation_object_unlock(bo->resv);

>>>> @@ -639,9 +659,8 @@ static bool ttm_bo_delayed_delete(struct

>>> ttm_bo_device *bdev, bool remove_all)

>>>>    			ttm_bo_cleanup_refs(bo, false, !remove_all, true);

>>>>    		} else {

>>>>    			spin_unlock(&glob->lru_lock);

>>>> +			kref_put(&bo->list_kref, ttm_bo_release_list);

>>>>    		}

>>>> -

>>>> -		kref_put(&bo->list_kref, ttm_bo_release_list);

>>>>    		spin_lock(&glob->lru_lock);

>>>>    	}

>>>>    	list_splice_tail(&removed, &bdev->ddestroy);
> Do you think we don't need to fix it?

No, when the application is exhausting memory then we can't expect 
anything else here.

See memory freeing is always delayed until it isn't used any more or 
when the process is killed after access is prevented (by clearing page 
tables for example).

What we could do is maybe look into why we don't block until the memory 
is freed during command submission, but apart from that this sounds like 
perfectly expected behavior.

Regards,
Christian.

Am 15.07.19 um 11:34 schrieb Deng, Emily:
> Hi Christian,

>      As has this behavior, when test vulkan cts allocation test, it will exhausting the memory, and cause out of memory. Do you think we don't need to fix it?

>

> Best wishes

> Emily Deng

>> -----Original Message-----

>> From: Koenig, Christian <Christian.Koenig@amd.com>

>> Sent: Monday, July 15, 2019 5:31 PM

>> To: Deng, Emily <Emily.Deng@amd.com>; Zhou, David(ChunMing)

>> <David1.Zhou@amd.com>

>> Cc: amd-gfx@lists.freedesktop.org

>> Subject: Re: [PATCH] drm/ttm: Fix the memory delay free issue

>>

>> Hi guys,

>>

>>> Do you have any suggestion about this? For per vm bo, it seems always

>> delay to free the ttm bo.

>> Yeah, and that is correct behavior.

>>

>> Since we don't know who is using a per-vm BO we need to wait for all

>> command submissions in flight when it is freed.

>>

>> For this we copy the current state of the shared reservation object into the

>> private one in ttm_bo_individualize_resv.

>>

>> Regards,

>> Christian.

>>

>> Am 15.07.19 um 08:49 schrieb Deng, Emily:

>>> Hi David,

>>>        You are right, it will copy per-vm resv.

>>>        But currently, it still has the delay free issue which non per vm bo doesn't

>> has. Maybe it already has new fences append to this resv object before copy.

>>> Hi Christian,

>>>       Do you have any suggestion about this? For per vm bo, it seems always

>> delay to free the ttm bo.

>>> Best wishes

>>> Emily Deng

>>>> -----Original Message-----

>>>> From: Zhou, David(ChunMing) <David1.Zhou@amd.com>

>>>> Sent: Wednesday, July 10, 2019 9:28 PM

>>>> To: Deng, Emily <Emily.Deng@amd.com>; amd-gfx@lists.freedesktop.org

>>>> Subject: Re: [PATCH] drm/ttm: Fix the memory delay free issue

>>>>

>>>> It doesn't make sense that freeing BO still uses per-vm resv.

>>>>

>>>> I remember when BO is in release list, its resv will be from per-vm resv copy.

>>>> Could you check it?

>>>>

>>>> -David

>>>>

>>>> 在 2019/7/10 17:29, Emily Deng 写道:

>>>>> For vulkan cts allocation test cases, they will create a series of

>>>>> bos, and then free them. As it has lots of alloction test cases with

>>>>> the same vm, as per vm bo feature enable, all of those bos' resv are

>>>>> the same. But the bo free is quite slow, as they use the same resv

>>>>> object, for every time, free a bo, it will check the resv whether

>>>>> signal, if it signal, then will free it. But as the test cases will

>>>>> continue to create bo, and the resv fence is increasing. So the free

>>>>> is more

>>>> slower than creating. It will cause memory exhausting.

>>>>> Method:

>>>>> When the resv signal, release all the bos which are use the same

>>>>> resv object.

>>>>>

>>>>> Signed-off-by: Emily Deng <Emily.Deng@amd.com>

>>>>> ---

>>>>>     drivers/gpu/drm/ttm/ttm_bo.c | 29 ++++++++++++++++++++++++-----

>>>>>     1 file changed, 24 insertions(+), 5 deletions(-)

>>>>>

>>>>> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c

>>>>> b/drivers/gpu/drm/ttm/ttm_bo.c index f9a3d4c..57ec59b 100644

>>>>> --- a/drivers/gpu/drm/ttm/ttm_bo.c

>>>>> +++ b/drivers/gpu/drm/ttm/ttm_bo.c

>>>>> @@ -543,6 +543,7 @@ static int ttm_bo_cleanup_refs(struct

>>>> ttm_buffer_object *bo,

>>>>>     {

>>>>>     	struct ttm_bo_global *glob = bo->bdev->glob;

>>>>>     	struct reservation_object *resv;

>>>>> +	struct ttm_buffer_object *resv_bo, *resv_bo_next;

>>>>>     	int ret;

>>>>>

>>>>>     	if (unlikely(list_empty(&bo->ddestroy)))

>>>>> @@ -566,10 +567,14 @@ static int ttm_bo_cleanup_refs(struct

>>>> ttm_buffer_object *bo,

>>>>>     							   interruptible,

>>>>>     							   30 * HZ);

>>>>>

>>>>> -		if (lret < 0)

>>>>> +		if (lret < 0) {

>>>>> +			kref_put(&bo->list_kref, ttm_bo_release_list);

>>>>>     			return lret;

>>>>> -		else if (lret == 0)

>>>>> +		}

>>>>> +		else if (lret == 0) {

>>>>> +			kref_put(&bo->list_kref, ttm_bo_release_list);

>>>>>     			return -EBUSY;

>>>>> +		}

>>>>>

>>>>>     		spin_lock(&glob->lru_lock);

>>>>>     		if (unlock_resv && !kcl_reservation_object_trylock(bo->resv))

>>>> { @@

>>>>> -582,6 +587,7 @@ static int ttm_bo_cleanup_refs(struct

>>>>> ttm_buffer_object

>>>> *bo,

>>>>>     			 * here.

>>>>>     			 */

>>>>>     			spin_unlock(&glob->lru_lock);

>>>>> +			kref_put(&bo->list_kref, ttm_bo_release_list);

>>>>>     			return 0;

>>>>>     		}

>>>>>     		ret = 0;

>>>>> @@ -591,15 +597,29 @@ static int ttm_bo_cleanup_refs(struct

>>>> ttm_buffer_object *bo,

>>>>>     		if (unlock_resv)

>>>>>     			kcl_reservation_object_unlock(bo->resv);

>>>>>     		spin_unlock(&glob->lru_lock);

>>>>> +		kref_put(&bo->list_kref, ttm_bo_release_list);

>>>>>     		return ret;

>>>>>     	}

>>>>>

>>>>>     	ttm_bo_del_from_lru(bo);

>>>>>     	list_del_init(&bo->ddestroy);

>>>>>     	kref_put(&bo->list_kref, ttm_bo_ref_bug);

>>>>> -

>>>>>     	spin_unlock(&glob->lru_lock);

>>>>>     	ttm_bo_cleanup_memtype_use(bo);

>>>>> +	kref_put(&bo->list_kref, ttm_bo_release_list);

>>>>> +

>>>>> +	spin_lock(&glob->lru_lock);

>>>>> +	list_for_each_entry_safe(resv_bo, resv_bo_next, &bo->bdev-

>>>>> ddestroy, ddestroy) {

>>>>> +		if (resv_bo->resv == bo->resv) {

>>>>> +			ttm_bo_del_from_lru(resv_bo);

>>>>> +			list_del_init(&resv_bo->ddestroy);

>>>>> +			spin_unlock(&glob->lru_lock);

>>>>> +			ttm_bo_cleanup_memtype_use(resv_bo);

>>>>> +			kref_put(&resv_bo->list_kref, ttm_bo_release_list);

>>>>> +			spin_lock(&glob->lru_lock);

>>>>> +		}

>>>>> +	}

>>>>> +	spin_unlock(&glob->lru_lock);

>>>>>

>>>>>     	if (unlock_resv)

>>>>>     		kcl_reservation_object_unlock(bo->resv);

>>>>> @@ -639,9 +659,8 @@ static bool ttm_bo_delayed_delete(struct

>>>> ttm_bo_device *bdev, bool remove_all)

>>>>>     			ttm_bo_cleanup_refs(bo, false, !remove_all, true);

>>>>>     		} else {

>>>>>     			spin_unlock(&glob->lru_lock);

>>>>> +			kref_put(&bo->list_kref, ttm_bo_release_list);

>>>>>     		}

>>>>> -

>>>>> -		kref_put(&bo->list_kref, ttm_bo_release_list);

>>>>>     		spin_lock(&glob->lru_lock);

>>>>>     	}

>>>>>     	list_splice_tail(&removed, &bdev->ddestroy);
Hi Christian,
     Do you think we could free all those bos those are in current destroy list when the current resv is signal in ttm_bo_cleanup_refs?

Best wishes
Emily Deng

>-----Original Message-----

>From: Koenig, Christian <Christian.Koenig@amd.com>

>Sent: Monday, July 15, 2019 5:41 PM

>To: Deng, Emily <Emily.Deng@amd.com>; Zhou, David(ChunMing)

><David1.Zhou@amd.com>

>Cc: amd-gfx@lists.freedesktop.org

>Subject: Re: [PATCH] drm/ttm: Fix the memory delay free issue

>

>> Do you think we don't need to fix it?

>No, when the application is exhausting memory then we can't expect anything

>else here.

>

>See memory freeing is always delayed until it isn't used any more or when the

>process is killed after access is prevented (by clearing page tables for example).

>

>What we could do is maybe look into why we don't block until the memory is

>freed during command submission, but apart from that this sounds like

>perfectly expected behavior.

>

>Regards,

>Christian.

>

>Am 15.07.19 um 11:34 schrieb Deng, Emily:

>> Hi Christian,

>>      As has this behavior, when test vulkan cts allocation test, it will

>exhausting the memory, and cause out of memory. Do you think we don't

>need to fix it?

>>

>> Best wishes

>> Emily Deng

>>> -----Original Message-----

>>> From: Koenig, Christian <Christian.Koenig@amd.com>

>>> Sent: Monday, July 15, 2019 5:31 PM

>>> To: Deng, Emily <Emily.Deng@amd.com>; Zhou, David(ChunMing)

>>> <David1.Zhou@amd.com>

>>> Cc: amd-gfx@lists.freedesktop.org

>>> Subject: Re: [PATCH] drm/ttm: Fix the memory delay free issue

>>>

>>> Hi guys,

>>>

>>>> Do you have any suggestion about this? For per vm bo, it seems

>>>> always

>>> delay to free the ttm bo.

>>> Yeah, and that is correct behavior.

>>>

>>> Since we don't know who is using a per-vm BO we need to wait for all

>>> command submissions in flight when it is freed.

>>>

>>> For this we copy the current state of the shared reservation object

>>> into the private one in ttm_bo_individualize_resv.

>>>

>>> Regards,

>>> Christian.

>>>

>>> Am 15.07.19 um 08:49 schrieb Deng, Emily:

>>>> Hi David,

>>>>        You are right, it will copy per-vm resv.

>>>>        But currently, it still has the delay free issue which non

>>>> per vm bo doesn't

>>> has. Maybe it already has new fences append to this resv object before

>copy.

>>>> Hi Christian,

>>>>       Do you have any suggestion about this? For per vm bo, it seems

>>>> always

>>> delay to free the ttm bo.

>>>> Best wishes

>>>> Emily Deng

>>>>> -----Original Message-----

>>>>> From: Zhou, David(ChunMing) <David1.Zhou@amd.com>

>>>>> Sent: Wednesday, July 10, 2019 9:28 PM

>>>>> To: Deng, Emily <Emily.Deng@amd.com>; amd-

>gfx@lists.freedesktop.org

>>>>> Subject: Re: [PATCH] drm/ttm: Fix the memory delay free issue

>>>>>

>>>>> It doesn't make sense that freeing BO still uses per-vm resv.

>>>>>

>>>>> I remember when BO is in release list, its resv will be from per-vm resv

>copy.

>>>>> Could you check it?

>>>>>

>>>>> -David

>>>>>

>>>>> 在 2019/7/10 17:29, Emily Deng 写道:

>>>>>> For vulkan cts allocation test cases, they will create a series of

>>>>>> bos, and then free them. As it has lots of alloction test cases

>>>>>> with the same vm, as per vm bo feature enable, all of those bos'

>>>>>> resv are the same. But the bo free is quite slow, as they use the

>>>>>> same resv object, for every time, free a bo, it will check the

>>>>>> resv whether signal, if it signal, then will free it. But as the

>>>>>> test cases will continue to create bo, and the resv fence is

>>>>>> increasing. So the free is more

>>>>> slower than creating. It will cause memory exhausting.

>>>>>> Method:

>>>>>> When the resv signal, release all the bos which are use the same

>>>>>> resv object.

>>>>>>

>>>>>> Signed-off-by: Emily Deng <Emily.Deng@amd.com>

>>>>>> ---

>>>>>>     drivers/gpu/drm/ttm/ttm_bo.c | 29 ++++++++++++++++++++++++----

>-

>>>>>>     1 file changed, 24 insertions(+), 5 deletions(-)

>>>>>>

>>>>>> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c

>>>>>> b/drivers/gpu/drm/ttm/ttm_bo.c index f9a3d4c..57ec59b 100644

>>>>>> --- a/drivers/gpu/drm/ttm/ttm_bo.c

>>>>>> +++ b/drivers/gpu/drm/ttm/ttm_bo.c

>>>>>> @@ -543,6 +543,7 @@ static int ttm_bo_cleanup_refs(struct

>>>>> ttm_buffer_object *bo,

>>>>>>     {

>>>>>>     	struct ttm_bo_global *glob = bo->bdev->glob;

>>>>>>     	struct reservation_object *resv;

>>>>>> +	struct ttm_buffer_object *resv_bo, *resv_bo_next;

>>>>>>     	int ret;

>>>>>>

>>>>>>     	if (unlikely(list_empty(&bo->ddestroy)))

>>>>>> @@ -566,10 +567,14 @@ static int ttm_bo_cleanup_refs(struct

>>>>> ttm_buffer_object *bo,

>>>>>>     							   interruptible,

>>>>>>     							   30 * HZ);

>>>>>>

>>>>>> -		if (lret < 0)

>>>>>> +		if (lret < 0) {

>>>>>> +			kref_put(&bo->list_kref, ttm_bo_release_list);

>>>>>>     			return lret;

>>>>>> -		else if (lret == 0)

>>>>>> +		}

>>>>>> +		else if (lret == 0) {

>>>>>> +			kref_put(&bo->list_kref, ttm_bo_release_list);

>>>>>>     			return -EBUSY;

>>>>>> +		}

>>>>>>

>>>>>>     		spin_lock(&glob->lru_lock);

>>>>>>     		if (unlock_resv &&

>>>>>> !kcl_reservation_object_trylock(bo->resv))

>>>>> { @@

>>>>>> -582,6 +587,7 @@ static int ttm_bo_cleanup_refs(struct

>>>>>> ttm_buffer_object

>>>>> *bo,

>>>>>>     			 * here.

>>>>>>     			 */

>>>>>>     			spin_unlock(&glob->lru_lock);

>>>>>> +			kref_put(&bo->list_kref, ttm_bo_release_list);

>>>>>>     			return 0;

>>>>>>     		}

>>>>>>     		ret = 0;

>>>>>> @@ -591,15 +597,29 @@ static int ttm_bo_cleanup_refs(struct

>>>>> ttm_buffer_object *bo,

>>>>>>     		if (unlock_resv)

>>>>>>     			kcl_reservation_object_unlock(bo->resv);

>>>>>>     		spin_unlock(&glob->lru_lock);

>>>>>> +		kref_put(&bo->list_kref, ttm_bo_release_list);

>>>>>>     		return ret;

>>>>>>     	}

>>>>>>

>>>>>>     	ttm_bo_del_from_lru(bo);

>>>>>>     	list_del_init(&bo->ddestroy);

>>>>>>     	kref_put(&bo->list_kref, ttm_bo_ref_bug);

>>>>>> -

>>>>>>     	spin_unlock(&glob->lru_lock);

>>>>>>     	ttm_bo_cleanup_memtype_use(bo);

>>>>>> +	kref_put(&bo->list_kref, ttm_bo_release_list);

>>>>>> +

>>>>>> +	spin_lock(&glob->lru_lock);

>>>>>> +	list_for_each_entry_safe(resv_bo, resv_bo_next, &bo->bdev-

>>>>>> ddestroy, ddestroy) {

>>>>>> +		if (resv_bo->resv == bo->resv) {

>>>>>> +			ttm_bo_del_from_lru(resv_bo);

>>>>>> +			list_del_init(&resv_bo->ddestroy);

>>>>>> +			spin_unlock(&glob->lru_lock);

>>>>>> +			ttm_bo_cleanup_memtype_use(resv_bo);

>>>>>> +			kref_put(&resv_bo->list_kref,

>ttm_bo_release_list);

>>>>>> +			spin_lock(&glob->lru_lock);

>>>>>> +		}

>>>>>> +	}

>>>>>> +	spin_unlock(&glob->lru_lock);

>>>>>>

>>>>>>     	if (unlock_resv)

>>>>>>     		kcl_reservation_object_unlock(bo->resv);

>>>>>> @@ -639,9 +659,8 @@ static bool ttm_bo_delayed_delete(struct

>>>>> ttm_bo_device *bdev, bool remove_all)

>>>>>>     			ttm_bo_cleanup_refs(bo, false, !remove_all,

>true);

>>>>>>     		} else {

>>>>>>     			spin_unlock(&glob->lru_lock);

>>>>>> +			kref_put(&bo->list_kref, ttm_bo_release_list);

>>>>>>     		}

>>>>>> -

>>>>>> -		kref_put(&bo->list_kref, ttm_bo_release_list);

>>>>>>     		spin_lock(&glob->lru_lock);

>>>>>>     	}

>>>>>>     	list_splice_tail(&removed, &bdev->ddestroy);
Hi Emily,

no, we can only cleanup the current one because we don't have a 
reference to the other ones.

At least that's how I understand you question,
Christian.

Am 15.07.19 um 12:47 schrieb Deng, Emily:
> Hi Christian,

>       Do you think we could free all those bos those are in current destroy list when the current resv is signal in ttm_bo_cleanup_refs?

>

> Best wishes

> Emily Deng

>

>> -----Original Message-----

>> From: Koenig, Christian <Christian.Koenig@amd.com>

>> Sent: Monday, July 15, 2019 5:41 PM

>> To: Deng, Emily <Emily.Deng@amd.com>; Zhou, David(ChunMing)

>> <David1.Zhou@amd.com>

>> Cc: amd-gfx@lists.freedesktop.org

>> Subject: Re: [PATCH] drm/ttm: Fix the memory delay free issue

>>

>>> Do you think we don't need to fix it?

>> No, when the application is exhausting memory then we can't expect anything

>> else here.

>>

>> See memory freeing is always delayed until it isn't used any more or when the

>> process is killed after access is prevented (by clearing page tables for example).

>>

>> What we could do is maybe look into why we don't block until the memory is

>> freed during command submission, but apart from that this sounds like

>> perfectly expected behavior.

>>

>> Regards,

>> Christian.

>>

>> Am 15.07.19 um 11:34 schrieb Deng, Emily:

>>> Hi Christian,

>>>       As has this behavior, when test vulkan cts allocation test, it will

>> exhausting the memory, and cause out of memory. Do you think we don't

>> need to fix it?

>>> Best wishes

>>> Emily Deng

>>>> -----Original Message-----

>>>> From: Koenig, Christian <Christian.Koenig@amd.com>

>>>> Sent: Monday, July 15, 2019 5:31 PM

>>>> To: Deng, Emily <Emily.Deng@amd.com>; Zhou, David(ChunMing)

>>>> <David1.Zhou@amd.com>

>>>> Cc: amd-gfx@lists.freedesktop.org

>>>> Subject: Re: [PATCH] drm/ttm: Fix the memory delay free issue

>>>>

>>>> Hi guys,

>>>>

>>>>> Do you have any suggestion about this? For per vm bo, it seems

>>>>> always

>>>> delay to free the ttm bo.

>>>> Yeah, and that is correct behavior.

>>>>

>>>> Since we don't know who is using a per-vm BO we need to wait for all

>>>> command submissions in flight when it is freed.

>>>>

>>>> For this we copy the current state of the shared reservation object

>>>> into the private one in ttm_bo_individualize_resv.

>>>>

>>>> Regards,

>>>> Christian.

>>>>

>>>> Am 15.07.19 um 08:49 schrieb Deng, Emily:

>>>>> Hi David,

>>>>>         You are right, it will copy per-vm resv.

>>>>>         But currently, it still has the delay free issue which non

>>>>> per vm bo doesn't

>>>> has. Maybe it already has new fences append to this resv object before

>> copy.

>>>>> Hi Christian,

>>>>>        Do you have any suggestion about this? For per vm bo, it seems

>>>>> always

>>>> delay to free the ttm bo.

>>>>> Best wishes

>>>>> Emily Deng

>>>>>> -----Original Message-----

>>>>>> From: Zhou, David(ChunMing) <David1.Zhou@amd.com>

>>>>>> Sent: Wednesday, July 10, 2019 9:28 PM

>>>>>> To: Deng, Emily <Emily.Deng@amd.com>; amd-

>> gfx@lists.freedesktop.org

>>>>>> Subject: Re: [PATCH] drm/ttm: Fix the memory delay free issue

>>>>>>

>>>>>> It doesn't make sense that freeing BO still uses per-vm resv.

>>>>>>

>>>>>> I remember when BO is in release list, its resv will be from per-vm resv

>> copy.

>>>>>> Could you check it?

>>>>>>

>>>>>> -David

>>>>>>

>>>>>> 在 2019/7/10 17:29, Emily Deng 写道:

>>>>>>> For vulkan cts allocation test cases, they will create a series of

>>>>>>> bos, and then free them. As it has lots of alloction test cases

>>>>>>> with the same vm, as per vm bo feature enable, all of those bos'

>>>>>>> resv are the same. But the bo free is quite slow, as they use the

>>>>>>> same resv object, for every time, free a bo, it will check the

>>>>>>> resv whether signal, if it signal, then will free it. But as the

>>>>>>> test cases will continue to create bo, and the resv fence is

>>>>>>> increasing. So the free is more

>>>>>> slower than creating. It will cause memory exhausting.

>>>>>>> Method:

>>>>>>> When the resv signal, release all the bos which are use the same

>>>>>>> resv object.

>>>>>>>

>>>>>>> Signed-off-by: Emily Deng <Emily.Deng@amd.com>

>>>>>>> ---

>>>>>>>      drivers/gpu/drm/ttm/ttm_bo.c | 29 ++++++++++++++++++++++++----

>> -

>>>>>>>      1 file changed, 24 insertions(+), 5 deletions(-)

>>>>>>>

>>>>>>> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c

>>>>>>> b/drivers/gpu/drm/ttm/ttm_bo.c index f9a3d4c..57ec59b 100644

>>>>>>> --- a/drivers/gpu/drm/ttm/ttm_bo.c

>>>>>>> +++ b/drivers/gpu/drm/ttm/ttm_bo.c

>>>>>>> @@ -543,6 +543,7 @@ static int ttm_bo_cleanup_refs(struct

>>>>>> ttm_buffer_object *bo,

>>>>>>>      {

>>>>>>>      	struct ttm_bo_global *glob = bo->bdev->glob;

>>>>>>>      	struct reservation_object *resv;

>>>>>>> +	struct ttm_buffer_object *resv_bo, *resv_bo_next;

>>>>>>>      	int ret;

>>>>>>>

>>>>>>>      	if (unlikely(list_empty(&bo->ddestroy)))

>>>>>>> @@ -566,10 +567,14 @@ static int ttm_bo_cleanup_refs(struct

>>>>>> ttm_buffer_object *bo,

>>>>>>>      							   interruptible,

>>>>>>>      							   30 * HZ);

>>>>>>>

>>>>>>> -		if (lret < 0)

>>>>>>> +		if (lret < 0) {

>>>>>>> +			kref_put(&bo->list_kref, ttm_bo_release_list);

>>>>>>>      			return lret;

>>>>>>> -		else if (lret == 0)

>>>>>>> +		}

>>>>>>> +		else if (lret == 0) {

>>>>>>> +			kref_put(&bo->list_kref, ttm_bo_release_list);

>>>>>>>      			return -EBUSY;

>>>>>>> +		}

>>>>>>>

>>>>>>>      		spin_lock(&glob->lru_lock);

>>>>>>>      		if (unlock_resv &&

>>>>>>> !kcl_reservation_object_trylock(bo->resv))

>>>>>> { @@

>>>>>>> -582,6 +587,7 @@ static int ttm_bo_cleanup_refs(struct

>>>>>>> ttm_buffer_object

>>>>>> *bo,

>>>>>>>      			 * here.

>>>>>>>      			 */

>>>>>>>      			spin_unlock(&glob->lru_lock);

>>>>>>> +			kref_put(&bo->list_kref, ttm_bo_release_list);

>>>>>>>      			return 0;

>>>>>>>      		}

>>>>>>>      		ret = 0;

>>>>>>> @@ -591,15 +597,29 @@ static int ttm_bo_cleanup_refs(struct

>>>>>> ttm_buffer_object *bo,

>>>>>>>      		if (unlock_resv)

>>>>>>>      			kcl_reservation_object_unlock(bo->resv);

>>>>>>>      		spin_unlock(&glob->lru_lock);

>>>>>>> +		kref_put(&bo->list_kref, ttm_bo_release_list);

>>>>>>>      		return ret;

>>>>>>>      	}

>>>>>>>

>>>>>>>      	ttm_bo_del_from_lru(bo);

>>>>>>>      	list_del_init(&bo->ddestroy);

>>>>>>>      	kref_put(&bo->list_kref, ttm_bo_ref_bug);

>>>>>>> -

>>>>>>>      	spin_unlock(&glob->lru_lock);

>>>>>>>      	ttm_bo_cleanup_memtype_use(bo);

>>>>>>> +	kref_put(&bo->list_kref, ttm_bo_release_list);

>>>>>>> +

>>>>>>> +	spin_lock(&glob->lru_lock);

>>>>>>> +	list_for_each_entry_safe(resv_bo, resv_bo_next, &bo->bdev-

>>>>>>> ddestroy, ddestroy) {

>>>>>>> +		if (resv_bo->resv == bo->resv) {

>>>>>>> +			ttm_bo_del_from_lru(resv_bo);

>>>>>>> +			list_del_init(&resv_bo->ddestroy);

>>>>>>> +			spin_unlock(&glob->lru_lock);

>>>>>>> +			ttm_bo_cleanup_memtype_use(resv_bo);

>>>>>>> +			kref_put(&resv_bo->list_kref,

>> ttm_bo_release_list);

>>>>>>> +			spin_lock(&glob->lru_lock);

>>>>>>> +		}

>>>>>>> +	}

>>>>>>> +	spin_unlock(&glob->lru_lock);

>>>>>>>

>>>>>>>      	if (unlock_resv)

>>>>>>>      		kcl_reservation_object_unlock(bo->resv);

>>>>>>> @@ -639,9 +659,8 @@ static bool ttm_bo_delayed_delete(struct

>>>>>> ttm_bo_device *bdev, bool remove_all)

>>>>>>>      			ttm_bo_cleanup_refs(bo, false, !remove_all,

>> true);

>>>>>>>      		} else {

>>>>>>>      			spin_unlock(&glob->lru_lock);

>>>>>>> +			kref_put(&bo->list_kref, ttm_bo_release_list);

>>>>>>>      		}

>>>>>>> -

>>>>>>> -		kref_put(&bo->list_kref, ttm_bo_release_list);

>>>>>>>      		spin_lock(&glob->lru_lock);

>>>>>>>      	}

>>>>>>>      	list_splice_tail(&removed, &bdev->ddestroy);