[2/2] drm/amdgpu: use kiq to do invalidate tlb

Submitted by Deng, Emily on Aug. 16, 2018, 1:05 p.m.

Details

Message ID 1534424701-6723-2-git-send-email-Emily.Deng@amd.com
State New
Headers show
Series "Series without cover letter" ( rev: 1 ) in AMD X.Org drivers

Not browsing as part of any series.

Commit Message

Deng, Emily Aug. 16, 2018, 1:05 p.m.
To avoid the tlb flush not interrupted by world switch, use kiq and one
command to do tlb invalidate.

Signed-off-by: Emily Deng <Emily.Deng@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h      |  4 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c |  3 --
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c    | 60 ++++++++++++++++++++++++++++++++
 3 files changed, 64 insertions(+), 3 deletions(-)

Patch hide | download patch | download mbox

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 6265b88..19ef7711 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -212,6 +212,10 @@  enum amdgpu_kiq_irq {
 	AMDGPU_CP_KIQ_IRQ_LAST
 };
 
+#define MAX_KIQ_REG_WAIT       5000 /* in usecs, 5ms */
+#define MAX_KIQ_REG_BAILOUT_INTERVAL   5 /* in msecs, 5ms */
+#define MAX_KIQ_REG_TRY 20
+
 int amdgpu_device_ip_set_clockgating_state(void *dev,
 					   enum amd_ip_block_type block_type,
 					   enum amd_clockgating_state state);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index 21adb1b6..3885636 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -22,9 +22,6 @@ 
  */
 
 #include "amdgpu.h"
-#define MAX_KIQ_REG_WAIT	5000 /* in usecs, 5ms */
-#define MAX_KIQ_REG_BAILOUT_INTERVAL	5 /* in msecs, 5ms */
-#define MAX_KIQ_REG_TRY 20
 
 uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev)
 {
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index ed467de..6c164bd 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -311,6 +311,58 @@  static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vmid)
 	return req;
 }
 
+signed long  amdgpu_kiq_reg_write_reg_wait(struct amdgpu_device *adev,
+						  uint32_t reg0, uint32_t reg1,
+						  uint32_t ref, uint32_t mask)
+{
+	signed long r, cnt = 0;
+	unsigned long flags;
+	uint32_t seq;
+	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
+	struct amdgpu_ring *ring = &kiq->ring;
+
+	if (!ring->ready)
+		return -EINVAL;
+
+	spin_lock_irqsave(&kiq->ring_lock, flags);
+
+	amdgpu_ring_alloc(ring, 32);
+	amdgpu_ring_emit_reg_write_reg_wait(ring, reg0, reg1,
+					    ref, mask);
+	amdgpu_fence_emit_polling(ring, &seq);
+	amdgpu_ring_commit(ring);
+	spin_unlock_irqrestore(&kiq->ring_lock, flags);
+
+	r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
+
+	/* don't wait anymore for gpu reset case because this way may
+	 * block gpu_recover() routine forever, e.g. this virt_kiq_rreg
+	 * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will
+	 * never return if we keep waiting in virt_kiq_rreg, which cause
+	 * gpu_recover() hang there.
+	 *
+	 * also don't wait anymore for IRQ context
+	 * */
+	if (r < 1 && (adev->in_gpu_reset || in_interrupt()))
+		goto failed_kiq;
+
+	might_sleep();
+
+	while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
+		msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
+		r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
+	}
+
+	if (cnt > MAX_KIQ_REG_TRY)
+		goto failed_kiq;
+
+	return 0;
+
+failed_kiq:
+	pr_err("failed to invalidate tlb with kiq\n");
+	return r;
+}
+
 /*
  * GART
  * VMID 0 is the physical GPU addresses as used by the kernel.
@@ -332,6 +384,7 @@  static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev,
 	/* Use register 17 for GART */
 	const unsigned eng = 17;
 	unsigned i, j;
+	int r;
 
 	spin_lock(&adev->gmc.invalidate_lock);
 
@@ -339,6 +392,13 @@  static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev,
 		struct amdgpu_vmhub *hub = &adev->vmhub[i];
 		u32 tmp = gmc_v9_0_get_invalidate_req(vmid);
 
+		spin_unlock(&adev->gmc.invalidate_lock);
+		r = amdgpu_kiq_reg_write_reg_wait(adev, hub->vm_inv_eng0_req + eng,
+			hub->vm_inv_eng0_ack + eng, tmp, 1 << vmid);
+		spin_lock(&adev->gmc.invalidate_lock);
+		if (!r)
+			continue;
+
 		WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp);
 
 		/* Busy wait for ACK.*/

Comments

Am 16.08.2018 um 15:05 schrieb Emily Deng:
> To avoid the tlb flush not interrupted by world switch, use kiq and one
> command to do tlb invalidate.
>
> Signed-off-by: Emily Deng <Emily.Deng@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu.h      |  4 +++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c |  3 --
>   drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c    | 60 ++++++++++++++++++++++++++++++++
>   3 files changed, 64 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 6265b88..19ef7711 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -212,6 +212,10 @@ enum amdgpu_kiq_irq {
>   	AMDGPU_CP_KIQ_IRQ_LAST
>   };
>   
> +#define MAX_KIQ_REG_WAIT       5000 /* in usecs, 5ms */
> +#define MAX_KIQ_REG_BAILOUT_INTERVAL   5 /* in msecs, 5ms */
> +#define MAX_KIQ_REG_TRY 20
> +
>   int amdgpu_device_ip_set_clockgating_state(void *dev,
>   					   enum amd_ip_block_type block_type,
>   					   enum amd_clockgating_state state);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> index 21adb1b6..3885636 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> @@ -22,9 +22,6 @@
>    */
>   
>   #include "amdgpu.h"
> -#define MAX_KIQ_REG_WAIT	5000 /* in usecs, 5ms */
> -#define MAX_KIQ_REG_BAILOUT_INTERVAL	5 /* in msecs, 5ms */
> -#define MAX_KIQ_REG_TRY 20
>   
>   uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev)
>   {
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> index ed467de..6c164bd 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> @@ -311,6 +311,58 @@ static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vmid)
>   	return req;
>   }
>   
> +signed long  amdgpu_kiq_reg_write_reg_wait(struct amdgpu_device *adev,
> +						  uint32_t reg0, uint32_t reg1,
> +						  uint32_t ref, uint32_t mask)
> +{
> +	signed long r, cnt = 0;
> +	unsigned long flags;
> +	uint32_t seq;
> +	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
> +	struct amdgpu_ring *ring = &kiq->ring;
> +
> +	if (!ring->ready)
> +		return -EINVAL;
> +
> +	spin_lock_irqsave(&kiq->ring_lock, flags);
> +
> +	amdgpu_ring_alloc(ring, 32);
> +	amdgpu_ring_emit_reg_write_reg_wait(ring, reg0, reg1,
> +					    ref, mask);
> +	amdgpu_fence_emit_polling(ring, &seq);
> +	amdgpu_ring_commit(ring);
> +	spin_unlock_irqrestore(&kiq->ring_lock, flags);
> +
> +	r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
> +
> +	/* don't wait anymore for gpu reset case because this way may
> +	 * block gpu_recover() routine forever, e.g. this virt_kiq_rreg
> +	 * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will
> +	 * never return if we keep waiting in virt_kiq_rreg, which cause
> +	 * gpu_recover() hang there.
> +	 *
> +	 * also don't wait anymore for IRQ context
> +	 * */
> +	if (r < 1 && (adev->in_gpu_reset || in_interrupt()))
> +		goto failed_kiq;
> +
> +	might_sleep();
> +
> +	while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
> +		msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
> +		r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
> +	}
> +
> +	if (cnt > MAX_KIQ_REG_TRY)
> +		goto failed_kiq;
> +
> +	return 0;
> +
> +failed_kiq:
> +	pr_err("failed to invalidate tlb with kiq\n");
> +	return r;
> +}
> +
>   /*
>    * GART
>    * VMID 0 is the physical GPU addresses as used by the kernel.
> @@ -332,6 +384,7 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev,
>   	/* Use register 17 for GART */
>   	const unsigned eng = 17;
>   	unsigned i, j;
> +	int r;
>   
>   	spin_lock(&adev->gmc.invalidate_lock);
>   
> @@ -339,6 +392,13 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev,
>   		struct amdgpu_vmhub *hub = &adev->vmhub[i];
>   		u32 tmp = gmc_v9_0_get_invalidate_req(vmid);
>   
> +		spin_unlock(&adev->gmc.invalidate_lock);

Well just remove the lock altogether. Taking it and dropping it again 
immediately doesn't do anything useful.

> +		r = amdgpu_kiq_reg_write_reg_wait(adev, hub->vm_inv_eng0_req + eng,
> +			hub->vm_inv_eng0_ack + eng, tmp, 1 << vmid);
> +		spin_lock(&adev->gmc.invalidate_lock);
> +		if (!r)
> +			continue;
> +
>   		WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp);
>   
>   		/* Busy wait for ACK.*/
Since you now use the KIQ to wait for the TLB flush I think we can now 
remove the MMIO implementation.

Regards,
Christian.
>-----Original Message-----

>From: Christian König <ckoenig.leichtzumerken@gmail.com>

>Sent: Thursday, August 16, 2018 9:24 PM

>To: Deng, Emily <Emily.Deng@amd.com>; amd-gfx@lists.freedesktop.org

>Subject: Re: [PATCH 2/2] drm/amdgpu: use kiq to do invalidate tlb

>

>Am 16.08.2018 um 15:05 schrieb Emily Deng:

>> To avoid the tlb flush not interrupted by world switch, use kiq and

>> one command to do tlb invalidate.

>>

>> Signed-off-by: Emily Deng <Emily.Deng@amd.com>

>> ---

>>   drivers/gpu/drm/amd/amdgpu/amdgpu.h      |  4 +++

>>   drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c |  3 --

>>   drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c    | 60

>++++++++++++++++++++++++++++++++

>>   3 files changed, 64 insertions(+), 3 deletions(-)

>>

>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h

>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h

>> index 6265b88..19ef7711 100644

>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h

>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h

>> @@ -212,6 +212,10 @@ enum amdgpu_kiq_irq {

>>   	AMDGPU_CP_KIQ_IRQ_LAST

>>   };

>>

>> +#define MAX_KIQ_REG_WAIT       5000 /* in usecs, 5ms */

>> +#define MAX_KIQ_REG_BAILOUT_INTERVAL   5 /* in msecs, 5ms */

>> +#define MAX_KIQ_REG_TRY 20

>> +

>>   int amdgpu_device_ip_set_clockgating_state(void *dev,

>>   					   enum amd_ip_block_type block_type,

>>   					   enum amd_clockgating_state state);

>diff --git

>> a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c

>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c

>> index 21adb1b6..3885636 100644

>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c

>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c

>> @@ -22,9 +22,6 @@

>>    */

>>

>>   #include "amdgpu.h"

>> -#define MAX_KIQ_REG_WAIT	5000 /* in usecs, 5ms */

>> -#define MAX_KIQ_REG_BAILOUT_INTERVAL	5 /* in msecs, 5ms */

>> -#define MAX_KIQ_REG_TRY 20

>>

>>   uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev)

>>   {

>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c

>> b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c

>> index ed467de..6c164bd 100644

>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c

>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c

>> @@ -311,6 +311,58 @@ static uint32_t

>gmc_v9_0_get_invalidate_req(unsigned int vmid)

>>   	return req;

>>   }

>>

>> +signed long  amdgpu_kiq_reg_write_reg_wait(struct amdgpu_device *adev,

>> +						  uint32_t reg0, uint32_t reg1,

>> +						  uint32_t ref, uint32_t mask)

>> +{

>> +	signed long r, cnt = 0;

>> +	unsigned long flags;

>> +	uint32_t seq;

>> +	struct amdgpu_kiq *kiq = &adev->gfx.kiq;

>> +	struct amdgpu_ring *ring = &kiq->ring;

>> +

>> +	if (!ring->ready)

>> +		return -EINVAL;

>> +

>> +	spin_lock_irqsave(&kiq->ring_lock, flags);

>> +

>> +	amdgpu_ring_alloc(ring, 32);

>> +	amdgpu_ring_emit_reg_write_reg_wait(ring, reg0, reg1,

>> +					    ref, mask);

>> +	amdgpu_fence_emit_polling(ring, &seq);

>> +	amdgpu_ring_commit(ring);

>> +	spin_unlock_irqrestore(&kiq->ring_lock, flags);

>> +

>> +	r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);

>> +

>> +	/* don't wait anymore for gpu reset case because this way may

>> +	 * block gpu_recover() routine forever, e.g. this virt_kiq_rreg

>> +	 * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will

>> +	 * never return if we keep waiting in virt_kiq_rreg, which cause

>> +	 * gpu_recover() hang there.

>> +	 *

>> +	 * also don't wait anymore for IRQ context

>> +	 * */

>> +	if (r < 1 && (adev->in_gpu_reset || in_interrupt()))

>> +		goto failed_kiq;

>> +

>> +	might_sleep();

>> +

>> +	while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {

>> +		msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);

>> +		r = amdgpu_fence_wait_polling(ring, seq,

>MAX_KIQ_REG_WAIT);

>> +	}

>> +

>> +	if (cnt > MAX_KIQ_REG_TRY)

>> +		goto failed_kiq;

>> +

>> +	return 0;

>> +

>> +failed_kiq:

>> +	pr_err("failed to invalidate tlb with kiq\n");

>> +	return r;

>> +}

>> +

>>   /*

>>    * GART

>>    * VMID 0 is the physical GPU addresses as used by the kernel.

>> @@ -332,6 +384,7 @@ static void gmc_v9_0_flush_gpu_tlb(struct

>amdgpu_device *adev,

>>   	/* Use register 17 for GART */

>>   	const unsigned eng = 17;

>>   	unsigned i, j;

>> +	int r;

>>

>>   	spin_lock(&adev->gmc.invalidate_lock);

>>

>> @@ -339,6 +392,13 @@ static void gmc_v9_0_flush_gpu_tlb(struct

>amdgpu_device *adev,

>>   		struct amdgpu_vmhub *hub = &adev->vmhub[i];

>>   		u32 tmp = gmc_v9_0_get_invalidate_req(vmid);

>>

>> +		spin_unlock(&adev->gmc.invalidate_lock);

>

>Well just remove the lock altogether. Taking it and dropping it again

>immediately doesn't do anything useful.

>

>> +		r = amdgpu_kiq_reg_write_reg_wait(adev, hub-

>>vm_inv_eng0_req + eng,

>> +			hub->vm_inv_eng0_ack + eng, tmp, 1 << vmid);

>> +		spin_lock(&adev->gmc.invalidate_lock);

>> +		if (!r)

>> +			continue;

>> +

>>   		WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp);

>>

>>   		/* Busy wait for ACK.*/

>Since you now use the KIQ to wait for the TLB flush I think we can now remove

>the MMIO implementation.

Ok. If remove the MMIO implementation, then we don't need adev->gmc.invalidate_lock.
>

>Regards,

>Christian.
>-----Original Message-----

>From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of Deng,

>Emily

>Sent: Friday, August 17, 2018 10:19 AM

>To: Koenig, Christian <Christian.Koenig@amd.com>; amd-

>gfx@lists.freedesktop.org

>Subject: RE: [PATCH 2/2] drm/amdgpu: use kiq to do invalidate tlb

>

>>-----Original Message-----

>>From: Christian König <ckoenig.leichtzumerken@gmail.com>

>>Sent: Thursday, August 16, 2018 9:24 PM

>>To: Deng, Emily <Emily.Deng@amd.com>; amd-gfx@lists.freedesktop.org

>>Subject: Re: [PATCH 2/2] drm/amdgpu: use kiq to do invalidate tlb

>>

>>Am 16.08.2018 um 15:05 schrieb Emily Deng:

>>> To avoid the tlb flush not interrupted by world switch, use kiq and

>>> one command to do tlb invalidate.

>>>

>>> Signed-off-by: Emily Deng <Emily.Deng@amd.com>

>>> ---

>>>   drivers/gpu/drm/amd/amdgpu/amdgpu.h      |  4 +++

>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c |  3 --

>>>   drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c    | 60

>>++++++++++++++++++++++++++++++++

>>>   3 files changed, 64 insertions(+), 3 deletions(-)

>>>

>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h

>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h

>>> index 6265b88..19ef7711 100644

>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h

>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h

>>> @@ -212,6 +212,10 @@ enum amdgpu_kiq_irq {

>>>   	AMDGPU_CP_KIQ_IRQ_LAST

>>>   };

>>>

>>> +#define MAX_KIQ_REG_WAIT       5000 /* in usecs, 5ms */

>>> +#define MAX_KIQ_REG_BAILOUT_INTERVAL   5 /* in msecs, 5ms */

>>> +#define MAX_KIQ_REG_TRY 20

>>> +

>>>   int amdgpu_device_ip_set_clockgating_state(void *dev,

>>>   					   enum amd_ip_block_type block_type,

>>>   					   enum amd_clockgating_state state);

>>diff --git

>>> a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c

>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c

>>> index 21adb1b6..3885636 100644

>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c

>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c

>>> @@ -22,9 +22,6 @@

>>>    */

>>>

>>>   #include "amdgpu.h"

>>> -#define MAX_KIQ_REG_WAIT	5000 /* in usecs, 5ms */

>>> -#define MAX_KIQ_REG_BAILOUT_INTERVAL	5 /* in msecs, 5ms */

>>> -#define MAX_KIQ_REG_TRY 20

>>>

>>>   uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev)

>>>   {

>>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c

>>> b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c

>>> index ed467de..6c164bd 100644

>>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c

>>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c

>>> @@ -311,6 +311,58 @@ static uint32_t

>>gmc_v9_0_get_invalidate_req(unsigned int vmid)

>>>   	return req;

>>>   }

>>>

>>> +signed long  amdgpu_kiq_reg_write_reg_wait(struct amdgpu_device *adev,

>>> +						  uint32_t reg0, uint32_t reg1,

>>> +						  uint32_t ref, uint32_t mask) {

>>> +	signed long r, cnt = 0;

>>> +	unsigned long flags;

>>> +	uint32_t seq;

>>> +	struct amdgpu_kiq *kiq = &adev->gfx.kiq;

>>> +	struct amdgpu_ring *ring = &kiq->ring;

>>> +

>>> +	if (!ring->ready)

>>> +		return -EINVAL;

>>> +

>>> +	spin_lock_irqsave(&kiq->ring_lock, flags);

>>> +

>>> +	amdgpu_ring_alloc(ring, 32);

>>> +	amdgpu_ring_emit_reg_write_reg_wait(ring, reg0, reg1,

>>> +					    ref, mask);

>>> +	amdgpu_fence_emit_polling(ring, &seq);

>>> +	amdgpu_ring_commit(ring);

>>> +	spin_unlock_irqrestore(&kiq->ring_lock, flags);

>>> +

>>> +	r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);

>>> +

>>> +	/* don't wait anymore for gpu reset case because this way may

>>> +	 * block gpu_recover() routine forever, e.g. this virt_kiq_rreg

>>> +	 * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will

>>> +	 * never return if we keep waiting in virt_kiq_rreg, which cause

>>> +	 * gpu_recover() hang there.

>>> +	 *

>>> +	 * also don't wait anymore for IRQ context

>>> +	 * */

>>> +	if (r < 1 && (adev->in_gpu_reset || in_interrupt()))

>>> +		goto failed_kiq;

>>> +

>>> +	might_sleep();

>>> +

>>> +	while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {

>>> +		msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);

>>> +		r = amdgpu_fence_wait_polling(ring, seq,

>>MAX_KIQ_REG_WAIT);

>>> +	}

>>> +

>>> +	if (cnt > MAX_KIQ_REG_TRY)

>>> +		goto failed_kiq;

>>> +

>>> +	return 0;

>>> +

>>> +failed_kiq:

>>> +	pr_err("failed to invalidate tlb with kiq\n");

>>> +	return r;

>>> +}

>>> +

>>>   /*

>>>    * GART

>>>    * VMID 0 is the physical GPU addresses as used by the kernel.

>>> @@ -332,6 +384,7 @@ static void gmc_v9_0_flush_gpu_tlb(struct

>>amdgpu_device *adev,

>>>   	/* Use register 17 for GART */

>>>   	const unsigned eng = 17;

>>>   	unsigned i, j;

>>> +	int r;

>>>

>>>   	spin_lock(&adev->gmc.invalidate_lock);

>>>

>>> @@ -339,6 +392,13 @@ static void gmc_v9_0_flush_gpu_tlb(struct

>>amdgpu_device *adev,

>>>   		struct amdgpu_vmhub *hub = &adev->vmhub[i];

>>>   		u32 tmp = gmc_v9_0_get_invalidate_req(vmid);

>>>

>>> +		spin_unlock(&adev->gmc.invalidate_lock);

>>

>>Well just remove the lock altogether. Taking it and dropping it again

>>immediately doesn't do anything useful.

>>

>>> +		r = amdgpu_kiq_reg_write_reg_wait(adev, hub-

>>>vm_inv_eng0_req + eng,

>>> +			hub->vm_inv_eng0_ack + eng, tmp, 1 << vmid);

>>> +		spin_lock(&adev->gmc.invalidate_lock);

>>> +		if (!r)

>>> +			continue;

>>> +

>>>   		WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp);

>>>

>>>   		/* Busy wait for ACK.*/

>>Since you now use the KIQ to wait for the TLB flush I think we can now

>>remove the MMIO implementation.

>Ok. If remove the MMIO implementation, then we don't need adev-

>>gmc.invalidate_lock.

Sorry, just forget one thing, the MMIO path couldn't be removed, as the kiq only 
could be used when the kiq ring is ready for using, so when the kiq is not ready,
 need fallback to the MMIO path.
>>

>>Regards,

>>Christian.

>_______________________________________________

>amd-gfx mailing list

>amd-gfx@lists.freedesktop.org

>https://lists.freedesktop.org/mailman/listinfo/amd-gfx