drm/amdgpu: fix the ib test hang when gfx is in "idle" state

Submitted by Huang, Ray on April 20, 2018, 9:40 a.m.

Details

Message ID 1524217255-25968-1-git-send-email-ray.huang@amd.com
State New
Headers show
Series "drm/amdgpu: fix the ib test hang when gfx is in "idle" state" ( rev: 1 ) in AMD X.Org drivers

Not browsing as part of any series.

Commit Message

Huang, Ray April 20, 2018, 9:40 a.m.
"aaabaf4   drm/amdgpu: defer test IBs on the rings at boot (V3)"
The above patch defers the execution of the gfx/compute IB tests. However, by
that time the gfx block may already have gone into the "idle" state. If an "idle"
gfx block receives a command submission, the system will hang. So we must add an
is_gfx_on check at the start of the IB tests.

Signed-off-by: Huang Rui <ray.huang@amd.com>
Cc: Shirish S <shirish.s@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h               |  2 ++
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c             | 19 ++++++++++++++++++-
 drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c | 16 ++--------------
 3 files changed, 22 insertions(+), 15 deletions(-)

Patch hide | download patch | download mbox

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 59df4b7..a0263b9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -905,6 +905,7 @@  struct amdgpu_gfx_funcs {
 	void (*read_wave_vgprs)(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t thread, uint32_t start, uint32_t size, uint32_t *dst);
 	void (*read_wave_sgprs)(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t start, uint32_t size, uint32_t *dst);
 	void (*select_me_pipe_q)(struct amdgpu_device *adev, u32 me, u32 pipe, u32 queue);
+	bool (*is_gfx_on)(struct amdgpu_device *adev);
 };
 
 struct amdgpu_ngg_buf {
@@ -1855,6 +1856,7 @@  amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
 #define amdgpu_gds_switch(adev, r, v, d, w, a) (adev)->gds.funcs->patch_gds_switch((r), (v), (d), (w), (a))
 #define amdgpu_psp_check_fw_loading_status(adev, i) (adev)->firmware.funcs->check_fw_loading_status((adev), (i))
 #define amdgpu_gfx_select_me_pipe_q(adev, me, pipe, q) (adev)->gfx.funcs->select_me_pipe_q((adev), (me), (pipe), (q))
+#define amdgpu_gfx_is_gfx_on(adev) (adev)->gfx.funcs->is_gfx_on((adev))
 
 /* Common functions */
 int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 6c2d278..a71d711 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -342,6 +342,18 @@  static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
 	return r;
 }
 
+static bool gfx_v9_0_is_gfx_on(struct amdgpu_device *adev)
+{
+	uint32_t reg;
+
+	reg = RREG32_SOC15(PWR, 0, mmPWR_MISC_CNTL_STATUS);
+	if ((reg & PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK) ==
+	    (0x2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT))
+		return true;
+
+	return false;
+}
+
 static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 {
 	struct amdgpu_device *adev = ring->adev;
@@ -353,6 +365,10 @@  static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 	uint32_t tmp;
 	long r;
 
+	/* confirm gfx is not in "idle" state */
+	if (!amdgpu_gfx_is_gfx_on(adev))
+		return 0;
+
 	r = amdgpu_device_wb_get(adev, &index);
 	if (r) {
 		dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
@@ -1085,7 +1101,8 @@  static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
 	.read_wave_data = &gfx_v9_0_read_wave_data,
 	.read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
 	.read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
-	.select_me_pipe_q = &gfx_v9_0_select_me_pipe_q
+	.select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
+	.is_gfx_on = &gfx_v9_0_is_gfx_on
 };
 
 static void gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c
index 7712eb6..3553fba 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c
@@ -254,28 +254,16 @@  static int smu10_power_off_asic(struct pp_hwmgr *hwmgr)
 	return smu10_reset_cc6_data(hwmgr);
 }
 
-static bool smu10_is_gfx_on(struct pp_hwmgr *hwmgr)
-{
-	uint32_t reg;
-	struct amdgpu_device *adev = hwmgr->adev;
-
-	reg = RREG32_SOC15(PWR, 0, mmPWR_MISC_CNTL_STATUS);
-	if ((reg & PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK) ==
-	    (0x2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT))
-		return true;
-
-	return false;
-}
-
 static int smu10_disable_gfx_off(struct pp_hwmgr *hwmgr)
 {
 	struct smu10_hwmgr *smu10_data = (struct smu10_hwmgr *)(hwmgr->backend);
+	struct amdgpu_device *adev = hwmgr->adev;
 
 	if (smu10_data->gfx_off_controled_by_driver) {
 		smum_send_msg_to_smc(hwmgr, PPSMC_MSG_DisableGfxOff);
 
 		/* confirm gfx is back to "on" state */
-		while (!smu10_is_gfx_on(hwmgr))
+		while (!amdgpu_gfx_is_gfx_on(adev))
 			msleep(1);
 	}
 

Comments

Am 20.04.2018 um 11:40 schrieb Huang Rui:
> "aaabaf4   drm/amdgpu: defer test IBs on the rings at boot (V3)"
> Above patch defers the execution of gfx/compute ib tests. However, at that time,
> the gfx may already go into idle state. If "idle" gfx receives command
> submission, it will get hang in the system. So we must add is_gfx_on checking at
> start of ib tests.

Do I see that right that you just skip the IB test when the GFX block is
already turned off? In this case that would be a clear NAK.

BTW: How do you detect that we need to turn GFX on again?

Regards,
Christian.

>
> Signed-off-by: Huang Rui <ray.huang@amd.com>
> Cc: Shirish S <shirish.s@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu.h               |  2 ++
>   drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c             | 19 ++++++++++++++++++-
>   drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c | 16 ++--------------
>   3 files changed, 22 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 59df4b7..a0263b9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -905,6 +905,7 @@ struct amdgpu_gfx_funcs {
>   	void (*read_wave_vgprs)(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t thread, uint32_t start, uint32_t size, uint32_t *dst);
>   	void (*read_wave_sgprs)(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t start, uint32_t size, uint32_t *dst);
>   	void (*select_me_pipe_q)(struct amdgpu_device *adev, u32 me, u32 pipe, u32 queue);
> +	bool (*is_gfx_on)(struct amdgpu_device *adev);
>   };
>   
>   struct amdgpu_ngg_buf {
> @@ -1855,6 +1856,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
>   #define amdgpu_gds_switch(adev, r, v, d, w, a) (adev)->gds.funcs->patch_gds_switch((r), (v), (d), (w), (a))
>   #define amdgpu_psp_check_fw_loading_status(adev, i) (adev)->firmware.funcs->check_fw_loading_status((adev), (i))
>   #define amdgpu_gfx_select_me_pipe_q(adev, me, pipe, q) (adev)->gfx.funcs->select_me_pipe_q((adev), (me), (pipe), (q))
> +#define amdgpu_gfx_is_gfx_on(adev) (adev)->gfx.funcs->is_gfx_on((adev))
>   
>   /* Common functions */
>   int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> index 6c2d278..a71d711 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> @@ -342,6 +342,18 @@ static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
>   	return r;
>   }
>   
> +static bool gfx_v9_0_is_gfx_on(struct amdgpu_device *adev)
> +{
> +	uint32_t reg;
> +
> +	reg = RREG32_SOC15(PWR, 0, mmPWR_MISC_CNTL_STATUS);
> +	if ((reg & PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK) ==
> +	    (0x2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT))
> +		return true;
> +
> +	return false;
> +}
> +
>   static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
>   {
>   	struct amdgpu_device *adev = ring->adev;
> @@ -353,6 +365,10 @@ static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
>   	uint32_t tmp;
>   	long r;
>   
> +	/* confirm gfx is not in "idle" state */
> +	if (!amdgpu_gfx_is_gfx_on(adev))
> +		return 0;
> +
>   	r = amdgpu_device_wb_get(adev, &index);
>   	if (r) {
>   		dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
> @@ -1085,7 +1101,8 @@ static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
>   	.read_wave_data = &gfx_v9_0_read_wave_data,
>   	.read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
>   	.read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
> -	.select_me_pipe_q = &gfx_v9_0_select_me_pipe_q
> +	.select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
> +	.is_gfx_on = &gfx_v9_0_is_gfx_on
>   };
>   
>   static void gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
> diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c
> index 7712eb6..3553fba 100644
> --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c
> +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c
> @@ -254,28 +254,16 @@ static int smu10_power_off_asic(struct pp_hwmgr *hwmgr)
>   	return smu10_reset_cc6_data(hwmgr);
>   }
>   
> -static bool smu10_is_gfx_on(struct pp_hwmgr *hwmgr)
> -{
> -	uint32_t reg;
> -	struct amdgpu_device *adev = hwmgr->adev;
> -
> -	reg = RREG32_SOC15(PWR, 0, mmPWR_MISC_CNTL_STATUS);
> -	if ((reg & PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK) ==
> -	    (0x2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT))
> -		return true;
> -
> -	return false;
> -}
> -
>   static int smu10_disable_gfx_off(struct pp_hwmgr *hwmgr)
>   {
>   	struct smu10_hwmgr *smu10_data = (struct smu10_hwmgr *)(hwmgr->backend);
> +	struct amdgpu_device *adev = hwmgr->adev;
>   
>   	if (smu10_data->gfx_off_controled_by_driver) {
>   		smum_send_msg_to_smc(hwmgr, PPSMC_MSG_DisableGfxOff);
>   
>   		/* confirm gfx is back to "on" state */
> -		while (!smu10_is_gfx_on(hwmgr))
> +		while (!amdgpu_gfx_is_gfx_on(adev))
>   			msleep(1);
>   	}
>
On Fri, Apr 20, 2018 at 05:59:16PM +0800, Koenig, Christian wrote:
> Am 20.04.2018 um 11:40 schrieb Huang Rui:
> > "aaabaf4   drm/amdgpu: defer test IBs on the rings at boot (V3)"
> > Above patch defers the execution of gfx/compute ib tests. However, at that time,
> > the gfx may already go into idle state. If "idle" gfx receives command
> > submission, it will get hang in the system. So we must add is_gfx_on checking at
> > start of ib tests.
> 
> Do I see that right that you just skip the IB test when the GFX block is 
> already turned of? In this case that would be a clear NAK.
> 
> BTW: How do you detect that we need to turn GFX on again?

Christian, I see your point. But there is a hang issue if we try to
disable/enable gfxoff with an SMC message at runtime. Actually, I am trying to
find a good sequence to fix it. After that, I can even expose a debugfs
interface to configure that. So, for the moment, I have to skip the test when
gfx is in the "idle" state.

Thanks,
Ray
Hi Ray,

Am 23.04.2018 11:47 schrieb Huang Rui <ray.huang@amd.com>:
On Fri, Apr 20, 2018 at 05:59:16PM +0800, Koenig, Christian wrote:
> Am 20.04.2018 um 11:40 schrieb Huang Rui:
> > "aaabaf4   drm/amdgpu: defer test IBs on the rings at boot (V3)"
> > Above patch defers the execution of gfx/compute ib tests. However, at that time,
> > the gfx may already go into idle state. If "idle" gfx receives command
> > submission, it will get hang in the system. So we must add is_gfx_on checking at
> > start of ib tests.
>
> Do I see that right that you just skip the IB test when the GFX block is
> already turned of? In this case that would be a clear NAK.
>
> BTW: How do you detect that we need to turn GFX on again?

Christian, I know point. But there is a hang issue if we would like try to
disable/enable gfxoff with SMC message at runtime. Actually, I am trying to
find a good sequence to fix it. After that, I can even expose an debugfs
interface to configure that. So I have to skip the test for the moment when
gfx is in "idle".

Working around that issue for the moment is ok, but please note that explicitly in both the commit message and a code comment.

But don't you run into the same problem when the UMD starts to submit commands?

I mean the idea of the IB test is that you "simulate" a userspace command submission and see if it works.

Regards,
Christian.


Thanks,
Ray

On Fri, Apr 20, 2018 at 05:59:16PM +0800, Koenig, Christian wrote:
> Am 20.04.2018 um 11:40 schrieb Huang Rui:
> > "aaabaf4   drm/amdgpu: defer test IBs on the rings at boot (V3)"
> > Above patch defers the execution of gfx/compute ib tests. However, at that time,
> > the gfx may already go into idle state. If "idle" gfx receives command
> > submission, it will get hang in the system. So we must add is_gfx_on checking at
> > start of ib tests.
>
> Do I see that right that you just skip the IB test when the GFX block is
> already turned of? In this case that would be a clear NAK.
>
> BTW: How do you detect that we need to turn GFX on again?

Christian, I know point. But there is a hang issue if we would like try to
disable/enable gfxoff with SMC message at runtime. Actually, I am trying to
find a good sequence to fix it. After that, I can even expose an debugfs
interface to configure that. So I have to skip the test for the moment when
gfx is in "idle".

Thanks,
Ray
On Mon, Apr 23, 2018 at 05:52:28PM +0800, Huang Rui wrote:
> On Fri, Apr 20, 2018 at 05:59:16PM +0800, Koenig, Christian wrote:
> > Am 20.04.2018 um 11:40 schrieb Huang Rui:
> > > "aaabaf4   drm/amdgpu: defer test IBs on the rings at boot (V3)"
> > > Above patch defers the execution of gfx/compute ib tests. However, at that time,
> > > the gfx may already go into idle state. If "idle" gfx receives command
> > > submission, it will get hang in the system. So we must add is_gfx_on checking at
> > > start of ib tests.
> > 
> > Do I see that right that you just skip the IB test when the GFX block is 
> > already turned of? In this case that would be a clear NAK.
> > 
> > BTW: How do you detect that we need to turn GFX on again?
> 
> Christian, I know point. But there is a hang issue if we would like try to
> disable/enable gfxoff with SMC message at runtime. Actually, I am trying to
> find a good sequence to fix it. After that, I can even expose an debugfs
> interface to configure that. So I have to skip the test for the moment when
> gfx is in "idle".
> 

And in the normal case, the driver won't explicitly turn the gfx on/off. The RLC
firmware will handle it.

Thanks,
Ray
On Mon, Apr 23, 2018 at 05:57:06PM +0800, Koenig, Christian wrote:
> Hi Ray,
> 
> Am 23.04.2018 11:47 schrieb Huang Rui <ray.huang@amd.com>:
> 
>     On Fri, Apr 20, 2018 at 05:59:16PM +0800, Koenig, Christian wrote:
>     > Am 20.04.2018 um 11:40 schrieb Huang Rui:
>     > > "aaabaf4   drm/amdgpu: defer test IBs on the rings at boot (V3)"
>     > > Above patch defers the execution of gfx/compute ib tests. However, at
>     that time,
>     > > the gfx may already go into idle state. If "idle" gfx receives command
>     > > submission, it will get hang in the system. So we must add is_gfx_on
>     checking at
>     > > start of ib tests.
>     >
>     > Do I see that right that you just skip the IB test when the GFX block is
>     > already turned of? In this case that would be a clear NAK.
>     >
>     > BTW: How do you detect that we need to turn GFX on again?
> 
>     Christian, I know point. But there is a hang issue if we would like try to
>     disable/enable gfxoff with SMC message at runtime. Actually, I am trying to
>     find a good sequence to fix it. After that, I can even expose an debugfs
>     interface to configure that. So I have to skip the test for the moment when
>     gfx is in "idle".
> 
> 
> Working around that issue for the moment is ok, but please note that explicitly
> in both the commit message and a code comment.

OK. Will add it at V2.

> 
> But don't you run into the same problem when the UMD starts to submit commands?

When the UMD starts, the RLC firmware will detect the "draw" command and then
power up gfx. So there won't be a problem at that time. The main state
machine isn't exposed to the driver side yet.

> 
> I mean the idea of the IB test is that you "simulate" an userspace command
> submission and see if it works.
> 

Yes, agreed. Any idea how to "simulate" the "draw" command? Otherwise, please
wait for my fix for enabling/disabling gfxoff at runtime.

Thanks,
Ray
Hi Ray,

Am 23.04.2018 14:08 schrieb Huang Rui <ray.huang@amd.com>:
On Mon, Apr 23, 2018 at 05:57:06PM +0800, Koenig, Christian wrote:
> Hi Ray,
>
> Am 23.04.2018 11:47 schrieb Huang Rui <ray.huang@amd.com>:
>
>     On Fri, Apr 20, 2018 at 05:59:16PM +0800, Koenig, Christian wrote:
>     > Am 20.04.2018 um 11:40 schrieb Huang Rui:
>     > > "aaabaf4   drm/amdgpu: defer test IBs on the rings at boot (V3)"
>     > > Above patch defers the execution of gfx/compute ib tests. However, at
>     that time,
>     > > the gfx may already go into idle state. If "idle" gfx receives command
>     > > submission, it will get hang in the system. So we must add is_gfx_on
>     checking at
>     > > start of ib tests.
>     >
>     > Do I see that right that you just skip the IB test when the GFX block is
>     > already turned of? In this case that would be a clear NAK.
>     >
>     > BTW: How do you detect that we need to turn GFX on again?
>
>     Christian, I know point. But there is a hang issue if we would like try to
>     disable/enable gfxoff with SMC message at runtime. Actually, I am trying to
>     find a good sequence to fix it. After that, I can even expose an debugfs
>     interface to configure that. So I have to skip the test for the moment when
>     gfx is in "idle".
>
>
> Working around that issue for the moment is ok, but please note that explicitly
> in both the commit message and a code comment.

OK. Will add it at V2.

>
> But don't you run into the same problem when the UMD starts to submit commands?

When UMD starts, RLC firmware will detect the "draw" command, then it will
power up gfx. So it won't have problem at that time. The mainly state
machine doesn't expose to driver side yet.

>
> I mean the idea of the IB test is that you "simulate" an userspace command
> submission and see if it works.
>

Yes, agree. Any idea to "simulate" the "draw" command? Or please wait for
my fix for enabling/disabling gfxoff at runtime.

Thanks,
Ray
On Mon, Apr 23, 2018 at 8:13 AM, Huang Rui <ray.huang@amd.com> wrote:
> On Mon, Apr 23, 2018 at 05:57:06PM +0800, Koenig, Christian wrote:
>> Hi Ray,
>>
>> Am 23.04.2018 11:47 schrieb Huang Rui <ray.huang@amd.com>:
>>
>>     On Fri, Apr 20, 2018 at 05:59:16PM +0800, Koenig, Christian wrote:
>>     > Am 20.04.2018 um 11:40 schrieb Huang Rui:
>>     > > "aaabaf4   drm/amdgpu: defer test IBs on the rings at boot (V3)"
>>     > > Above patch defers the execution of gfx/compute ib tests. However, at
>>     that time,
>>     > > the gfx may already go into idle state. If "idle" gfx receives command
>>     > > submission, it will get hang in the system. So we must add is_gfx_on
>>     checking at
>>     > > start of ib tests.
>>     >
>>     > Do I see that right that you just skip the IB test when the GFX block is
>>     > already turned of? In this case that would be a clear NAK.
>>     >
>>     > BTW: How do you detect that we need to turn GFX on again?
>>
>>     Christian, I know point. But there is a hang issue if we would like try to
>>     disable/enable gfxoff with SMC message at runtime. Actually, I am trying to
>>     find a good sequence to fix it. After that, I can even expose an debugfs
>>     interface to configure that. So I have to skip the test for the moment when
>>     gfx is in "idle".
>>
>>
>> Working around that issue for the moment is ok, but please note that explicitly
>> in both the commit message and a code comment.
>
> OK. Will add it at V2.
>
>>
>> But don't you run into the same problem when the UMD starts to submit commands?
>
> When UMD starts, RLC firmware will detect the "draw" command, then it will
> power up gfx. So it won't have problem at that time. The mainly state
> machine doesn't expose to driver side yet.
>
>>
>> I mean the idea of the IB test is that you "simulate" an userspace command
>> submission and see if it works.
>>
>
> Yes, agree. Any idea to "simulate" the "draw" command? Or please wait for
> my fix for enabling/disabling gfxoff at runtime.


Is there some special formatting in the IB required?  I don't really
see how this will work.  There is likely tons of state before the
actual draw command in the IB.

Alex

>
> Thanks,
> Ray
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
On Mon, Apr 23, 2018 at 10:40:20PM +0800, Alex Deucher wrote:
> On Mon, Apr 23, 2018 at 8:13 AM, Huang Rui <ray.huang@amd.com> wrote:
> > On Mon, Apr 23, 2018 at 05:57:06PM +0800, Koenig, Christian wrote:
> >> Hi Ray,
> >>
> >> Am 23.04.2018 11:47 schrieb Huang Rui <ray.huang@amd.com>:
> >>
> >>     On Fri, Apr 20, 2018 at 05:59:16PM +0800, Koenig, Christian wrote:
> >>     > Am 20.04.2018 um 11:40 schrieb Huang Rui:
> >>     > > "aaabaf4   drm/amdgpu: defer test IBs on the rings at boot (V3)"
> >>     > > Above patch defers the execution of gfx/compute ib tests. However, at
> >>     that time,
> >>     > > the gfx may already go into idle state. If "idle" gfx receives command
> >>     > > submission, it will get hang in the system. So we must add is_gfx_on
> >>     checking at
> >>     > > start of ib tests.
> >>     >
> >>     > Do I see that right that you just skip the IB test when the GFX block is
> >>     > already turned of? In this case that would be a clear NAK.
> >>     >
> >>     > BTW: How do you detect that we need to turn GFX on again?
> >>
> >>     Christian, I know point. But there is a hang issue if we would like try to
> >>     disable/enable gfxoff with SMC message at runtime. Actually, I am trying to
> >>     find a good sequence to fix it. After that, I can even expose an debugfs
> >>     interface to configure that. So I have to skip the test for the moment when
> >>     gfx is in "idle".
> >>
> >>
> >> Working around that issue for the moment is ok, but please note that explicitly
> >> in both the commit message and a code comment.
> >
> > OK. Will add it at V2.
> >
> >>
> >> But don't you run into the same problem when the UMD starts to submit commands?
> >
> > When UMD starts, RLC firmware will detect the "draw" command, then it will
> > power up gfx. So it won't have problem at that time. The mainly state
> > machine doesn't expose to driver side yet.
> >
> >>
> >> I mean the idea of the IB test is that you "simulate" an userspace command
> >> submission and see if it works.
> >>
> >
> > Yes, agree. Any idea to "simulate" the "draw" command? Or please wait for
> > my fix for enabling/disabling gfxoff at runtime.
> 
> 
> Is there some special formatting in the IB required?  I don't really
> see how this will work.  There is likely tons of state before the
> actual draw command in the IB.
> 

No, there isn't. The behavior of turning gfx on/off is implemented mainly in the
RLC firmware. From the driver's perspective, we just use an SMC message to
enable/disable the feature.

Thanks,
Ray