[v2,2/2] drm/amdgpu: export test ring debugfs interface

Submitted by Huang, Ray on May 11, 2017, 5:42 a.m.

Details

Message ID 1494481373-2237-2-git-send-email-ray.huang@amd.com
State New
Headers show
Series "Series without cover letter" ( rev: 1 ) in AMD X.Org drivers

Not browsing as part of any series.

Commit Message

Huang, Ray May 11, 2017, 5:42 a.m.
Signed-off-by: Huang Rui <ray.huang@amd.com>
---

V1 -> V2:
- park the scheduler thread for each ring to avoid conflict with commands from
  active apps.

---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 50 ++++++++++++++++++++++++++++--
 1 file changed, 48 insertions(+), 2 deletions(-)

Patch hide | download patch | download mbox

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 19ac196..04a63b5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3643,14 +3643,60 @@  static int amdgpu_debugfs_test_ib(struct seq_file *m, void *data)
 	return 0;
 }
 
+static int amdgpu_ring_tests(struct amdgpu_device *adev)
+{
+	unsigned i;
+	int r = 0;
+
+	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
+		struct amdgpu_ring *ring = adev->rings[i];
+
+		if (!ring || !ring->ready || !ring->sched.thread)
+			continue;
+
+		/* hold on the scheduler */
+		kthread_park(ring->sched.thread);
+
+		r = amdgpu_ring_test_ring(ring);
+		if (r) {
+			ring->ready = false;
+			DRM_ERROR("amdgpu: failed to test ring %d (%d).\n",
+				  i, r);
+		}
+
+		/* go on the scheduler */
+		kthread_unpark(ring->sched.thread);
+	}
+
+	return r;
+}
+
+static int amdgpu_debugfs_test_ring(struct seq_file *m, void *data)
+{
+	struct drm_info_node *node = (struct drm_info_node *) m->private;
+	struct drm_device *dev = node->minor->dev;
+	struct amdgpu_device *adev = dev->dev_private;
+	int r = 0;
+
+	seq_printf(m, "run ring test:\n");
+	r = amdgpu_ring_tests(adev);
+	if (r)
+		seq_printf(m, "ring tests failed (%d).\n", r);
+	else
+		seq_printf(m, "ring tests passed.\n");
+
+	return 0;
+}
+
 static const struct drm_info_list amdgpu_debugfs_test_ib_ring_list[] = {
-	{"amdgpu_test_ib", &amdgpu_debugfs_test_ib}
+	{"amdgpu_test_ib", &amdgpu_debugfs_test_ib},
+	{"amdgpu_test_ring", &amdgpu_debugfs_test_ring}
 };
 
 static int amdgpu_debugfs_test_ib_ring_init(struct amdgpu_device *adev)
 {
 	return amdgpu_debugfs_add_files(adev,
-					amdgpu_debugfs_test_ib_ring_list, 1);
+					amdgpu_debugfs_test_ib_ring_list, 2);
 }
 
 int amdgpu_debugfs_init(struct drm_minor *minor)

Comments

Am 11.05.2017 um 07:42 schrieb Huang Rui:
> Signed-off-by: Huang Rui <ray.huang@amd.com>
> ---
>
> V1 -> V2:
> - park the scheduler thread for each ring to avoid conflict with commands from
>    active apps.
>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 50 ++++++++++++++++++++++++++++--
>   1 file changed, 48 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index 19ac196..04a63b5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -3643,14 +3643,60 @@ static int amdgpu_debugfs_test_ib(struct seq_file *m, void *data)
>   	return 0;
>   }
>   
> +static int amdgpu_ring_tests(struct amdgpu_device *adev)
> +{
> +	unsigned i;
> +	int r = 0;
> +
> +	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
> +		struct amdgpu_ring *ring = adev->rings[i];
> +
> +		if (!ring || !ring->ready || !ring->sched.thread)
> +			continue;
> +
> +		/* hold on the scheduler */
> +		kthread_park(ring->sched.thread);
> +
> +		r = amdgpu_ring_test_ring(ring);
> +		if (r) {
> +			ring->ready = false;

Don't mess with the ready flag here.

> +			DRM_ERROR("amdgpu: failed to test ring %d (%d).\n",
> +				  i, r);
> +		}
> +
> +		/* go on the scheduler */
> +		kthread_unpark(ring->sched.thread);
> +	}
> +
> +	return r;
> +}
> +
> +static int amdgpu_debugfs_test_ring(struct seq_file *m, void *data)
> +{
> +	struct drm_info_node *node = (struct drm_info_node *) m->private;
> +	struct drm_device *dev = node->minor->dev;
> +	struct amdgpu_device *adev = dev->dev_private;
> +	int r = 0;
> +
> +	seq_printf(m, "run ring test:\n");
> +	r = amdgpu_ring_tests(adev);

Why a separate function for this?

Additional to that I agree with Dave that when we have the IB test the 
ring test is not necessary any more.

We just do this on boot/resume separately to be able to narrow down 
problems faster when we see in the logs that one fails but the other 
succeeds.

Christian.

> +	if (r)
> +		seq_printf(m, "ring tests failed (%d).\n", r);
> +	else
> +		seq_printf(m, "ring tests passed.\n");
> +
> +	return 0;
> +}
> +
>   static const struct drm_info_list amdgpu_debugfs_test_ib_ring_list[] = {
> -	{"amdgpu_test_ib", &amdgpu_debugfs_test_ib}
> +	{"amdgpu_test_ib", &amdgpu_debugfs_test_ib},
> +	{"amdgpu_test_ring", &amdgpu_debugfs_test_ring}
>   };
>   
>   static int amdgpu_debugfs_test_ib_ring_init(struct amdgpu_device *adev)
>   {
>   	return amdgpu_debugfs_add_files(adev,
> -					amdgpu_debugfs_test_ib_ring_list, 1);
> +					amdgpu_debugfs_test_ib_ring_list, 2);
>   }
>   
>   int amdgpu_debugfs_init(struct drm_minor *minor)
On Thu, May 11, 2017 at 02:41:56PM +0800, Christian König wrote:
> Am 11.05.2017 um 07:42 schrieb Huang Rui:
> > Signed-off-by: Huang Rui <ray.huang@amd.com>
> > ---
> >
> > V1 -> V2:
> > - park the scheduler thread for each ring to avoid conflict with commands
> from
> >    active apps.
> >
> > ---
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 50
> ++++++++++++++++++++++++++++--
> >   1 file changed, 48 insertions(+), 2 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd
> /amdgpu/amdgpu_device.c
> > index 19ac196..04a63b5 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > @@ -3643,14 +3643,60 @@ static int amdgpu_debugfs_test_ib(struct seq_file *m,
> void *data)
> >        return 0;
> >   }
> >  
> > +static int amdgpu_ring_tests(struct amdgpu_device *adev)
> > +{
> > +     unsigned i;
> > +     int r = 0;
> > +
> > +     for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
> > +             struct amdgpu_ring *ring = adev->rings[i];
> > +
> > +             if (!ring || !ring->ready || !ring->sched.thread)
> > +                     continue;
> > +
> > +             /* hold on the scheduler */
> > +             kthread_park(ring->sched.thread);
> > +
> > +             r = amdgpu_ring_test_ring(ring);
> > +             if (r) {
> > +                     ring->ready = false;
> 
> Don't mess with the ready flag here.
> 
> > +                     DRM_ERROR("amdgpu: failed to test ring %d (%d).\n",
> > +                               i, r);
> > +             }
> > +
> > +             /* go on the scheduler */
> > +             kthread_unpark(ring->sched.thread);
> > +     }
> > +
> > +     return r;
> > +}
> > +
> > +static int amdgpu_debugfs_test_ring(struct seq_file *m, void *data)
> > +{
> > +     struct drm_info_node *node = (struct drm_info_node *) m->private;
> > +     struct drm_device *dev = node->minor->dev;
> > +     struct amdgpu_device *adev = dev->dev_private;
> > +     int r = 0;
> > +
> > +     seq_printf(m, "run ring test:\n");
> > +     r = amdgpu_ring_tests(adev);
> 
> Why a separate function for this?
> 

I think it might be re-used by other side in future.

> Additional to that I agree with Dave that when we have the IB test the
> ring test is not necessary any more.
> 
> We just do this on boot/resume separately to be able to narrow down
> problems faster when we see in the logs that one fails but the other
> succeeds.
> 

Yeah, you know, we only want to expose ib test orignally. When I write that
codes, I found the ring tests are also able to re-use the debugfs list. :)

Is there anything that might break the ring at runtime? If not, I can drop
this patch.

Thanks,
Rui
Am 11.05.2017 um 09:35 schrieb Huang Rui:
> On Thu, May 11, 2017 at 02:41:56PM +0800, Christian König wrote:
>> Am 11.05.2017 um 07:42 schrieb Huang Rui:
>>> Signed-off-by: Huang Rui <ray.huang@amd.com>
>>> ---
>>>
>>> V1 -> V2:
>>> - park the scheduler thread for each ring to avoid conflict with commands
>> from
>>>     active apps.
>>>
>>> ---
>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 50
>> ++++++++++++++++++++++++++++--
>>>    1 file changed, 48 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd
>> /amdgpu/amdgpu_device.c
>>> index 19ac196..04a63b5 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> @@ -3643,14 +3643,60 @@ static int amdgpu_debugfs_test_ib(struct seq_file *m,
>> void *data)
>>>         return 0;
>>>    }
>>>   
>>> +static int amdgpu_ring_tests(struct amdgpu_device *adev)
>>> +{
>>> +     unsigned i;
>>> +     int r = 0;
>>> +
>>> +     for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
>>> +             struct amdgpu_ring *ring = adev->rings[i];
>>> +
>>> +             if (!ring || !ring->ready || !ring->sched.thread)
>>> +                     continue;
>>> +
>>> +             /* hold on the scheduler */
>>> +             kthread_park(ring->sched.thread);
>>> +
>>> +             r = amdgpu_ring_test_ring(ring);
>>> +             if (r) {
>>> +                     ring->ready = false;
>> Don't mess with the ready flag here.
>>
>>> +                     DRM_ERROR("amdgpu: failed to test ring %d (%d).\n",
>>> +                               i, r);
>>> +             }
>>> +
>>> +             /* go on the scheduler */
>>> +             kthread_unpark(ring->sched.thread);
>>> +     }
>>> +
>>> +     return r;
>>> +}
>>> +
>>> +static int amdgpu_debugfs_test_ring(struct seq_file *m, void *data)
>>> +{
>>> +     struct drm_info_node *node = (struct drm_info_node *) m->private;
>>> +     struct drm_device *dev = node->minor->dev;
>>> +     struct amdgpu_device *adev = dev->dev_private;
>>> +     int r = 0;
>>> +
>>> +     seq_printf(m, "run ring test:\n");
>>> +     r = amdgpu_ring_tests(adev);
>> Why a separate function for this?
>>
> I think it might be re-used by other side in future.

Well in this case we should create the function when we actually use it 
and not just because a possible use some times in the future.

>> Additional to that I agree with Dave that when we have the IB test the
>> ring test is not necessary any more.
>>
>> We just do this on boot/resume separately to be able to narrow down
>> problems faster when we see in the logs that one fails but the other
>> succeeds.
>>
> Yeah, you know, we only want to expose ib test orignally. When I write that
> codes, I found the ring tests are also able to re-use the debugfs list. :)
>
> Is there anything that might break the ring at runtime? If not, I can drop
> this patch.

Yeah, the ring tests messes with the ready flags, so I would rather drop 
this patch.

Christian.

>
> Thanks,
> Rui