igt/gem_exec_schedule: Exercise preemption timeout

Submitted by Chris Wilson on April 13, 2018, 2:14 p.m.

Details

Message ID 20180413141409.8774-1-chris@chris-wilson.co.uk
State New
Headers show
Series "igt/gem_exec_schedule: Exercise preemption timeout" ( rev: 1 ) in IGT (deprecated)

Not browsing as part of any series.

Commit Message

Chris Wilson April 13, 2018, 2:14 p.m.
Set up a unpreemptible spinner such that the only way we can inject a
high priority request onto the GPU is by resetting the spinner. The test
fails if we trigger hangcheck rather than the fast timeout mechanism.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 lib/i915/gem_context.c    | 72 +++++++++++++++++++++++++++++++--------
 lib/i915/gem_context.h    |  3 ++
 lib/igt_dummyload.c       | 12 +++++--
 lib/igt_dummyload.h       |  3 ++
 tests/gem_exec_schedule.c | 34 ++++++++++++++++++
 5 files changed, 106 insertions(+), 18 deletions(-)

Patch hide | download patch | download mbox

diff --git a/lib/i915/gem_context.c b/lib/i915/gem_context.c
index 669bd318c..c76f78d97 100644
--- a/lib/i915/gem_context.c
+++ b/lib/i915/gem_context.c
@@ -112,15 +112,17 @@  uint32_t gem_context_create(int fd)
 int __gem_context_destroy(int fd, uint32_t ctx_id)
 {
 	struct drm_i915_gem_context_destroy destroy;
-	int ret;
+	int err;
 
 	memset(&destroy, 0, sizeof(destroy));
 	destroy.ctx_id = ctx_id;
 
-	ret = igt_ioctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_DESTROY, &destroy);
-	if (ret)
-		return -errno;
-	return 0;
+	err = 0;
+	if (igt_ioctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_DESTROY, &destroy))
+		err = -errno;
+
+	errno = 0;
+	return err;
 }
 
 /**
@@ -142,11 +144,14 @@  void gem_context_destroy(int fd, uint32_t ctx_id)
 
 int __gem_context_get_param(int fd, struct drm_i915_gem_context_param *p)
 {
+	int err;
+
+	err = 0;
 	if (igt_ioctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_GETPARAM, p))
-		return -errno;
+		err = -errno;
 
 	errno = 0;
-	return 0;
+	return err;
 }
 
 /**
@@ -159,17 +164,20 @@  int __gem_context_get_param(int fd, struct drm_i915_gem_context_param *p)
  */
 void gem_context_get_param(int fd, struct drm_i915_gem_context_param *p)
 {
-	igt_assert(__gem_context_get_param(fd, p) == 0);
+	igt_assert_eq(__gem_context_get_param(fd, p), 0);
 }
 
 
 int __gem_context_set_param(int fd, struct drm_i915_gem_context_param *p)
 {
+	int err;
+
+	err = 0;
 	if (igt_ioctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, p))
-		return -errno;
+		err = -errno;
 
 	errno = 0;
-	return 0;
+	return err;
 }
 /**
  * gem_context_set_param:
@@ -181,7 +189,7 @@  int __gem_context_set_param(int fd, struct drm_i915_gem_context_param *p)
  */
 void gem_context_set_param(int fd, struct drm_i915_gem_context_param *p)
 {
-	igt_assert(__gem_context_set_param(fd, p) == 0);
+	igt_assert_eq(__gem_context_set_param(fd, p), 0);
 }
 
 /**
@@ -234,8 +242,6 @@  void gem_context_require_bannable(int fd)
 	igt_require(has_ban_period || has_bannable);
 }
 
-#define DRM_I915_CONTEXT_PARAM_PRIORITY 0x6
-
 /**
  * __gem_context_set_priority:
  * @fd: open i915 drm file descriptor
@@ -255,7 +261,7 @@  int __gem_context_set_priority(int fd, uint32_t ctx_id, int prio)
 	memset(&p, 0, sizeof(p));
 	p.ctx_id = ctx_id;
 	p.size = 0;
-	p.param = DRM_I915_CONTEXT_PARAM_PRIORITY;
+	p.param = I915_CONTEXT_PARAM_PRIORITY;
 	p.value = prio;
 
 	return __gem_context_set_param(fd, &p);
@@ -271,5 +277,41 @@  int __gem_context_set_priority(int fd, uint32_t ctx_id, int prio)
  */
 void gem_context_set_priority(int fd, uint32_t ctx_id, int prio)
 {
-	igt_assert(__gem_context_set_priority(fd, ctx_id, prio) == 0);
+	igt_assert_eq(__gem_context_set_priority(fd, ctx_id, prio), 0);
+}
+
+/**
+ * __gem_context_set_preempt_timeout:
+ * @fd: open i915 drm file descriptor
+ * @ctx_id: i915 context id
+ * @timeout_ns: desired preempt_timeout
+ *
+ * Returns: An integer equal to zero for success and negative for failure
+ */
+int __gem_context_set_preempt_timeout(int fd, uint32_t ctx, uint64_t timeout_ns)
+{
+#define LOCAL_CONTEXT_PARAM_PREEMPT_TIMEOUT 7
+	struct drm_i915_gem_context_param p;
+
+	memset(&p, 0, sizeof(p));
+	p.ctx_id = ctx;
+	p.size = 0;
+	p.param = LOCAL_CONTEXT_PARAM_PREEMPT_TIMEOUT;
+	p.value = timeout_ns;
+
+	return __gem_context_set_param(fd, &p);
+}
+
+/**
+ * gem_context_set_preempt_timeout:
+ * @fd: open i915 drm file descriptor
+ * @ctx: i915 context id
+ * @timeout_ns: desired preempt timeout
+ *
+ * Like __gem_context_set_priority(), except we assert on failure.
+ */
+void gem_context_set_preempt_timeout(int fd, uint32_t ctx, uint64_t timeout_ns)
+{
+	igt_assert_eq(__gem_context_set_preempt_timeout(fd, ctx, timeout_ns),
+		      0);
 }
diff --git a/lib/i915/gem_context.h b/lib/i915/gem_context.h
index aef68dda6..634bf584e 100644
--- a/lib/i915/gem_context.h
+++ b/lib/i915/gem_context.h
@@ -45,4 +45,7 @@  int __gem_context_get_param(int fd, struct drm_i915_gem_context_param *p);
 int __gem_context_set_priority(int fd, uint32_t ctx, int prio);
 void gem_context_set_priority(int fd, uint32_t ctx, int prio);
 
+int __gem_context_set_preempt_timeout(int fd, uint32_t ctx, uint64_t timeout_ns);
+void gem_context_set_preempt_timeout(int fd, uint32_t ctx, uint64_t timeout_ns);
+
 #endif /* GEM_CONTEXT_H */
diff --git a/lib/igt_dummyload.c b/lib/igt_dummyload.c
index ba917ba58..13aea704c 100644
--- a/lib/igt_dummyload.c
+++ b/lib/igt_dummyload.c
@@ -77,6 +77,7 @@  fill_reloc(struct drm_i915_gem_relocation_entry *reloc,
 
 #define OUT_FENCE	(1 << 0)
 #define POLL_RUN	(1 << 1)
+#define NO_PREEMPTION   (1 << 2)
 
 static int
 emit_recursive_batch(igt_spin_t *spin, int fd, uint32_t ctx, unsigned engine,
@@ -192,7 +193,8 @@  emit_recursive_batch(igt_spin_t *spin, int fd, uint32_t ctx, unsigned engine,
 	spin->handle = obj[BATCH].handle;
 
 	/* Allow ourselves to be preempted */
-	*batch++ = MI_ARB_CHK;
+	if (!(flags & NO_PREEMPTION))
+		*batch++ = MI_ARB_CHK;
 
 	/* Pad with a few nops so that we do not completely hog the system.
 	 *
@@ -365,6 +367,12 @@  __igt_spin_batch_new_poll(int fd, uint32_t ctx, unsigned engine)
 	return ___igt_spin_batch_new(fd, ctx, engine, 0, POLL_RUN);
 }
 
+igt_spin_t *
+__igt_spin_batch_new_hang(int fd, uint32_t ctx, unsigned engine)
+{
+	return ___igt_spin_batch_new(fd, ctx, engine, 0, NO_PREEMPTION);
+}
+
 /**
  * igt_spin_batch_new_poll:
  * @fd: open i915 drm file descriptor
@@ -449,8 +457,6 @@  void igt_spin_batch_end(igt_spin_t *spin)
 	if (!spin)
 		return;
 
-	igt_assert(*spin->batch == MI_ARB_CHK ||
-		   *spin->batch == MI_BATCH_BUFFER_END);
 	*spin->batch = MI_BATCH_BUFFER_END;
 	__sync_synchronize();
 }
diff --git a/lib/igt_dummyload.h b/lib/igt_dummyload.h
index a8ec213fe..711aa1883 100644
--- a/lib/igt_dummyload.h
+++ b/lib/igt_dummyload.h
@@ -66,6 +66,9 @@  igt_spin_t *igt_spin_batch_new_poll(int fd,
 				    uint32_t ctx,
 				    unsigned engine);
 
+igt_spin_t *
+__igt_spin_batch_new_hang(int fd, uint32_t ctx, unsigned engine);
+
 void igt_spin_batch_set_timeout(igt_spin_t *spin, int64_t ns);
 void igt_spin_batch_end(igt_spin_t *spin);
 void igt_spin_batch_free(int fd, igt_spin_t *spin);
diff --git a/tests/gem_exec_schedule.c b/tests/gem_exec_schedule.c
index 6ff15b6ef..93254945b 100644
--- a/tests/gem_exec_schedule.c
+++ b/tests/gem_exec_schedule.c
@@ -656,6 +656,37 @@  static void preemptive_hang(int fd, unsigned ring)
 	gem_context_destroy(fd, ctx[HI]);
 }
 
+static void preempt_timeout(int fd, unsigned ring)
+{
+	igt_spin_t *spin[3];
+	uint32_t ctx;
+
+	igt_require(__gem_context_set_preempt_timeout(fd, 0, 0));
+
+	ctx = gem_context_create(fd);
+	gem_context_set_priority(fd, ctx, MIN_PRIO);
+	spin[0] = __igt_spin_batch_new_hang(fd, ctx, ring);
+	spin[1] = __igt_spin_batch_new_hang(fd, ctx, ring);
+	gem_context_destroy(fd, ctx);
+
+	ctx = gem_context_create(fd);
+	gem_context_set_priority(fd, ctx, MAX_PRIO);
+	gem_context_set_preempt_timeout(fd, ctx, 1000 * 1000);
+	spin[2] = __igt_spin_batch_new(fd, ctx, ring, 0);
+	gem_context_destroy(fd, ctx);
+
+	igt_spin_batch_end(spin[2]);
+	gem_sync(fd, spin[2]->handle);
+
+	/* spin[0] is kicked, leaving spin[1] running */
+
+	igt_assert(gem_bo_busy(fd, spin[1]->handle));
+
+	igt_spin_batch_free(fd, spin[2]);
+	igt_spin_batch_free(fd, spin[1]);
+	igt_spin_batch_free(fd, spin[0]);
+}
+
 static void deep(int fd, unsigned ring)
 {
 #define XS 8
@@ -1120,6 +1151,9 @@  igt_main
 					igt_subtest_f("preempt-self-%s", e->name)
 						preempt_self(fd, e->exec_id | e->flags);
 
+					igt_subtest_f("preempt-timeout-%s", e->name)
+						preempt_timeout(fd, e->exec_id | e->flags);
+
 					igt_subtest_f("preempt-other-%s", e->name)
 						preempt_other(fd, e->exec_id | e->flags, 0);
 

Comments

On 13/04/18 07:14, Chris Wilson wrote:
> Set up a unpreemptible spinner such that the only way we can inject a
> high priority request onto the GPU is by resetting the spinner. The test
> fails if we trigger hangcheck rather than the fast timeout mechanism.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   lib/i915/gem_context.c    | 72 +++++++++++++++++++++++++++++++--------
>   lib/i915/gem_context.h    |  3 ++
>   lib/igt_dummyload.c       | 12 +++++--
>   lib/igt_dummyload.h       |  3 ++
>   tests/gem_exec_schedule.c | 34 ++++++++++++++++++
>   5 files changed, 106 insertions(+), 18 deletions(-)
> 

...

> @@ -449,8 +457,6 @@ void igt_spin_batch_end(igt_spin_t *spin)
>   	if (!spin)
>   		return;
>   
> -	igt_assert(*spin->batch == MI_ARB_CHK ||
> -		   *spin->batch == MI_BATCH_BUFFER_END);

I am not sure why we needed this, but it seems safe to remove.

>   	*spin->batch = MI_BATCH_BUFFER_END;
>   	__sync_synchronize();
>   }

> diff --git a/tests/gem_exec_schedule.c b/tests/gem_exec_schedule.c
> index 6ff15b6ef..93254945b 100644
> --- a/tests/gem_exec_schedule.c
> +++ b/tests/gem_exec_schedule.c
> @@ -656,6 +656,37 @@ static void preemptive_hang(int fd, unsigned ring)
>   	gem_context_destroy(fd, ctx[HI]);
>   }
>   
> +static void preempt_timeout(int fd, unsigned ring)
> +{
> +	igt_spin_t *spin[3];
> +	uint32_t ctx;
> +
> +	igt_require(__gem_context_set_preempt_timeout(fd, 0, 0));
> +
> +	ctx = gem_context_create(fd);
> +	gem_context_set_priority(fd, ctx, MIN_PRIO);
> +	spin[0] = __igt_spin_batch_new_hang(fd, ctx, ring);
> +	spin[1] = __igt_spin_batch_new_hang(fd, ctx, ring);
> +	gem_context_destroy(fd, ctx);
> +
> +	ctx = gem_context_create(fd);
> +	gem_context_set_priority(fd, ctx, MAX_PRIO);
> +	gem_context_set_preempt_timeout(fd, ctx, 1000 * 1000);
> +	spin[2] = __igt_spin_batch_new(fd, ctx, ring, 0);
> +	gem_context_destroy(fd, ctx);
> +
> +	igt_spin_batch_end(spin[2]);
> +	gem_sync(fd, spin[2]->handle);

Does this guarantee that spin[1] did not overtake spin[2]?

Thanks,
Antonio

> +
> +	/* spin[0] is kicked, leaving spin[1] running */
> +
> +	igt_assert(gem_bo_busy(fd, spin[1]->handle));
> +
> +	igt_spin_batch_free(fd, spin[2]);
> +	igt_spin_batch_free(fd, spin[1]);
> +	igt_spin_batch_free(fd, spin[0]);
> +}
> +
>   static void deep(int fd, unsigned ring)
>   {
>   #define XS 8
> @@ -1120,6 +1151,9 @@ igt_main
>   					igt_subtest_f("preempt-self-%s", e->name)
>   						preempt_self(fd, e->exec_id | e->flags);
>   
> +					igt_subtest_f("preempt-timeout-%s", e->name)
> +						preempt_timeout(fd, e->exec_id | e->flags);
> +
>   					igt_subtest_f("preempt-other-%s", e->name)
>   						preempt_other(fd, e->exec_id | e->flags, 0);
>   
>
Quoting Antonio Argenziano (2018-04-13 16:54:27)
> 
> 
> On 13/04/18 07:14, Chris Wilson wrote:
> > Set up a unpreemptible spinner such that the only way we can inject a
> > high priority request onto the GPU is by resetting the spinner. The test
> > fails if we trigger hangcheck rather than the fast timeout mechanism.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> >   lib/i915/gem_context.c    | 72 +++++++++++++++++++++++++++++++--------
> >   lib/i915/gem_context.h    |  3 ++
> >   lib/igt_dummyload.c       | 12 +++++--
> >   lib/igt_dummyload.h       |  3 ++
> >   tests/gem_exec_schedule.c | 34 ++++++++++++++++++
> >   5 files changed, 106 insertions(+), 18 deletions(-)
> > 
> 
> ...
> 
> > @@ -449,8 +457,6 @@ void igt_spin_batch_end(igt_spin_t *spin)
> >       if (!spin)
> >               return;
> >   
> > -     igt_assert(*spin->batch == MI_ARB_CHK ||
> > -                *spin->batch == MI_BATCH_BUFFER_END);
> 
> I am not sure why we needed this, but it seems safe to remove.
> 
> >       *spin->batch = MI_BATCH_BUFFER_END;
> >       __sync_synchronize();
> >   }
> 
> > diff --git a/tests/gem_exec_schedule.c b/tests/gem_exec_schedule.c
> > index 6ff15b6ef..93254945b 100644
> > --- a/tests/gem_exec_schedule.c
> > +++ b/tests/gem_exec_schedule.c
> > @@ -656,6 +656,37 @@ static void preemptive_hang(int fd, unsigned ring)
> >       gem_context_destroy(fd, ctx[HI]);
> >   }
> >   
> > +static void preempt_timeout(int fd, unsigned ring)
> > +{
> > +     igt_spin_t *spin[3];
> > +     uint32_t ctx;
> > +
> > +     igt_require(__gem_context_set_preempt_timeout(fd, 0, 0));
> > +
> > +     ctx = gem_context_create(fd);
> > +     gem_context_set_priority(fd, ctx, MIN_PRIO);
> > +     spin[0] = __igt_spin_batch_new_hang(fd, ctx, ring);
> > +     spin[1] = __igt_spin_batch_new_hang(fd, ctx, ring);
> > +     gem_context_destroy(fd, ctx);
> > +
> > +     ctx = gem_context_create(fd);
> > +     gem_context_set_priority(fd, ctx, MAX_PRIO);
> > +     gem_context_set_preempt_timeout(fd, ctx, 1000 * 1000);
> > +     spin[2] = __igt_spin_batch_new(fd, ctx, ring, 0);
> > +     gem_context_destroy(fd, ctx);
> > +
> > +     igt_spin_batch_end(spin[2]);
> > +     gem_sync(fd, spin[2]->handle);
> 
> Does this guarantee that spin[1] did not overtake spin[2]?

It does as well. Neither spin[0] or spin[1] can complete without being
reset at this point. If they are reset (by hangcheck) we detect that and
die. What we expect to happen is spin[0] is (more or less, there is still
dmesg) silently killed by the preempt timeout. If that timeout doesn't
happen, more hangcheck. What we don't check here is how quick. Now we
could reasonably assert that the spin[2] -> gem_sync takes less than 2ms.
-Chris
On 13/04/18 08:59, Chris Wilson wrote:
> Quoting Antonio Argenziano (2018-04-13 16:54:27)
>>
>>
>> On 13/04/18 07:14, Chris Wilson wrote:
>>> Set up a unpreemptible spinner such that the only way we can inject a
>>> high priority request onto the GPU is by resetting the spinner. The test
>>> fails if we trigger hangcheck rather than the fast timeout mechanism.
>>>
>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>> ---
>>>    lib/i915/gem_context.c    | 72 +++++++++++++++++++++++++++++++--------
>>>    lib/i915/gem_context.h    |  3 ++
>>>    lib/igt_dummyload.c       | 12 +++++--
>>>    lib/igt_dummyload.h       |  3 ++
>>>    tests/gem_exec_schedule.c | 34 ++++++++++++++++++
>>>    5 files changed, 106 insertions(+), 18 deletions(-)
>>>
>>
>> ...
>>
>>> @@ -449,8 +457,6 @@ void igt_spin_batch_end(igt_spin_t *spin)
>>>        if (!spin)
>>>                return;
>>>    
>>> -     igt_assert(*spin->batch == MI_ARB_CHK ||
>>> -                *spin->batch == MI_BATCH_BUFFER_END);
>>
>> I am not sure why we needed this, but it seems safe to remove.
>>
>>>        *spin->batch = MI_BATCH_BUFFER_END;
>>>        __sync_synchronize();
>>>    }
>>
>>> diff --git a/tests/gem_exec_schedule.c b/tests/gem_exec_schedule.c
>>> index 6ff15b6ef..93254945b 100644
>>> --- a/tests/gem_exec_schedule.c
>>> +++ b/tests/gem_exec_schedule.c
>>> @@ -656,6 +656,37 @@ static void preemptive_hang(int fd, unsigned ring)
>>>        gem_context_destroy(fd, ctx[HI]);
>>>    }
>>>    
>>> +static void preempt_timeout(int fd, unsigned ring)
>>> +{
>>> +     igt_spin_t *spin[3];
>>> +     uint32_t ctx;
>>> +
>>> +     igt_require(__gem_context_set_preempt_timeout(fd, 0, 0));
>>> +
>>> +     ctx = gem_context_create(fd);
>>> +     gem_context_set_priority(fd, ctx, MIN_PRIO);
>>> +     spin[0] = __igt_spin_batch_new_hang(fd, ctx, ring);
>>> +     spin[1] = __igt_spin_batch_new_hang(fd, ctx, ring);
Should we send MAX_ELSP_QLEN batches to match other preemption tests?

>>> +     gem_context_destroy(fd, ctx);
>>> +
>>> +     ctx = gem_context_create(fd);
>>> +     gem_context_set_priority(fd, ctx, MAX_PRIO);
>>> +     gem_context_set_preempt_timeout(fd, ctx, 1000 * 1000);
>>> +     spin[2] = __igt_spin_batch_new(fd, ctx, ring, 0);
>>> +     gem_context_destroy(fd, ctx);
>>> +
>>> +     igt_spin_batch_end(spin[2]);
>>> +     gem_sync(fd, spin[2]->handle);
>>
>> Does this guarantee that spin[1] did not overtake spin[2]?
> 
> It does as well. Neither spin[0] or spin[1] can complete without being
> reset at this point. If they are reset (by hangcheck) we detect that and

Cool.

> die. What we expect to happen is spin[0] is (more or less, there is still
> dmesg) silently killed by the preempt timeout. If that timeout doesn't

The silent part is interesting, how do we make sure that during normal 
preemption operations (e.g. preempt on an ARB_CHECK) we didn't silently 
discard the preempted batch? Do we care?

Test looks good,
Reviewed-by: Antonio Argenziano <antonio.argenziano@intel.com>

Thanks,
Antonio

> happen, more hangcheck. What we don't check here is how quick. Now we
> could reasonably assert that the spin[2] -> gem_sync takes less than 2ms.
> -Chris
>
Quoting Antonio Argenziano (2018-04-13 18:20:02)
> 
> 
> On 13/04/18 08:59, Chris Wilson wrote:
> > die. What we expect to happen is spin[0] is (more or less, there is still
> > dmesg) silently killed by the preempt timeout. If that timeout doesn't
> 
> The silent part is interesting, how do we make sure that during normal 
> preemption operations (e.g. preempt on an ARB_CHECK) we didn't silently 
> discard the preempted batch? Do we care?

Not particularly. From our point of view, the goal is that the high
priority spin[2] runs, no matter what. If the other requests cooperate,
that works out best for them.

The challenge for the test itself is detecting when the timeout was hit.
We aren't particularly good at demonstrating the spinner doesn't block
preemption, it is demonstrated in other tests, but we don't assert that
it is so.
-Chris