[i-g-t,06/13] i915/gem_exec_schedule: Measure semaphore power consumption

Submitted by Chris Wilson on Feb. 4, 2019, 8:36 a.m.

Details

Message ID 20190204083614.2385-6-chris@chris-wilson.co.uk
State New
Headers show
Series "Series without cover letter" ( rev: 1 ) in IGT

Not browsing as part of any series.

Commit Message

Chris Wilson Feb. 4, 2019, 8:36 a.m.
How much energy does spinning on a semaphore consume relative to plain
old spinning?

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 tests/i915/gem_exec_schedule.c | 72 +++++++++++++++++++++++++++++++++-
 1 file changed, 71 insertions(+), 1 deletion(-)

Patch hide | download patch | download mbox

diff --git a/tests/i915/gem_exec_schedule.c b/tests/i915/gem_exec_schedule.c
index 0462ce84f..184ceb7d6 100644
--- a/tests/i915/gem_exec_schedule.c
+++ b/tests/i915/gem_exec_schedule.c
@@ -29,9 +29,10 @@ 
 #include <signal.h>
 
 #include "igt.h"
-#include "igt_vgem.h"
+#include "igt_gpu_power.h"
 #include "igt_rand.h"
 #include "igt_sysfs.h"
+#include "igt_vgem.h"
 #include "i915/gem_ring.h"
 
 #define LO 0
@@ -1191,6 +1192,65 @@  static void test_pi_ringfull(int fd, unsigned int engine)
 	munmap(result, 4096);
 }
 
+static void measure_semaphore_power(int i915)
+{
+	struct gpu_power power; /* initialised by gpu_power_open() below */
+	unsigned int engine, signaler; /* waiter engine; engine running the spinner */
+
+	igt_require(gpu_power_open(&power) == 0);
+
+	for_each_physical_engine(i915, signaler) {
+		struct gpu_power_sample s_spin[2]; /* start/end, spinner only */
+		struct gpu_power_sample s_sema[2]; /* start/end, after adding waiters */
+		double baseline, total;
+		int64_t jiffie = 1;
+		igt_spin_t *spin;
+
+		spin = __igt_spin_batch_new(i915,
+					    .engine = signaler,
+					    .flags = IGT_SPIN_POLL_RUN);
+		gem_wait(i915, spin->handle, &jiffie); /* waitboost */
+		igt_assert(spin->running);
+		igt_spin_busywait_until_running(spin);
+
+		gpu_power_read(&power, &s_spin[0]);
+		usleep(100*1000); /* 100ms sampling window */
+		gpu_power_read(&power, &s_spin[1]);
+
+		/* Add a waiter to each engine */
+		for_each_physical_engine(i915, engine) {
+			igt_spin_t *sema;
+
+			if (engine == signaler)
+				continue;
+
+			sema = __igt_spin_batch_new(i915,
+						    .engine = engine,
+						    .dependency = spin->handle);
+
+			igt_spin_batch_free(i915, sema);
+		}
+		usleep(10); /* just give the tasklets a chance to run */
+
+		gpu_power_read(&power, &s_sema[0]);
+		usleep(100*1000); /* same 100ms window as the baseline */
+		gpu_power_read(&power, &s_sema[1]);
+
+		igt_spin_batch_free(i915, spin);
+
+		baseline = gpu_power_W(&power, &s_spin[0], &s_spin[1]);
+		total = gpu_power_W(&power, &s_sema[0], &s_sema[1]);
+
+		igt_info("%s: %.1fmW + %.1fmW (total %.1fmW)\n",
+			 e__->name,
+			 1e3 * baseline, /* scaled by 1e3 for the mW report */
+			 1e3 * (total - baseline), /* extra cost over the baseline */
+			 1e3 * total);
+	}
+
+	gpu_power_close(&power);
+}
+
 igt_main
 {
 	const struct intel_execution_engine *e;
@@ -1351,6 +1411,16 @@  igt_main
 		}
 	}
 
+	igt_subtest_group {
+		igt_fixture {
+			igt_require(gem_scheduler_enabled(fd));
+			igt_require(gem_scheduler_has_semaphores(fd));
+		}
+
+		igt_subtest("semaphore-power")
+			measure_semaphore_power(fd);
+	}
+
 	igt_fixture {
 		igt_stop_hang_detector();
 		close(fd);

Comments

On Mon, Feb 04, 2019 at 08:36:07AM +0000, Chris Wilson wrote:
> How much energy does spinning on a semaphore consume relative to plain
> old spinning?
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Is this patch relying on something more than:
bad9d8d0 lib: Add GPU power measurement ? Because I got errors on compilation:

'../tests/i915/gem_exec_schedule.c:1417:16: error: implicit declaration of
function ‘gem_scheduler_has_semaphores’; did you mean
‘gem_scheduler_has_preemption’? [-Werror=implicit-function-declaration]'
I've seen some patches on intel-gfx, but not in this series.

> ---
>  tests/i915/gem_exec_schedule.c | 72 +++++++++++++++++++++++++++++++++-
>  1 file changed, 71 insertions(+), 1 deletion(-)
> 
> diff --git a/tests/i915/gem_exec_schedule.c b/tests/i915/gem_exec_schedule.c
> index 0462ce84f..184ceb7d6 100644
> --- a/tests/i915/gem_exec_schedule.c
> +++ b/tests/i915/gem_exec_schedule.c
> @@ -29,9 +29,10 @@
>  #include <signal.h>
>  
>  #include "igt.h"
> -#include "igt_vgem.h"
> +#include "igt_gpu_power.h"
>  #include "igt_rand.h"
>  #include "igt_sysfs.h"
> +#include "igt_vgem.h"
>  #include "i915/gem_ring.h"
>  
>  #define LO 0
> @@ -1191,6 +1192,65 @@ static void test_pi_ringfull(int fd, unsigned int engine)
>  	munmap(result, 4096);
>  }
>  
> +static void measure_semaphore_power(int i915)
> +{
> +	struct gpu_power power;
Where is the power struct initialized?
> +	unsigned int engine, signaler;
How does signaler differ from engine (in usage)?
> +
> +	igt_require(gpu_power_open(&power) == 0);
> +
> +	for_each_physical_engine(i915, signaler) {
> +		struct gpu_power_sample s_spin[2];
> +		struct gpu_power_sample s_sema[2];
> +		double baseline, total;
> +		int64_t jiffie = 1;
> +		igt_spin_t *spin;
> +
> +		spin = __igt_spin_batch_new(i915,
> +					    .engine = signaler,
> +					    .flags = IGT_SPIN_POLL_RUN);
> +		gem_wait(i915, spin->handle, &jiffie); /* waitboost */
> +		igt_assert(spin->running);
> +		igt_spin_busywait_until_running(spin);
> +
> +		gpu_power_read(&power, &s_spin[0]);
> +		usleep(100*1000);
> +		gpu_power_read(&power, &s_spin[1]);
Shouldn't we be checking results of gpu_power_read in both cases?
> +
> +		/* Add a waiter to each engine */
> +		for_each_physical_engine(i915, engine) {
> +			igt_spin_t *sema;
> +
> +			if (engine == signaler)
> +				continue;
> +
> +			sema = __igt_spin_batch_new(i915,
> +						    .engine = engine,
> +						    .dependency = spin->handle);
> +
> +			igt_spin_batch_free(i915, sema);
> +		}
> +		usleep(10); /* just give the tasklets a chance to run */
> +
> +		gpu_power_read(&power, &s_sema[0]);
> +		usleep(100*1000);
> +		gpu_power_read(&power, &s_sema[1]);
Same as above.

Kasia :)
> +
> +		igt_spin_batch_free(i915, spin);
> +
> +		baseline = gpu_power_W(&power, &s_spin[0], &s_spin[1]);
> +		total = gpu_power_W(&power, &s_sema[0], &s_sema[1]);
> +
> +		igt_info("%s: %.1fmW + %.1fmW (total %1.fmW)\n",
> +			 e__->name,
> +			 1e3 * baseline,
> +			 1e3 * (total - baseline),
> +			 1e3 * total);
> +	}
> +
> +	gpu_power_close(&power);
> +}
> +
>  igt_main
>  {
>  	const struct intel_execution_engine *e;
> @@ -1351,6 +1411,16 @@ igt_main
>  		}
>  	}
>  
> +	igt_subtest_group {
> +		igt_fixture {
> +			igt_require(gem_scheduler_enabled(fd));
> +			igt_require(gem_scheduler_has_semaphores(fd));
> +		}
> +
> +		igt_subtest("semaphore-power")
> +			measure_semaphore_power(fd);
> +	}
> +
>  	igt_fixture {
>  		igt_stop_hang_detector();
>  		close(fd);
> -- 
> 2.20.1
> 
> _______________________________________________
> igt-dev mailing list
> igt-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/igt-dev
Quoting Katarzyna Dec (2019-02-05 12:50:38)
> On Mon, Feb 04, 2019 at 08:36:07AM +0000, Chris Wilson wrote:
> > How much energy does spinning on a semaphore consume relative to plain
> > old spinning?
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Is this patch relying on something more then:
> bad9d8d0 lib: Add GPU power measurement ? Because I got errors on compilation:
> 
> '../tests/i915/gem_exec_schedule.c:1417:16: error: implicit declaration of
> function ‘gem_scheduler_has_semaphores’; did you mean
> ‘gem_scheduler_has_preemption’? [-Werror=implicit-function-declaration]'
> I've seen some patches on intel-gfx, but not in this series.

That'll be patch 4/13.

> > ---
> >  tests/i915/gem_exec_schedule.c | 72 +++++++++++++++++++++++++++++++++-
> >  1 file changed, 71 insertions(+), 1 deletion(-)
> > 
> > diff --git a/tests/i915/gem_exec_schedule.c b/tests/i915/gem_exec_schedule.c
> > index 0462ce84f..184ceb7d6 100644
> > --- a/tests/i915/gem_exec_schedule.c
> > +++ b/tests/i915/gem_exec_schedule.c
> > @@ -29,9 +29,10 @@
> >  #include <signal.h>
> >  
> >  #include "igt.h"
> > -#include "igt_vgem.h"
> > +#include "igt_gpu_power.h"
> >  #include "igt_rand.h"
> >  #include "igt_sysfs.h"
> > +#include "igt_vgem.h"
> >  #include "i915/gem_ring.h"
> >  
> >  #define LO 0
> > @@ -1191,6 +1192,65 @@ static void test_pi_ringfull(int fd, unsigned int engine)
> >       munmap(result, 4096);
> >  }
> >  
> > +static void measure_semaphore_power(int i915)
> > +{
> > +     struct gpu_power power;
> Where power struct is initialized?
> > +     unsigned int engine, signaler;
> How does signaler differ from engine (in usage)?

signaler tells engines to go.

> > +     igt_require(gpu_power_open(&power) == 0);

This initialises the struct gpu_power.

> > +     for_each_physical_engine(i915, signaler) {
> > +             struct gpu_power_sample s_spin[2];
> > +             struct gpu_power_sample s_sema[2];
> > +             double baseline, total;
> > +             int64_t jiffie = 1;
> > +             igt_spin_t *spin;
> > +
> > +             spin = __igt_spin_batch_new(i915,
> > +                                         .engine = signaler,
> > +                                         .flags = IGT_SPIN_POLL_RUN);
> > +             gem_wait(i915, spin->handle, &jiffie); /* waitboost */
> > +             igt_assert(spin->running);
> > +             igt_spin_busywait_until_running(spin);
> > +
> > +             gpu_power_read(&power, &s_spin[0]);
> > +             usleep(100*1000);
> > +             gpu_power_read(&power, &s_spin[1]);
> Shouldn't we be checking results of gpu_power_read in both cases?

Why? If it fails the output is garbage, which is apparent in the info.
-Chris
On Tue, Feb 05, 2019 at 12:53:14PM +0000, Chris Wilson wrote:
> Quoting Katarzyna Dec (2019-02-05 12:50:38)
> > On Mon, Feb 04, 2019 at 08:36:07AM +0000, Chris Wilson wrote:
> > > How much energy does spinning on a semaphore consume relative to plain
> > > old spinning?
> > > 
> > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Is this patch relying on something more then:
> > bad9d8d0 lib: Add GPU power measurement ? Because I got errors on compilation:
> > 
> > '../tests/i915/gem_exec_schedule.c:1417:16: error: implicit declaration of
> > function ‘gem_scheduler_has_semaphores’; did you mean
> > ‘gem_scheduler_has_preemption’? [-Werror=implicit-function-declaration]'
> > I've seen some patches on intel-gfx, but not in this series.
> 
> That'll be patch 4/13.
I knew I'd seen the code somewhere :)

> 
> > > ---
> > >  tests/i915/gem_exec_schedule.c | 72 +++++++++++++++++++++++++++++++++-
> > >  1 file changed, 71 insertions(+), 1 deletion(-)
> > > 
> > > diff --git a/tests/i915/gem_exec_schedule.c b/tests/i915/gem_exec_schedule.c
> > > index 0462ce84f..184ceb7d6 100644
> > > --- a/tests/i915/gem_exec_schedule.c
> > > +++ b/tests/i915/gem_exec_schedule.c
> > > @@ -29,9 +29,10 @@
> > >  #include <signal.h>
> > >  
> > >  #include "igt.h"
> > > -#include "igt_vgem.h"
> > > +#include "igt_gpu_power.h"
> > >  #include "igt_rand.h"
> > >  #include "igt_sysfs.h"
> > > +#include "igt_vgem.h"
> > >  #include "i915/gem_ring.h"
> > >  
> > >  #define LO 0
> > > @@ -1191,6 +1192,65 @@ static void test_pi_ringfull(int fd, unsigned int engine)
> > >       munmap(result, 4096);
> > >  }
> > >  
> > > +static void measure_semaphore_power(int i915)
> > > +{
> > > +     struct gpu_power power;
> > Where power struct is initialized?
> > > +     unsigned int engine, signaler;
> > How does signaler differ from engine (in usage)?
> 
> signaler tells engines to go.

> 
> > > +     igt_require(gpu_power_open(&power) == 0);
> 
> This initialises the struct gpu_power.
> 
> > > +     for_each_physical_engine(i915, signaler) {
> > > +             struct gpu_power_sample s_spin[2];
> > > +             struct gpu_power_sample s_sema[2];
> > > +             double baseline, total;
> > > +             int64_t jiffie = 1;
> > > +             igt_spin_t *spin;
> > > +
> > > +             spin = __igt_spin_batch_new(i915,
> > > +                                         .engine = signaler,
> > > +                                         .flags = IGT_SPIN_POLL_RUN);
> > > +             gem_wait(i915, spin->handle, &jiffie); /* waitboost */
> > > +             igt_assert(spin->running);
> > > +             igt_spin_busywait_until_running(spin);
> > > +
> > > +             gpu_power_read(&power, &s_spin[0]);
> > > +             usleep(100*1000);
> > > +             gpu_power_read(&power, &s_spin[1]);
> > Shouldn't we be checking results of gpu_power_read in both cases?
> 
> Why? If it fails the output is garbage, which is apparent in the info.
Fair enough.
> -Chris

I guess all LGTM.
Reviewed-by: Katarzyna Dec <katarzyna.dec@intel.com>
Kasia :)