[2/2] igt/pm_rps: Add checks for freq = idle (RPn) in specific cases.

Submitted by Bob Paauwe on Dec. 1, 2015, 12:23 a.m.

Details

Message ID 1448929399-24498-3-git-send-email-bob.j.paauwe@intel.com
State New
Headers show
Series "Series without cover letter" ( rev: 2 1 ) in Intel GFX

Browsing this patch as part of:
"Series without cover letter" rev 1 in Intel GFX
<< prev patch [2/2] next patch >>

Commit Message

Bob Paauwe Dec. 1, 2015, 12:23 a.m.
Now that the frequency can drop below the user specified minimum when
the gpu is idle, add some checking to verify that it really does drop
down to the RPn frequency in specific cases.

To do this, modify the main test flow to take two 'check' routines
instead of one. When running the config-min-max-idle subtest make
use of the second check routine to verify that the frequency drops
to RPn instead of simply <= user min frequency.  For all other
subtests, use the original check routines for both checks.

Signed-off-by: Bob Paauwe <bob.j.paauwe@intel.com>
---
 tests/pm_rps.c | 47 ++++++++++++++++++++++++++++++++++-------------
 1 file changed, 34 insertions(+), 13 deletions(-)

Patch hide | download patch | download mbox

diff --git a/tests/pm_rps.c b/tests/pm_rps.c
index f919625..96225ce 100644
--- a/tests/pm_rps.c
+++ b/tests/pm_rps.c
@@ -364,7 +364,7 @@  static int get_hw_rounded_freq(int target)
 	return ret;
 }
 
-static void min_max_config(void (*check)(void), bool load_gpu)
+static void min_max_config(void (*check)(void), void (*check2)(void), bool load_gpu)
 {
 	int fmid = (origfreqs[RPn] + origfreqs[RP0]) / 2;
 
@@ -384,7 +384,7 @@  static void min_max_config(void (*check)(void), bool load_gpu)
 	writeval(stuff[MAX].filp, origfreqs[RP0]);
 	if (load_gpu)
 		do_load_gpu();
-	check();
+	check2();
 
 	igt_debug("\nIncrease min to midpoint...\n");
 	writeval(stuff[MIN].filp, fmid);
@@ -412,7 +412,7 @@  static void min_max_config(void (*check)(void), bool load_gpu)
 	writeval(stuff[MIN].filp, origfreqs[RPn]);
 	if (load_gpu)
 		do_load_gpu();
-	check();
+	check2();
 
 	igt_debug("\nDecrease min below RPn (invalid)...\n");
 	writeval_inval(stuff[MIN].filp, 0);
@@ -420,11 +420,11 @@  static void min_max_config(void (*check)(void), bool load_gpu)
 
 	igt_debug("\nDecrease max to midpoint...\n");
 	writeval(stuff[MAX].filp, fmid);
-	check();
+	check2();
 
 	igt_debug("\nDecrease max to RPn...\n");
 	writeval(stuff[MAX].filp, origfreqs[RPn]);
-	check();
+	check2();
 
 	igt_debug("\nDecrease max below RPn (invalid)...\n");
 	writeval_inval(stuff[MAX].filp, 0);
@@ -436,11 +436,11 @@  static void min_max_config(void (*check)(void), bool load_gpu)
 
 	igt_debug("\nIncrease max to midpoint...\n");
 	writeval(stuff[MAX].filp, fmid);
-	check();
+	check2();
 
 	igt_debug("\nIncrease max to RP0...\n");
 	writeval(stuff[MAX].filp, origfreqs[RP0]);
-	check();
+	check2();
 
 	igt_debug("\nIncrease max above RP0 (invalid)...\n");
 	writeval_inval(stuff[MAX].filp, origfreqs[RP0] + 1000);
@@ -461,7 +461,7 @@  static void basic_check(void)
 
 #define IDLE_WAIT_TIMESTEP_MSEC 100
 #define IDLE_WAIT_TIMEOUT_MSEC 10000
-static void idle_check(void)
+static void idle_check_min(void)
 {
 	int freqs[NUMFREQ];
 	int wait = 0;
@@ -482,6 +482,27 @@  static void idle_check(void)
 	igt_debug("Required %d msec to reach cur<=min\n", wait);
 }
 
+static void idle_check_idle(void)
+{
+	int freqs[NUMFREQ];
+	int wait = 0;
+
+	/* Monitor frequencies until cur settles down to min, which should
+	 * happen within the allotted time */
+	do {
+		read_freqs(freqs);
+		dump(freqs);
+		checkit(freqs);
+		if (freqs[CUR] == freqs[RPn])
+			break;
+		usleep(1000 * IDLE_WAIT_TIMESTEP_MSEC);
+		wait += IDLE_WAIT_TIMESTEP_MSEC;
+	} while (wait < IDLE_WAIT_TIMEOUT_MSEC);
+
+	igt_assert_eq(freqs[CUR], freqs[RPn]);
+	igt_debug("Required %d msec to reach cur=idle\n", wait);
+}
+
 #define LOADED_WAIT_TIMESTEP_MSEC 100
 #define LOADED_WAIT_TIMEOUT_MSEC 3000
 static void loaded_check(void)
@@ -577,7 +598,7 @@  static void reset(void)
 
 	igt_debug("Removing load...\n");
 	load_helper_stop();
-	idle_check();
+	idle_check_min();
 }
 
 /*
@@ -635,7 +656,7 @@  static void blocking(void)
 	matchit(pre_freqs, post_freqs);
 
 	igt_debug("Removing load...\n");
-	idle_check();
+	idle_check_min();
 }
 
 static void pm_rps_exit_handler(int sig)
@@ -686,14 +707,14 @@  igt_main
 	}
 
 	igt_subtest("basic-api")
-		min_max_config(basic_check, false);
+		min_max_config(basic_check, basic_check, false);
 
 	igt_subtest("min-max-config-idle")
-		min_max_config(idle_check, true);
+		min_max_config(idle_check_min, idle_check_idle, true);
 
 	igt_subtest("min-max-config-loaded") {
 		load_helper_run(HIGH);
-		min_max_config(loaded_check, false);
+		min_max_config(loaded_check, loaded_check, false);
 		load_helper_stop();
 	}
 

Comments

On ma, 2015-11-30 at 16:23 -0800, Bob Paauwe wrote:
> Now that the frequency can drop below the user specified minimum when
> the gpu is idle, add some checking to verify that it really does drop
> down to the RPn frequency in specific cases.
> 
> To do this, modify the main test flow to take two 'check' routines
> instead of one. When running the config-min-max-idle subtest make
> use of the second check routine to verify that the frequency drops
> to RPn instead of simply <= user min frequency.  For all other
> subtests, use the original check routines for both checks.
> 
> Signed-off-by: Bob Paauwe <bob.j.paauwe@intel.com>

I don't see the point. The frequency should always drop to the idle
frequency if the GPU is idle, it's not enough that it's below MIN-freq. 
So why do we need separate CUR-freq<=MIN-freq and CUR-freq==idle-freq
checks?

> ---
>  tests/pm_rps.c | 47 ++++++++++++++++++++++++++++++++++-------------
>  1 file changed, 34 insertions(+), 13 deletions(-)
> 
> diff --git a/tests/pm_rps.c b/tests/pm_rps.c
> index f919625..96225ce 100644
> --- a/tests/pm_rps.c
> +++ b/tests/pm_rps.c
> @@ -364,7 +364,7 @@ static int get_hw_rounded_freq(int target)
>  	return ret;
>  }
>  
> -static void min_max_config(void (*check)(void), bool load_gpu)
> +static void min_max_config(void (*check)(void), void
> (*check2)(void), bool load_gpu)
>  {
>  	int fmid = (origfreqs[RPn] + origfreqs[RP0]) / 2;
>  
> @@ -384,7 +384,7 @@ static void min_max_config(void (*check)(void),
> bool load_gpu)
>  	writeval(stuff[MAX].filp, origfreqs[RP0]);
>  	if (load_gpu)
>  		do_load_gpu();
> -	check();
> +	check2();
>  
>  	igt_debug("\nIncrease min to midpoint...\n");
>  	writeval(stuff[MIN].filp, fmid);
> @@ -412,7 +412,7 @@ static void min_max_config(void (*check)(void),
> bool load_gpu)
>  	writeval(stuff[MIN].filp, origfreqs[RPn]);
>  	if (load_gpu)
>  		do_load_gpu();
> -	check();
> +	check2();
>  
>  	igt_debug("\nDecrease min below RPn (invalid)...\n");
>  	writeval_inval(stuff[MIN].filp, 0);
> @@ -420,11 +420,11 @@ static void min_max_config(void (*check)(void),
> bool load_gpu)
>  
>  	igt_debug("\nDecrease max to midpoint...\n");
>  	writeval(stuff[MAX].filp, fmid);
> -	check();
> +	check2();
>  
>  	igt_debug("\nDecrease max to RPn...\n");
>  	writeval(stuff[MAX].filp, origfreqs[RPn]);
> -	check();
> +	check2();
>  
>  	igt_debug("\nDecrease max below RPn (invalid)...\n");
>  	writeval_inval(stuff[MAX].filp, 0);
> @@ -436,11 +436,11 @@ static void min_max_config(void (*check)(void),
> bool load_gpu)
>  
>  	igt_debug("\nIncrease max to midpoint...\n");
>  	writeval(stuff[MAX].filp, fmid);
> -	check();
> +	check2();
>  
>  	igt_debug("\nIncrease max to RP0...\n");
>  	writeval(stuff[MAX].filp, origfreqs[RP0]);
> -	check();
> +	check2();
>  
>  	igt_debug("\nIncrease max above RP0 (invalid)...\n");
>  	writeval_inval(stuff[MAX].filp, origfreqs[RP0] + 1000);
> @@ -461,7 +461,7 @@ static void basic_check(void)
>  
>  #define IDLE_WAIT_TIMESTEP_MSEC 100
>  #define IDLE_WAIT_TIMEOUT_MSEC 10000
> -static void idle_check(void)
> +static void idle_check_min(void)
>  {
>  	int freqs[NUMFREQ];
>  	int wait = 0;
> @@ -482,6 +482,27 @@ static void idle_check(void)
>  	igt_debug("Required %d msec to reach cur<=min\n", wait);
>  }
>  
> +static void idle_check_idle(void)
> +{
> +	int freqs[NUMFREQ];
> +	int wait = 0;
> +
> +	/* Monitor frequencies until cur settles down to min, which
> should
> +	 * happen within the allotted time */
> +	do {
> +		read_freqs(freqs);
> +		dump(freqs);
> +		checkit(freqs);
> +		if (freqs[CUR] == freqs[RPn])
> +			break;
> +		usleep(1000 * IDLE_WAIT_TIMESTEP_MSEC);
> +		wait += IDLE_WAIT_TIMESTEP_MSEC;
> +	} while (wait < IDLE_WAIT_TIMEOUT_MSEC);
> +
> +	igt_assert_eq(freqs[CUR], freqs[RPn]);
> +	igt_debug("Required %d msec to reach cur=idle\n", wait);
> +}
> +
>  #define LOADED_WAIT_TIMESTEP_MSEC 100
>  #define LOADED_WAIT_TIMEOUT_MSEC 3000
>  static void loaded_check(void)
> @@ -577,7 +598,7 @@ static void reset(void)
>  
>  	igt_debug("Removing load...\n");
>  	load_helper_stop();
> -	idle_check();
> +	idle_check_min();
>  }
>  
>  /*
> @@ -635,7 +656,7 @@ static void blocking(void)
>  	matchit(pre_freqs, post_freqs);
>  
>  	igt_debug("Removing load...\n");
> -	idle_check();
> +	idle_check_min();
>  }
>  
>  static void pm_rps_exit_handler(int sig)
> @@ -686,14 +707,14 @@ igt_main
>  	}
>  
>  	igt_subtest("basic-api")
> -		min_max_config(basic_check, false);
> +		min_max_config(basic_check, basic_check, false);
>  
>  	igt_subtest("min-max-config-idle")
> -		min_max_config(idle_check, true);
> +		min_max_config(idle_check_min, idle_check_idle,
> true);
>  
>  	igt_subtest("min-max-config-loaded") {
>  		load_helper_run(HIGH);
> -		min_max_config(loaded_check, false);
> +		min_max_config(loaded_check, loaded_check, false);
>  		load_helper_stop();
>  	}
>
On Tue, 1 Dec 2015 15:56:55 +0200
Imre Deak <imre.deak@intel.com> wrote:

> On ma, 2015-11-30 at 16:23 -0800, Bob Paauwe wrote:
> > Now that the frequency can drop below the user specified minimum when
> > the gpu is idle, add some checking to verify that it really does drop
> > down to the RPn frequency in specific cases.
> > 
> > To do this, modify the main test flow to take two 'check' routines
> > instead of one. When running the config-min-max-idle subtest make
> > use of the second check routine to verify that the frequency drops
> > to RPn instead of simply <= user min frequency.  For all other
> > subtests, use the original check routines for both checks.
> > 
> > Signed-off-by: Bob Paauwe <bob.j.paauwe@intel.com>
> 
> I don't see the point. The frequency should always drop to the idle
> frequency if the GPU is idle, it's not enough that it's below MIN-freq. 
> So why do we need separate CUR-freq<=MIN-freq and CUR-freq==idle-freq
> checks?

I started from the premise that it should always drop to idle but
that's just not the case.  The min_max_config() function is used for
both the idle and loaded subtests.  If you try to check for freq=idle
when doing the loaded subtest it will fail since it never goes idle.
Even in the idle subtest there are cases where it doesn't drop down to
idle. 

I suppose I could duplicate min_max_config and have a
min_max_config_idle() and min_max_conifg_not_idle() for use by the
different subtests. 

The real problem is that the test was not designed to handle the case
where the freq could drop below min and probably needs to be
re-designed.  I've been trying to find a quick fix vs. a complete
overhaul as this isn't really a priority for me.

> 
> > ---
> >  tests/pm_rps.c | 47 ++++++++++++++++++++++++++++++++++-------------
> >  1 file changed, 34 insertions(+), 13 deletions(-)
> > 
> > diff --git a/tests/pm_rps.c b/tests/pm_rps.c
> > index f919625..96225ce 100644
> > --- a/tests/pm_rps.c
> > +++ b/tests/pm_rps.c
> > @@ -364,7 +364,7 @@ static int get_hw_rounded_freq(int target)
> >  	return ret;
> >  }
> >  
> > -static void min_max_config(void (*check)(void), bool load_gpu)
> > +static void min_max_config(void (*check)(void), void
> > (*check2)(void), bool load_gpu)
> >  {
> >  	int fmid = (origfreqs[RPn] + origfreqs[RP0]) / 2;
> >  
> > @@ -384,7 +384,7 @@ static void min_max_config(void (*check)(void),
> > bool load_gpu)
> >  	writeval(stuff[MAX].filp, origfreqs[RP0]);
> >  	if (load_gpu)
> >  		do_load_gpu();
> > -	check();
> > +	check2();
> >  
> >  	igt_debug("\nIncrease min to midpoint...\n");
> >  	writeval(stuff[MIN].filp, fmid);
> > @@ -412,7 +412,7 @@ static void min_max_config(void (*check)(void),
> > bool load_gpu)
> >  	writeval(stuff[MIN].filp, origfreqs[RPn]);
> >  	if (load_gpu)
> >  		do_load_gpu();
> > -	check();
> > +	check2();
> >  
> >  	igt_debug("\nDecrease min below RPn (invalid)...\n");
> >  	writeval_inval(stuff[MIN].filp, 0);
> > @@ -420,11 +420,11 @@ static void min_max_config(void (*check)(void),
> > bool load_gpu)
> >  
> >  	igt_debug("\nDecrease max to midpoint...\n");
> >  	writeval(stuff[MAX].filp, fmid);
> > -	check();
> > +	check2();
> >  
> >  	igt_debug("\nDecrease max to RPn...\n");
> >  	writeval(stuff[MAX].filp, origfreqs[RPn]);
> > -	check();
> > +	check2();
> >  
> >  	igt_debug("\nDecrease max below RPn (invalid)...\n");
> >  	writeval_inval(stuff[MAX].filp, 0);
> > @@ -436,11 +436,11 @@ static void min_max_config(void (*check)(void),
> > bool load_gpu)
> >  
> >  	igt_debug("\nIncrease max to midpoint...\n");
> >  	writeval(stuff[MAX].filp, fmid);
> > -	check();
> > +	check2();
> >  
> >  	igt_debug("\nIncrease max to RP0...\n");
> >  	writeval(stuff[MAX].filp, origfreqs[RP0]);
> > -	check();
> > +	check2();
> >  
> >  	igt_debug("\nIncrease max above RP0 (invalid)...\n");
> >  	writeval_inval(stuff[MAX].filp, origfreqs[RP0] + 1000);
> > @@ -461,7 +461,7 @@ static void basic_check(void)
> >  
> >  #define IDLE_WAIT_TIMESTEP_MSEC 100
> >  #define IDLE_WAIT_TIMEOUT_MSEC 10000
> > -static void idle_check(void)
> > +static void idle_check_min(void)
> >  {
> >  	int freqs[NUMFREQ];
> >  	int wait = 0;
> > @@ -482,6 +482,27 @@ static void idle_check(void)
> >  	igt_debug("Required %d msec to reach cur<=min\n", wait);
> >  }
> >  
> > +static void idle_check_idle(void)
> > +{
> > +	int freqs[NUMFREQ];
> > +	int wait = 0;
> > +
> > +	/* Monitor frequencies until cur settles down to min, which
> > should
> > +	 * happen within the allotted time */
> > +	do {
> > +		read_freqs(freqs);
> > +		dump(freqs);
> > +		checkit(freqs);
> > +		if (freqs[CUR] == freqs[RPn])
> > +			break;
> > +		usleep(1000 * IDLE_WAIT_TIMESTEP_MSEC);
> > +		wait += IDLE_WAIT_TIMESTEP_MSEC;
> > +	} while (wait < IDLE_WAIT_TIMEOUT_MSEC);
> > +
> > +	igt_assert_eq(freqs[CUR], freqs[RPn]);
> > +	igt_debug("Required %d msec to reach cur=idle\n", wait);
> > +}
> > +
> >  #define LOADED_WAIT_TIMESTEP_MSEC 100
> >  #define LOADED_WAIT_TIMEOUT_MSEC 3000
> >  static void loaded_check(void)
> > @@ -577,7 +598,7 @@ static void reset(void)
> >  
> >  	igt_debug("Removing load...\n");
> >  	load_helper_stop();
> > -	idle_check();
> > +	idle_check_min();
> >  }
> >  
> >  /*
> > @@ -635,7 +656,7 @@ static void blocking(void)
> >  	matchit(pre_freqs, post_freqs);
> >  
> >  	igt_debug("Removing load...\n");
> > -	idle_check();
> > +	idle_check_min();
> >  }
> >  
> >  static void pm_rps_exit_handler(int sig)
> > @@ -686,14 +707,14 @@ igt_main
> >  	}
> >  
> >  	igt_subtest("basic-api")
> > -		min_max_config(basic_check, false);
> > +		min_max_config(basic_check, basic_check, false);
> >  
> >  	igt_subtest("min-max-config-idle")
> > -		min_max_config(idle_check, true);
> > +		min_max_config(idle_check_min, idle_check_idle,
> > true);
> >  
> >  	igt_subtest("min-max-config-loaded") {
> >  		load_helper_run(HIGH);
> > -		min_max_config(loaded_check, false);
> > +		min_max_config(loaded_check, loaded_check, false);
> >  		load_helper_stop();
> >  	}
> >
On ti, 2015-12-01 at 09:22 -0800, Bob Paauwe wrote:
> On Tue, 1 Dec 2015 15:56:55 +0200
> Imre Deak <imre.deak@intel.com> wrote:
> 
> > On ma, 2015-11-30 at 16:23 -0800, Bob Paauwe wrote:
> > > Now that the frequency can drop below the user specified minimum
> > > when
> > > the gpu is idle, add some checking to verify that it really does
> > > drop
> > > down to the RPn frequency in specific cases.
> > > 
> > > To do this, modify the main test flow to take two 'check'
> > > routines
> > > instead of one. When running the config-min-max-idle subtest make
> > > use of the second check routine to verify that the frequency
> > > drops
> > > to RPn instead of simply <= user min frequency.  For all other
> > > subtests, use the original check routines for both checks.
> > > 
> > > Signed-off-by: Bob Paauwe <bob.j.paauwe@intel.com>
> > 
> > I don't see the point. The frequency should always drop to the idle
> > frequency if the GPU is idle, it's not enough that it's below MIN-
> > freq. 
> > So why do we need separate CUR-freq<=MIN-freq and CUR-freq==idle-
> > freq
> > checks?
> 
> I started from the premise that it should always drop to idle but
> that's just not the case.

It is the correct premise and if it doesn't hold then there is a real
bug either in the testcase or the kernel which needs to be addressed
differently. I haven't found anything concrete but one suspect is the
logic that waits for the GPU idle condition, maybe the timeout
value idle_check() or the hard-coded duration in do_load_gpu() is
incorrect. In any case let's not paper over this issue, the very reason
we have test cases is to uncover such bugs.

> The min_max_config() function is used for
> both the idle and loaded subtests.  If you try to check for freq=idle
> when doing the loaded subtest it will fail since it never goes idle.
> Even in the idle subtest there are cases where it doesn't drop down
> to
> idle. 

The only place we should check for freq==idle is in idle_check() and
that one is not called during the loaded subtest, it wouldn't even make
sense to do so. So I'm not sure how this subtest fails for you. 

> I suppose I could duplicate min_max_config and have a
> min_max_config_idle() and min_max_conifg_not_idle() for use by the
> different subtests. 

The point of the "check" argument of min_max_config() is to distinguish
between the idle and loaded cases. The check functions passed in know
already if they can expect the frequency to reach the idle frequency
or not.

> The real problem is that the test was not designed to handle the case
> where the freq could drop below min and probably needs to be
> re-designed.  I've been trying to find a quick fix vs. a complete
> overhaul as this isn't really a priority for me.

I think we only need your first patch and if there is any failure after
that we have to root cause that and fix it properly.

--Imre

> 
> > 
> > > ---
> > >  tests/pm_rps.c | 47 ++++++++++++++++++++++++++++++++++--------
> > > -----
> > >  1 file changed, 34 insertions(+), 13 deletions(-)
> > > 
> > > diff --git a/tests/pm_rps.c b/tests/pm_rps.c
> > > index f919625..96225ce 100644
> > > --- a/tests/pm_rps.c
> > > +++ b/tests/pm_rps.c
> > > @@ -364,7 +364,7 @@ static int get_hw_rounded_freq(int target)
> > >  	return ret;
> > >  }
> > >  
> > > -static void min_max_config(void (*check)(void), bool load_gpu)
> > > +static void min_max_config(void (*check)(void), void
> > > (*check2)(void), bool load_gpu)
> > >  {
> > >  	int fmid = (origfreqs[RPn] + origfreqs[RP0]) / 2;
> > >  
> > > @@ -384,7 +384,7 @@ static void min_max_config(void
> > > (*check)(void),
> > > bool load_gpu)
> > >  	writeval(stuff[MAX].filp, origfreqs[RP0]);
> > >  	if (load_gpu)
> > >  		do_load_gpu();
> > > -	check();
> > > +	check2();
> > >  
> > >  	igt_debug("\nIncrease min to midpoint...\n");
> > >  	writeval(stuff[MIN].filp, fmid);
> > > @@ -412,7 +412,7 @@ static void min_max_config(void
> > > (*check)(void),
> > > bool load_gpu)
> > >  	writeval(stuff[MIN].filp, origfreqs[RPn]);
> > >  	if (load_gpu)
> > >  		do_load_gpu();
> > > -	check();
> > > +	check2();
> > >  
> > >  	igt_debug("\nDecrease min below RPn (invalid)...\n");
> > >  	writeval_inval(stuff[MIN].filp, 0);
> > > @@ -420,11 +420,11 @@ static void min_max_config(void
> > > (*check)(void),
> > > bool load_gpu)
> > >  
> > >  	igt_debug("\nDecrease max to midpoint...\n");
> > >  	writeval(stuff[MAX].filp, fmid);
> > > -	check();
> > > +	check2();
> > >  
> > >  	igt_debug("\nDecrease max to RPn...\n");
> > >  	writeval(stuff[MAX].filp, origfreqs[RPn]);
> > > -	check();
> > > +	check2();
> > >  
> > >  	igt_debug("\nDecrease max below RPn (invalid)...\n");
> > >  	writeval_inval(stuff[MAX].filp, 0);
> > > @@ -436,11 +436,11 @@ static void min_max_config(void
> > > (*check)(void),
> > > bool load_gpu)
> > >  
> > >  	igt_debug("\nIncrease max to midpoint...\n");
> > >  	writeval(stuff[MAX].filp, fmid);
> > > -	check();
> > > +	check2();
> > >  
> > >  	igt_debug("\nIncrease max to RP0...\n");
> > >  	writeval(stuff[MAX].filp, origfreqs[RP0]);
> > > -	check();
> > > +	check2();
> > >  
> > >  	igt_debug("\nIncrease max above RP0 (invalid)...\n");
> > >  	writeval_inval(stuff[MAX].filp, origfreqs[RP0] + 1000);
> > > @@ -461,7 +461,7 @@ static void basic_check(void)
> > >  
> > >  #define IDLE_WAIT_TIMESTEP_MSEC 100
> > >  #define IDLE_WAIT_TIMEOUT_MSEC 10000
> > > -static void idle_check(void)
> > > +static void idle_check_min(void)
> > >  {
> > >  	int freqs[NUMFREQ];
> > >  	int wait = 0;
> > > @@ -482,6 +482,27 @@ static void idle_check(void)
> > >  	igt_debug("Required %d msec to reach cur<=min\n", wait);
> > >  }
> > >  
> > > +static void idle_check_idle(void)
> > > +{
> > > +	int freqs[NUMFREQ];
> > > +	int wait = 0;
> > > +
> > > +	/* Monitor frequencies until cur settles down to min,
> > > which
> > > should
> > > +	 * happen within the allotted time */
> > > +	do {
> > > +		read_freqs(freqs);
> > > +		dump(freqs);
> > > +		checkit(freqs);
> > > +		if (freqs[CUR] == freqs[RPn])
> > > +			break;
> > > +		usleep(1000 * IDLE_WAIT_TIMESTEP_MSEC);
> > > +		wait += IDLE_WAIT_TIMESTEP_MSEC;
> > > +	} while (wait < IDLE_WAIT_TIMEOUT_MSEC);
> > > +
> > > +	igt_assert_eq(freqs[CUR], freqs[RPn]);
> > > +	igt_debug("Required %d msec to reach cur=idle\n", wait);
> > > +}
> > > +
> > >  #define LOADED_WAIT_TIMESTEP_MSEC 100
> > >  #define LOADED_WAIT_TIMEOUT_MSEC 3000
> > >  static void loaded_check(void)
> > > @@ -577,7 +598,7 @@ static void reset(void)
> > >  
> > >  	igt_debug("Removing load...\n");
> > >  	load_helper_stop();
> > > -	idle_check();
> > > +	idle_check_min();
> > >  }
> > >  
> > >  /*
> > > @@ -635,7 +656,7 @@ static void blocking(void)
> > >  	matchit(pre_freqs, post_freqs);
> > >  
> > >  	igt_debug("Removing load...\n");
> > > -	idle_check();
> > > +	idle_check_min();
> > >  }
> > >  
> > >  static void pm_rps_exit_handler(int sig)
> > > @@ -686,14 +707,14 @@ igt_main
> > >  	}
> > >  
> > >  	igt_subtest("basic-api")
> > > -		min_max_config(basic_check, false);
> > > +		min_max_config(basic_check, basic_check, false);
> > >  
> > >  	igt_subtest("min-max-config-idle")
> > > -		min_max_config(idle_check, true);
> > > +		min_max_config(idle_check_min, idle_check_idle,
> > > true);
> > >  
> > >  	igt_subtest("min-max-config-loaded") {
> > >  		load_helper_run(HIGH);
> > > -		min_max_config(loaded_check, false);
> > > +		min_max_config(loaded_check, loaded_check,
> > > false);
> > >  		load_helper_stop();
> > >  	}
> > >  
> 
> 
>
On Tue, 1 Dec 2015 19:43:05 +0200
Imre Deak <imre.deak@intel.com> wrote:

> On ti, 2015-12-01 at 09:22 -0800, Bob Paauwe wrote:
> > On Tue, 1 Dec 2015 15:56:55 +0200
> > Imre Deak <imre.deak@intel.com> wrote:
> > 
> > > On ma, 2015-11-30 at 16:23 -0800, Bob Paauwe wrote:
> > > > Now that the frequency can drop below the user specified minimum
> > > > when
> > > > the gpu is idle, add some checking to verify that it really does
> > > > drop
> > > > down to the RPn frequency in specific cases.
> > > > 
> > > > To do this, modify the main test flow to take two 'check'
> > > > routines
> > > > instead of one. When running the config-min-max-idle subtest make
> > > > use of the second check routine to verify that the frequency
> > > > drops
> > > > to RPn instead of simply <= user min frequency.  For all other
> > > > subtests, use the original check routines for both checks.
> > > > 
> > > > Signed-off-by: Bob Paauwe <bob.j.paauwe@intel.com>
> > > 
> > > I don't see the point. The frequency should always drop to the idle
> > > frequency if the GPU is idle, it's not enough that it's below MIN-
> > > freq. 
> > > So why do we need separate CUR-freq<=MIN-freq and CUR-freq==idle-
> > > freq
> > > checks?
> > 
> > I started from the premise that it should always drop to idle but
> > that's just not the case.
> 
> It is the correct premise and if it doesn't hold then there is a real
> bug either in the testcase or the kernel which needs to be addressed
> differently. I haven't found anything concrete but one suspect is the
> logic that waits for the GPU idle condition, maybe the timeout
> value idle_check() or the hard-coded duration in do_load_gpu() is
> incorrect. In any case let's not paper over this issue, the very reason
> we have test cases is to uncover such bugs.
> 
> > The min_max_config() function is used for
> > both the idle and loaded subtests.  If you try to check for freq=idle
> > when doing the loaded subtest it will fail since it never goes idle.
> > Even in the idle subtest there are cases where it doesn't drop down
> > to
> > idle. 
> 
> The only place we should check for freq==idle is in idle_check() and
> that one is not called during the loaded subtest, it wouldn't even make
> sense to do so. So I'm not sure how this subtest fails for you. 
> 
> > I suppose I could duplicate min_max_config and have a
> > min_max_config_idle() and min_max_conifg_not_idle() for use by the
> > different subtests. 
> 
> The point of the "check" argument of min_max_config() is to distinguish
> between the idle and loaded cases. The check functions passed in know
> already if they can expect the frequency to reach the idle frequency
> or not.
> 
> > The real problem is that the test was not designed to handle the case
> > where the freq could drop below min and probably needs to be
> > re-designed.  I've been trying to find a quick fix vs. a complete
> > overhaul as this isn't really a priority for me.
> 
> I think we only need your first patch and if there is any failure after
> that we have to root cause that and fix it properly.
> 
> --Imre

You're right.  I'm working with BXT and it seems like it's got some
unique issues with RPS.  I just sent a new patch based on the first one
but with the changes you suggested to check for ==idle instead of <=min.

Maybe you have some insights into what I'm seeing with BXT?  The first
problem I had was that I would see threshold up interrupts but not any
threshold down interrupts.  The missing down interrupts was related to
the BIOS setting.  I had runtime PM disabled so it seems strange that I
was getting the up interrupts. 

With the BIOS setting corrected, the driver started disabling the down
interrupts once the freq dropped to min or just below because of the min
vs. idle difference.  I have a patch for this that I'll send shortly.

The remaining issue seems to be some type of timing issue. I've had to
add a 35000us sleep between updating the interrupt enable register
(0xA168) and the posting read of freq. register (0xA008), otherwise it
acts like the change to the interrupt enable register never happened.
None of the documentation I have indicates that this is needed.

Bob


> 
> > 
> > > 
> > > > ---
> > > >  tests/pm_rps.c | 47 ++++++++++++++++++++++++++++++++++--------
> > > > -----
> > > >  1 file changed, 34 insertions(+), 13 deletions(-)
> > > > 
> > > > diff --git a/tests/pm_rps.c b/tests/pm_rps.c
> > > > index f919625..96225ce 100644
> > > > --- a/tests/pm_rps.c
> > > > +++ b/tests/pm_rps.c
> > > > @@ -364,7 +364,7 @@ static int get_hw_rounded_freq(int target)
> > > >  	return ret;
> > > >  }
> > > >  
> > > > -static void min_max_config(void (*check)(void), bool load_gpu)
> > > > +static void min_max_config(void (*check)(void), void
> > > > (*check2)(void), bool load_gpu)
> > > >  {
> > > >  	int fmid = (origfreqs[RPn] + origfreqs[RP0]) / 2;
> > > >  
> > > > @@ -384,7 +384,7 @@ static void min_max_config(void
> > > > (*check)(void),
> > > > bool load_gpu)
> > > >  	writeval(stuff[MAX].filp, origfreqs[RP0]);
> > > >  	if (load_gpu)
> > > >  		do_load_gpu();
> > > > -	check();
> > > > +	check2();
> > > >  
> > > >  	igt_debug("\nIncrease min to midpoint...\n");
> > > >  	writeval(stuff[MIN].filp, fmid);
> > > > @@ -412,7 +412,7 @@ static void min_max_config(void
> > > > (*check)(void),
> > > > bool load_gpu)
> > > >  	writeval(stuff[MIN].filp, origfreqs[RPn]);
> > > >  	if (load_gpu)
> > > >  		do_load_gpu();
> > > > -	check();
> > > > +	check2();
> > > >  
> > > >  	igt_debug("\nDecrease min below RPn (invalid)...\n");
> > > >  	writeval_inval(stuff[MIN].filp, 0);
> > > > @@ -420,11 +420,11 @@ static void min_max_config(void
> > > > (*check)(void),
> > > > bool load_gpu)
> > > >  
> > > >  	igt_debug("\nDecrease max to midpoint...\n");
> > > >  	writeval(stuff[MAX].filp, fmid);
> > > > -	check();
> > > > +	check2();
> > > >  
> > > >  	igt_debug("\nDecrease max to RPn...\n");
> > > >  	writeval(stuff[MAX].filp, origfreqs[RPn]);
> > > > -	check();
> > > > +	check2();
> > > >  
> > > >  	igt_debug("\nDecrease max below RPn (invalid)...\n");
> > > >  	writeval_inval(stuff[MAX].filp, 0);
> > > > @@ -436,11 +436,11 @@ static void min_max_config(void
> > > > (*check)(void),
> > > > bool load_gpu)
> > > >  
> > > >  	igt_debug("\nIncrease max to midpoint...\n");
> > > >  	writeval(stuff[MAX].filp, fmid);
> > > > -	check();
> > > > +	check2();
> > > >  
> > > >  	igt_debug("\nIncrease max to RP0...\n");
> > > >  	writeval(stuff[MAX].filp, origfreqs[RP0]);
> > > > -	check();
> > > > +	check2();
> > > >  
> > > >  	igt_debug("\nIncrease max above RP0 (invalid)...\n");
> > > >  	writeval_inval(stuff[MAX].filp, origfreqs[RP0] + 1000);
> > > > @@ -461,7 +461,7 @@ static void basic_check(void)
> > > >  
> > > >  #define IDLE_WAIT_TIMESTEP_MSEC 100
> > > >  #define IDLE_WAIT_TIMEOUT_MSEC 10000
> > > > -static void idle_check(void)
> > > > +static void idle_check_min(void)
> > > >  {
> > > >  	int freqs[NUMFREQ];
> > > >  	int wait = 0;
> > > > @@ -482,6 +482,27 @@ static void idle_check(void)
> > > >  	igt_debug("Required %d msec to reach cur<=min\n", wait);
> > > >  }
> > > >  
> > > > +static void idle_check_idle(void)
> > > > +{
> > > > +	int freqs[NUMFREQ];
> > > > +	int wait = 0;
> > > > +
> > > > +	/* Monitor frequencies until cur settles down to min,
> > > > which
> > > > should
> > > > +	 * happen within the allotted time */
> > > > +	do {
> > > > +		read_freqs(freqs);
> > > > +		dump(freqs);
> > > > +		checkit(freqs);
> > > > +		if (freqs[CUR] == freqs[RPn])
> > > > +			break;
> > > > +		usleep(1000 * IDLE_WAIT_TIMESTEP_MSEC);
> > > > +		wait += IDLE_WAIT_TIMESTEP_MSEC;
> > > > +	} while (wait < IDLE_WAIT_TIMEOUT_MSEC);
> > > > +
> > > > +	igt_assert_eq(freqs[CUR], freqs[RPn]);
> > > > +	igt_debug("Required %d msec to reach cur=idle\n", wait);
> > > > +}
> > > > +
> > > >  #define LOADED_WAIT_TIMESTEP_MSEC 100
> > > >  #define LOADED_WAIT_TIMEOUT_MSEC 3000
> > > >  static void loaded_check(void)
> > > > @@ -577,7 +598,7 @@ static void reset(void)
> > > >  
> > > >  	igt_debug("Removing load...\n");
> > > >  	load_helper_stop();
> > > > -	idle_check();
> > > > +	idle_check_min();
> > > >  }
> > > >  
> > > >  /*
> > > > @@ -635,7 +656,7 @@ static void blocking(void)
> > > >  	matchit(pre_freqs, post_freqs);
> > > >  
> > > >  	igt_debug("Removing load...\n");
> > > > -	idle_check();
> > > > +	idle_check_min();
> > > >  }
> > > >  
> > > >  static void pm_rps_exit_handler(int sig)
> > > > @@ -686,14 +707,14 @@ igt_main
> > > >  	}
> > > >  
> > > >  	igt_subtest("basic-api")
> > > > -		min_max_config(basic_check, false);
> > > > +		min_max_config(basic_check, basic_check, false);
> > > >  
> > > >  	igt_subtest("min-max-config-idle")
> > > > -		min_max_config(idle_check, true);
> > > > +		min_max_config(idle_check_min, idle_check_idle,
> > > > true);
> > > >  
> > > >  	igt_subtest("min-max-config-loaded") {
> > > >  		load_helper_run(HIGH);
> > > > -		min_max_config(loaded_check, false);
> > > > +		min_max_config(loaded_check, loaded_check,
> > > > false);
> > > >  		load_helper_stop();
> > > >  	}
> > > >  
> > 
> > 
> >
On to, 2015-12-03 at 16:43 -0800, Bob Paauwe wrote:
> On Tue, 1 Dec 2015 19:43:05 +0200
> Imre Deak <imre.deak@intel.com> wrote:
> 
> > On ti, 2015-12-01 at 09:22 -0800, Bob Paauwe wrote:
> > > On Tue, 1 Dec 2015 15:56:55 +0200
> > > Imre Deak <imre.deak@intel.com> wrote:
> > > 
> > > > On ma, 2015-11-30 at 16:23 -0800, Bob Paauwe wrote:
> > > > > Now that the frequency can drop below the user specified
> > > > > minimum
> > > > > when
> > > > > the gpu is idle, add some checking to verify that it really
> > > > > does
> > > > > drop
> > > > > down to the RPn frequency in specific cases.
> > > > > 
> > > > > To do this, modify the main test flow to take two 'check'
> > > > > routines
> > > > > instead of one. When running the config-min-max-idle subtest
> > > > > make
> > > > > use of the second check routine to verify that the frequency
> > > > > drops
> > > > > to RPn instead of simply <= user min frequency.  For all
> > > > > other
> > > > > subtests, use the original check routines for both checks.
> > > > > 
> > > > > Signed-off-by: Bob Paauwe <bob.j.paauwe@intel.com>
> > > > 
> > > > I don't see the point. The frequency should always drop to the
> > > > idle
> > > > frequency if the GPU is idle, it's not enough that it's below
> > > > MIN-
> > > > freq. 
> > > > So why do we need separate CUR-freq<=MIN-freq and CUR-
> > > > freq==idle-
> > > > freq
> > > > checks?
> > > 
> > > I started from the premise that it should always drop to idle but
> > > that's just not the case.
> > 
> > It is the correct premise and if it doesn't hold then there is a
> > real
> > bug either in the testcase or the kernel which needs to be
> > addressed
> > differently. I haven't found anything concrete but one suspect is
> > the
> > logic that waits for the GPU idle condition, maybe the timeout
> > value idle_check() or the hard-coded duration in do_load_gpu() is
> > incorrect. In any case let's not paper over this issue, the very
> > reason
> > we have test cases is to uncover such bugs.
> > 
> > > The min_max_config() function is used for
> > > both the idle and loaded subtests.  If you try to check for
> > > freq=idle
> > > when doing the loaded subtest it will fail since it never goes
> > > idle.
> > > Even in the idle subtest there are cases where it doesn't drop
> > > down
> > > to
> > > idle. 
> > 
> > The only place we should check for freq==idle is in idle_check()
> > and
> > that one is not called during the loaded subtest, it wouldn't even
> > make
> > sense to do so. So I'm not sure how this subtest fails for you. 
> > 
> > > I suppose I could duplicate min_max_config and have a
> > > min_max_config_idle() and min_max_conifg_not_idle() for use by
> > > the
> > > different subtests. 
> > 
> > The point of the "check" argument of min_max_config() is to
> > distinguish
> > between the idle and loaded cases. The check functions passed in
> > know
> > already if they can expect the frequency to reach the idle
> > frequency
> > or not.
> > 
> > > The real problem is that the test was not designed to handle the
> > > case
> > > where the freq could drop below min and probably needs to be
> > > re-designed.  I've been trying to find a quick fix vs. a complete
> > > overhaul as this isn't really a priority for me.
> > 
> > I think we only need your first patch and if there is any failure
> > after
> > that we have to root cause that and fix it properly.
> > 
> > --Imre
> 
> You're right.  I'm working with BXT and it seems like it's got some
> unique issues with RPS.  I just sent a new patch based on the first
> one
> but with the changes you suggested to check for ==idle instead of
> <=min.
> 
> Maybe you have some insights into what I'm seeing with BXT?  The first
> problem I had was that I would see threshold up interrupts but not any
> threshold down interrupts.  The missing down interrupts was related to
> the BIOS setting.  I had runtime PM disabled so it seems strange that I
> was getting the up interrupts. 

Yes, I saw this too. I wonder if we could check this somehow and not
enable RPS if BIOS hasn't set things up properly. Sagar has a patch
that checks the RC6 setup [1]; that's not exactly RPS, but since they
are setup at the same place I think we could use that for now for RPS
too.

> With the BIOS setting corrected, the driver started disabling the down
> interrupts once the freq dropped to min or just below because of the min
> vs. idle difference.  I have a patch for this that I'll send shortly.

Hm, that's not necessarily a problem. To reach the idle frequency we
don't depend on the up/down interrupts. The idle frequency gets set
explicitly from intel_mark_idle(), so if you don't reach the idle freq
then this function doesn't get called for some reason. Or as I said
earlier we just don't wait enough in do_load_gpu() or idle_check() in
the test, so we read out cur-freq before intel_mark_idle() would get
called.

> The remaining issue seems to be some type of timing issue. I've had
> to
> add a 35000us sleep between updating the interrupt enable register
> (0xA168) and the posting read of freq. register (0xA008), otherwise
> it
> acts like the change to the interrupt enable register never happened.
> None of the documentation I have indicates that this is needed.

What happens exactly? The frequency gets stuck above min-freq and you
never get a down interrupt? Not sure how this could happen, but there
is an interesting comment in intel_rps_limits() about this scenario.

--Imre

[1] http://lists.freedesktop.org/archives/intel-gfx/2015-November/07938
6.html


> 
> Bob
> 
> 
> > 
> > > 
> > > > 
> > > > > ---
> > > > >  tests/pm_rps.c | 47 ++++++++++++++++++++++++++++++++++----
> > > > > ----
> > > > > -----
> > > > >  1 file changed, 34 insertions(+), 13 deletions(-)
> > > > > 
> > > > > diff --git a/tests/pm_rps.c b/tests/pm_rps.c
> > > > > index f919625..96225ce 100644
> > > > > --- a/tests/pm_rps.c
> > > > > +++ b/tests/pm_rps.c
> > > > > @@ -364,7 +364,7 @@ static int get_hw_rounded_freq(int
> > > > > target)
> > > > >  	return ret;
> > > > >  }
> > > > >  
> > > > > -static void min_max_config(void (*check)(void), bool
> > > > > load_gpu)
> > > > > +static void min_max_config(void (*check)(void), void
> > > > > (*check2)(void), bool load_gpu)
> > > > >  {
> > > > >  	int fmid = (origfreqs[RPn] + origfreqs[RP0]) / 2;
> > > > >  
> > > > > @@ -384,7 +384,7 @@ static void min_max_config(void
> > > > > (*check)(void),
> > > > > bool load_gpu)
> > > > >  	writeval(stuff[MAX].filp, origfreqs[RP0]);
> > > > >  	if (load_gpu)
> > > > >  		do_load_gpu();
> > > > > -	check();
> > > > > +	check2();
> > > > >  
> > > > >  	igt_debug("\nIncrease min to midpoint...\n");
> > > > >  	writeval(stuff[MIN].filp, fmid);
> > > > > @@ -412,7 +412,7 @@ static void min_max_config(void
> > > > > (*check)(void),
> > > > > bool load_gpu)
> > > > >  	writeval(stuff[MIN].filp, origfreqs[RPn]);
> > > > >  	if (load_gpu)
> > > > >  		do_load_gpu();
> > > > > -	check();
> > > > > +	check2();
> > > > >  
> > > > >  	igt_debug("\nDecrease min below RPn
> > > > > (invalid)...\n");
> > > > >  	writeval_inval(stuff[MIN].filp, 0);
> > > > > @@ -420,11 +420,11 @@ static void min_max_config(void
> > > > > (*check)(void),
> > > > > bool load_gpu)
> > > > >  
> > > > >  	igt_debug("\nDecrease max to midpoint...\n");
> > > > >  	writeval(stuff[MAX].filp, fmid);
> > > > > -	check();
> > > > > +	check2();
> > > > >  
> > > > >  	igt_debug("\nDecrease max to RPn...\n");
> > > > >  	writeval(stuff[MAX].filp, origfreqs[RPn]);
> > > > > -	check();
> > > > > +	check2();
> > > > >  
> > > > >  	igt_debug("\nDecrease max below RPn
> > > > > (invalid)...\n");
> > > > >  	writeval_inval(stuff[MAX].filp, 0);
> > > > > @@ -436,11 +436,11 @@ static void min_max_config(void
> > > > > (*check)(void),
> > > > > bool load_gpu)
> > > > >  
> > > > >  	igt_debug("\nIncrease max to midpoint...\n");
> > > > >  	writeval(stuff[MAX].filp, fmid);
> > > > > -	check();
> > > > > +	check2();
> > > > >  
> > > > >  	igt_debug("\nIncrease max to RP0...\n");
> > > > >  	writeval(stuff[MAX].filp, origfreqs[RP0]);
> > > > > -	check();
> > > > > +	check2();
> > > > >  
> > > > >  	igt_debug("\nIncrease max above RP0
> > > > > (invalid)...\n");
> > > > >  	writeval_inval(stuff[MAX].filp, origfreqs[RP0] +
> > > > > 1000);
> > > > > @@ -461,7 +461,7 @@ static void basic_check(void)
> > > > >  
> > > > >  #define IDLE_WAIT_TIMESTEP_MSEC 100
> > > > >  #define IDLE_WAIT_TIMEOUT_MSEC 10000
> > > > > -static void idle_check(void)
> > > > > +static void idle_check_min(void)
> > > > >  {
> > > > >  	int freqs[NUMFREQ];
> > > > >  	int wait = 0;
> > > > > @@ -482,6 +482,27 @@ static void idle_check(void)
> > > > >  	igt_debug("Required %d msec to reach cur<=min\n",
> > > > > wait);
> > > > >  }
> > > > >  
> > > > > +static void idle_check_idle(void)
> > > > > +{
> > > > > +	int freqs[NUMFREQ];
> > > > > +	int wait = 0;
> > > > > +
> > > > > +	/* Monitor frequencies until cur settles down to
> > > > > min,
> > > > > which
> > > > > should
> > > > > +	 * happen within the allotted time */
> > > > > +	do {
> > > > > +		read_freqs(freqs);
> > > > > +		dump(freqs);
> > > > > +		checkit(freqs);
> > > > > +		if (freqs[CUR] == freqs[RPn])
> > > > > +			break;
> > > > > +		usleep(1000 * IDLE_WAIT_TIMESTEP_MSEC);
> > > > > +		wait += IDLE_WAIT_TIMESTEP_MSEC;
> > > > > +	} while (wait < IDLE_WAIT_TIMEOUT_MSEC);
> > > > > +
> > > > > +	igt_assert_eq(freqs[CUR], freqs[RPn]);
> > > > > +	igt_debug("Required %d msec to reach cur=idle\n",
> > > > > wait);
> > > > > +}
> > > > > +
> > > > >  #define LOADED_WAIT_TIMESTEP_MSEC 100
> > > > >  #define LOADED_WAIT_TIMEOUT_MSEC 3000
> > > > >  static void loaded_check(void)
> > > > > @@ -577,7 +598,7 @@ static void reset(void)
> > > > >  
> > > > >  	igt_debug("Removing load...\n");
> > > > >  	load_helper_stop();
> > > > > -	idle_check();
> > > > > +	idle_check_min();
> > > > >  }
> > > > >  
> > > > >  /*
> > > > > @@ -635,7 +656,7 @@ static void blocking(void)
> > > > >  	matchit(pre_freqs, post_freqs);
> > > > >  
> > > > >  	igt_debug("Removing load...\n");
> > > > > -	idle_check();
> > > > > +	idle_check_min();
> > > > >  }
> > > > >  
> > > > >  static void pm_rps_exit_handler(int sig)
> > > > > @@ -686,14 +707,14 @@ igt_main
> > > > >  	}
> > > > >  
> > > > >  	igt_subtest("basic-api")
> > > > > -		min_max_config(basic_check, false);
> > > > > +		min_max_config(basic_check, basic_check,
> > > > > false);
> > > > >  
> > > > >  	igt_subtest("min-max-config-idle")
> > > > > -		min_max_config(idle_check, true);
> > > > > +		min_max_config(idle_check_min,
> > > > > idle_check_idle,
> > > > > true);
> > > > >  
> > > > >  	igt_subtest("min-max-config-loaded") {
> > > > >  		load_helper_run(HIGH);
> > > > > -		min_max_config(loaded_check, false);
> > > > > +		min_max_config(loaded_check, loaded_check,
> > > > > false);
> > > > >  		load_helper_stop();
> > > > >  	}
> > > > >  
> > > 
> > > 
> > > 
> 
> 
>
On Fri, 4 Dec 2015 17:22:11 +0200
Imre Deak <imre.deak@intel.com> wrote:

> On to, 2015-12-03 at 16:43 -0800, Bob Paauwe wrote:
> > On Tue, 1 Dec 2015 19:43:05 +0200
> > Imre Deak <imre.deak@intel.com> wrote:
> > 
> > > On ti, 2015-12-01 at 09:22 -0800, Bob Paauwe wrote:
> > > > On Tue, 1 Dec 2015 15:56:55 +0200
> > > > Imre Deak <imre.deak@intel.com> wrote:
> > > > 
> > > > > On ma, 2015-11-30 at 16:23 -0800, Bob Paauwe wrote:
> > > > > > Now that the frequency can drop below the user specified
> > > > > > minimum
> > > > > > when
> > > > > > the gpu is idle, add some checking to verify that it really
> > > > > > does
> > > > > > drop
> > > > > > down to the RPn frequency in specific cases.
> > > > > > 
> > > > > > To do this, modify the main test flow to take two 'check'
> > > > > > routines
> > > > > > instead of one. When running the config-min-max-idle subtest
> > > > > > make
> > > > > > use of the second check routine to verify that the frequency
> > > > > > drops
> > > > > > to RPn instead of simply <= user min frequency.  For all
> > > > > > other
> > > > > > subtests, use the original check routines for both checks.
> > > > > > 
> > > > > > Signed-off-by: Bob Paauwe <bob.j.paauwe@intel.com>
> > > > > 
> > > > > I don't see the point. The frequency should always drop to the
> > > > > idle
> > > > > frequency if the GPU is idle, it's not enough that it's below
> > > > > MIN-
> > > > > freq. 
> > > > > So why do we need separate CUR-freq<=MIN-freq and CUR-
> > > > > freq==idle-
> > > > > freq
> > > > > checks?
> > > > 
> > > > I started from the premise that it should always drop to idle but
> > > > that's just not the case.
> > > 
> > > It is the correct premise and if it doesn't hold then there is a
> > > real
> > > bug either in the testcase or the kernel which needs to be
> > > addressed
> > > differently. I haven't found anything concrete but one suspect is
> > > the
> > > logic that waits for the GPU idle condition, maybe the timeout
> > > value idle_check() or the hard-coded duration in do_load_gpu() is
> > > incorrect. In any case let's not paper over this issue, the very
> > > reason
> > > we have test cases is to uncover such bugs.
> > > 
> > > > The min_max_config() function is used for
> > > > both the idle and loaded subtests.  If you try to check for
> > > > freq=idle
> > > > when doing the loaded subtest it will fail since it never goes
> > > > idle.
> > > > Even in the idle subtest there are cases where it doesn't drop
> > > > down
> > > > to
> > > > idle. 
> > > 
> > > The only place we should check for freq==idle is in idle_check()
> > > and
> > > that one is not called during the loaded subtest, it wouldn't even
> > > make
> > > sense to do so. So I'm not sure how this subtest fails for you. 
> > > 
> > > > I suppose I could duplicate min_max_config and have a
> > > > min_max_config_idle() and min_max_conifg_not_idle() for use by
> > > > the
> > > > different subtests. 
> > > 
> > > The point of the "check" argument of min_max_config() is to
> > > distinguish
> > > between the idle and loaded cases. The check functions passed in
> > > know
> > > already if they can expect the frequency to reach the idle
> > > frequency
> > > or not.
> > > 
> > > > The real problem is that the test was not designed to handle the
> > > > case
> > > > where the freq could drop below min and probably needs to be
> > > > re-designed.  I've been trying to find a quick fix vs. a complete
> > > > overhaul as this isn't really a priority for me.
> > > 
> > > I think we only need your first patch and if there is any failure
> > > after
> > > that we have to root cause that and fix it properly.
> > > 
> > > --Imre
> > 
> > You're right.  I'm working with BXT and it seems like it's got some
> > unique issues with RPS.  I just sent a new patch based on the first
> > one
> > but with the changes you suggested to check for ==idle instead of
> > <=min.
> > 
> > Maybe you have some insights into what I'm seeing with BXT?  The first
> > problem I had was that I would see threshold up interrupts but not any
> > threshold down interrupts.  The missing down interrupts was related to
> > the BIOS setting.  I had runtime PM disabled so it seems strange that I
> > was getting the up interrupts. 
> 
> Yes, I saw this too. I wonder if we could check this somehow and not
> enable RPS if BIOS hasn't set things up properly. Sagar has a patch
> that checks the RC6 setup [1]; that's not exactly RPS, but since they
> are setup at the same place I think we could use that for now for RPS
> too.

I'll take a look at that patch and see if I can do something like that
for RPS.  Thanks.

> 
> > With the BIOS setting corrected, the driver started disabling the down
> > interrupts once the freq dropped to min or just below because of the min
> > vs. idle difference.  I have a patch for this that I'll send shortly.
> 
> Hm, that's not necessarily a problem. To reach the idle frequency we
> don't depend on the up/down interrupts. The idle frequency gets set
> explicitly from intel_mark_idle(), so if you don't reach the idle freq
> then this function doesn't get called for some reason. Or as I said
> earlier we just don't wait enough in do_load_gpu() or idle_check() in
> the test, so we read out cur-freq before intel_mark_idle() would get
> called.

Maybe because I'm just testing from a console environment, but if I
change the min freq. via sysfs I don't get any type of call to drop
back to idle.  Possibly because I'm not generating any GPU load so it
never re-triggers an idle gpu?  I've waited quite a while and have
bumped up the time in idle_check().  Waiting for minutes doesn't seem
reasonable.

Should intel_mark_idle() get called periodically, even if the rings are
empty?
  
> 
> > The remaining issue seems to be some type of timing issue. I've had
> > to
> > add a 35000us sleep between updating the interrupt enable register
> > (0xA168) and the posting read of freq. register (0xA008), otherwise
> > it
> > acts like the change to the interrupt enable register never happened.
> > None of the documentation I have indicates that this is needed.
> 
> What happens exactly? The frequency gets stuck above min-freq and you
> never get a down interrupt? Not sure how this could happen, but there
> is an interesting comment in intel_rps_limits() about this scenario.

Basically if I set the min freq. through sysfs, it flows through all
the set threshold, create the interrupt mask code but the system
behaves as if the new threshold limits or interrupts were not changed.

So say RPn = 100, min = 100, current = 100
 - echo 383 > /sys/class/drm/card0/gt_min_freq_mhz  (midpoint freq)

gt_cur_freq_mhz will report 383 now and I don't see any interrupts.

If I add the delay, then right after I set the min to 383, I'll start
seeing the down threshold interrupts and it will lower the freq back
down to RPn (100) after two or three interrupts occur.

So I don't think this has anything to with RC6 and missing
interrupts that the comment refers to.

It took me a while to track this down because I was using a bunch of
printk's to trace the flow and values.  With the printk's it was
working but when I removed them, it stopped working. So it was about 3
or 4 printk's worth of delay that was needed.  But I really wonder if
there's something else that should be checked vs. just delaying. 

Bob

> 
> --Imre
> 
> [1] http://lists.freedesktop.org/archives/intel-gfx/2015-November/07938
> 6.html
> 
> 
> > 
> > Bob
> > 
> > 
> > > 

> > 
> > 
> >
On Fri, 2015-12-04 at 10:37 -0800, Bob Paauwe wrote:
> On Fri, 4 Dec 2015 17:22:11 +0200
> Imre Deak <imre.deak@intel.com> wrote:
> [...]
> > > With the BIOS setting corrected, the driver started disabling the
> > > down
> > > interrupts once the freq dropped to min or just below because of
> > > the min
> > > vs. idle difference.  I have a patch for this that I'll send
> > > shortly.
> > 
> > Hm, that's not necessarily a problem. To reach the idle frequency
> > we
> > don't depend on the up/down interrupts. The idle frequency gets set
> > explicitly from intel_mark_idle(), so if you don't reach the idle
> > freq
> > then this function doesn't get called for some reason. Or as I said
> > earlier we just don't wait enough in do_load_gpu() or idle_check()
> > in
> > the test, so we read out cur-freq before intel_mark_idle() would
> > get
> > called.
> 
> Maybe because I'm just testing from a console environment, but if I
> change the min freq. via sysfs I don't get any type of call to drop
> back to idle.  Possibly because I'm not generating any GPU load so it
> never re-triggers an idle gpu?  I've waited quite a while and have
> bumped up the time in idle_check().  Waiting for minutes doesn't seem
> reasonable.
> 
> Should intel_mark_idle() get called periodically, even if the rings
> are
> empty?
>   
> > 
> > > The remaining issue seems to be some type of timing issue. I've
> > > had
> > > to
> > > add a 35000us sleep between updating the interrupt enable
> > > register
> > > (0xA168) and the posting read of freq. register (0xA008),
> > > otherwise
> > > it
> > > acts like the change to the interrupt enable register never
> > > happened.
> > > None of the documentation I have indicates that this is needed.
> > 
> > What happens exactly? The frequency gets stuck above min-freq and
> > you
> > never get a down interrupt? Not sure how this could happen, but
> > there
> > is an interesting comment in intel_rps_limits() about this
> > scenario.
> 
> Basically if I set the min freq. through sysfs, it flows through all
> the set threshold, create the interrupt mask code but the system
> behaves as if the new threshold limits or interrupts were not
> changed.
> 
> So say RPn = 100, min = 100, current = 100
>  - echo 383 > /sys/class/drm/card0/gt_min_freq_mhz  (midpoint freq)
> 
> gt_cur_freq_mhz will report 383 now and I don't see any interrupts.
> 
> If I add the delay, then right after I set the min to 383, I'll start
> seeing the down threshold interrupts and it will lower the freq back
> down to RPn (100) after two or three interrupts occur.
> 
> So I don't think this has anything to with RC6 and missing
> interrupts that the comment refers to.
> 
> It took me a while to track this down because I was using a bunch of
> printk's to trace the flow and values.  With the printk's it was
> working but when I removed them, it stopped working. So it was about
> 3
> or 4 printk's worth of delay that was needed.  But I really wonder if
> there's something else that should be checked vs. just delaying. 

Ok, so for both of the above observations: If you write the min-freq
value via sysfs, it will result in cur-freq raised and "getting stuck"
at min-freq if cur-freq was below the new min value. This is a bit
strange since as we already established normally cur-freq should always
drop to idle-freq if the GPU is idle regardless of min-freq. AFAIR the
sysfs entry works in this way, since setting the frequency also changes
the interrupt mask and what we really need is this latter thing. To get
back to the normal drop-to-idle-freq policy again you need to submit
some GPU work after setting the sysfs entry. It's done already in most
of the places in pm_rps, except for few. The following should fix up
those:

 	igt_debug("\nIncrease min above RP0 (invalid)...\n");
@@ -416,6 +420,8 @@ static void min_max_config(void (*check)(void),
bool load_gpu)
 
 	igt_debug("\nDecrease min below RPn (invalid)...\n");
 	writeval_inval(stuff[MIN].filp, 0);
+	if (load_gpu)
+		do_load_gpu();
 	check();
 
 	igt_debug("\nDecrease max to midpoint...\n");
On Fri, 4 Dec 2015 22:58:50 +0200
Imre Deak <imre.deak@intel.com> wrote:

> On Fri, 2015-12-04 at 10:37 -0800, Bob Paauwe wrote:
> > On Fri, 4 Dec 2015 17:22:11 +0200
> > Imre Deak <imre.deak@intel.com> wrote:
> > [...]
> > > > With the BIOS setting corrected, the driver started disabling the
> > > > down
> > > > interrupts once the freq dropped to min or just below because of
> > > > the min
> > > > vs. idle difference.  I have a patch for this that I'll send
> > > > shortly.
> > > 
> > > Hm, that's not necessarily a problem. To reach the idle frequency
> > > we
> > > don't depend on the up/down interrupts. The idle frequency gets set
> > > explicitly from intel_mark_idle(), so if you don't reach the idle
> > > freq
> > > then this function doesn't get called for some reason. Or as I said
> > > earlier we just don't wait enough in do_load_gpu() or idle_check()
> > > in
> > > the test, so we read out cur-freq before intel_mark_idle() would
> > > get
> > > called.
> > 
> > Maybe because I'm just testing from a console environment, but if I
> > change the min freq. via sysfs I don't get any type of call to drop
> > back to idle.  Possibly because I'm not generating any GPU load so it
> > never re-triggers an idle gpu?  I've waited quite a while and have
> > bumped up the time in idle_check().  Waiting for minutes doesn't seem
> > reasonable.
> > 
> > Should intel_mark_idle() get called periodically, even if the rings
> > are
> > empty?
> >   
> > > 
> > > > The remaining issue seems to be some type of timing issue. I've
> > > > had
> > > > to
> > > > add a 35000us sleep between updating the interrupt enable
> > > > register
> > > > (0xA168) and the posting read of freq. register (0xA008),
> > > > otherwise
> > > > it
> > > > acts like the change to the interrupt enable register never
> > > > happened.
> > > > None of the documentation I have indicates that this is needed.
> > > 
> > > What happens exactly? The frequency gets stuck above min-freq and
> > > you
> > > never get a down interrupt? Not sure how this could happen, but
> > > there
> > > is an interesting comment in intel_rps_limits() about this
> > > scenario.
> > 
> > Basically if I set the min freq. through sysfs, it flows through all
> > the set threshold, create the interrupt mask code but the system
> > behaves as if the new threshold limits or interrupts were not
> > changed.
> > 
> > So say RPn = 100, min = 100, current = 100
> >  - echo 383 > /sys/class/drm/card0/gt_min_freq_mhz  (midpoint freq)
> > 
> > gt_cur_freq_mhz will report 383 now and I don't see any interrupts.
> > 
> > If I add the delay, then right after I set the min to 383, I'll start
> > seeing the down threshold interrupts and it will lower the freq back
> > down to RPn (100) after two or three interrupts occur.
> > 
> > So I don't think this has anything to with RC6 and missing
> > interrupts that the comment refers to.
> > 
> > It took me a while to track this down because I was using a bunch of
> > printk's to trace the flow and values.  With the printk's it was
> > working but when I removed them, it stopped working. So it was about
> > 3
> > or 4 printk's worth of delay that was needed.  But I really wonder if
> > there's something else that should be checked vs. just delaying. 
> 
> Ok, so for both of the above observations: If you write the min-freq
> value via sysfs, it will result in cur-freq raised and "getting stuck"
> at min-freq if cur-freq was below the new min value. This is a bit
> strange since as we already established normally cur-freq should always
> drop to idle-freq if the GPU is idle regardless of min-freq. AFAIR the
> sysfs entry works in this way, since setting the frequency also changes
> the interrupt mask and what we really need is this latter thing. To get
> back to the normal drop-to-idle-freq policy again you need to submit
> some GPU work after setting the sysfs entry. 

So we want the policy to be that we'll only drop below min to idle when
the GPU transitions from not idle to idle.  Without that transition,
we'll stay at the user specified min.

> It's done already in most
> of the places in pm_rps, except for few. The following should fix up
> those:
> 
> diff --git a/tests/pm_rps.c b/tests/pm_rps.c
> index 74f08f4..ad06ef0 100644
> --- a/tests/pm_rps.c
> +++ b/tests/pm_rps.c
> @@ -388,10 +388,14 @@ static void min_max_config(void (*check)(void),
> bool load_gpu)
>  
>  	igt_debug("\nIncrease min to midpoint...\n");
>  	writeval(stuff[MIN].filp, fmid);
> +	if (load_gpu)
> +		do_load_gpu();
>  	check();
>  
>  	igt_debug("\nIncrease min to RP0...\n");
>  	writeval(stuff[MIN].filp, origfreqs[RP0]);
> +	if (load_gpu)
> +		do_load_gpu();
>  	check();
>  
>  	igt_debug("\nIncrease min above RP0 (invalid)...\n");
> @@ -416,6 +420,8 @@ static void min_max_config(void (*check)(void),
> bool load_gpu)
>  
>  	igt_debug("\nDecrease min below RPn (invalid)...\n");
>  	writeval_inval(stuff[MIN].filp, 0);
> +	if (load_gpu)
> +		do_load_gpu();
>  	check();
>  
>  	igt_debug("\nDecrease max to midpoint...\n");

Yes, this does make the test pass.  However, what is the expected
behavior in the following:

echo 500 > gt_min_freq_mhz
echo 200 > gt_min_freq_mhz

With no GPU load?

Right now, the current frequency will stay stuck at 500 until there is
GPU load or until I double set the min freq to 200.  For example:

  bxt-test:drm root $ echo 500 > card0/gt_min_freq_mhz 
  bxt-test:drm root $ cat card0/gt_cur_freq_mhz 
  500
  bxt-test:drm root $ echo 200 > card0/gt_min_freq_mhz                         
  bxt-test:drm root $ cat card0/gt_cur_freq_mhz 
  500
  bxt-test:drm root $ echo 200 > card0/gt_min_freq_mhz                         
  bxt-test:drm root $ cat card0/gt_cur_freq_mhz 
  200

If I add a delay before the posting read in gen6_set_rps() then the 
frequency will drop after the first "echo 200 >". It's like the 
gen6_set_rps() is behind.  A slightly more interesting result
is:

echo 500 > min  --> cur = 500
echo 200 > min  --> cur = 500
echo 400 > min  --> cur = 200

Something doesn't seem right.
On pe, 2015-12-04 at 14:41 -0800, Bob Paauwe wrote:
> On Fri, 4 Dec 2015 22:58:50 +0200
> Imre Deak <imre.deak@intel.com> wrote:
> [...]
> So we want the policy to be that we'll only drop below min to idle
> when
> the GPU transitions from not idle to idle.  Without that transition,
> we'll stay at the user specified min.

I would put it, that after an idle->not-idle transition we require that
the frequency drops to the idle frequency. What happens right after
writing a valid value to the min-freq sysfs entry is more loose, it can
end up anywhere between the idle-freq..max(new-min-freq,old-cur-freq)
range.

> > It's done already in most
> > of the places in pm_rps, except for few. The following should fix
> > up
> > those:
> > 
> > diff --git a/tests/pm_rps.c b/tests/pm_rps.c
> > index 74f08f4..ad06ef0 100644
> > --- a/tests/pm_rps.c
> > +++ b/tests/pm_rps.c
> > @@ -388,10 +388,14 @@ static void min_max_config(void
> > (*check)(void),
> > bool load_gpu)
> >  
> >  	igt_debug("\nIncrease min to midpoint...\n");
> >  	writeval(stuff[MIN].filp, fmid);
> > +	if (load_gpu)
> > +		do_load_gpu();
> >  	check();
> >  
> >  	igt_debug("\nIncrease min to RP0...\n");
> >  	writeval(stuff[MIN].filp, origfreqs[RP0]);
> > +	if (load_gpu)
> > +		do_load_gpu();
> >  	check();
> >  
> >  	igt_debug("\nIncrease min above RP0 (invalid)...\n");
> > @@ -416,6 +420,8 @@ static void min_max_config(void (*check)(void),
> > bool load_gpu)
> >  
> >  	igt_debug("\nDecrease min below RPn (invalid)...\n");
> >  	writeval_inval(stuff[MIN].filp, 0);
> > +	if (load_gpu)
> > +		do_load_gpu();
> >  	check();
> >  
> >  	igt_debug("\nDecrease max to midpoint...\n");
> 
> Yes, this does make the test pass.

Ok, this is according to expectations then. We don't actually need the
last chunk, since for invalid settings there should be no change to
cur-freq. So could you follow up with a cleaned up patch for this?

> However, what is the expected behavior in the following:
> 
> echo 500 > gt_min_freq_mhz
> echo 200 > gt_min_freq_mhz
> 
> With no GPU load?
> 
> Right now, the current frequency will stay stuck at 500 until there
> is
> GPU load or until I double set the min freq to 200.  For example:
> 
>   bxt-test:drm root $ echo 500 > card0/gt_min_freq_mhz 
>   bxt-test:drm root $ cat card0/gt_cur_freq_mhz 
>   500
>   bxt-test:drm root $ echo 200 >
> card0/gt_min_freq_mhz                         
>   bxt-test:drm root $ cat card0/gt_cur_freq_mhz 
>   500
>   bxt-test:drm root $ echo 200 >
> card0/gt_min_freq_mhz                         
>   bxt-test:drm root $ cat card0/gt_cur_freq_mhz 
>   200
> 
> If I add a delay before the posting read in gen6_set_rps() then the 
> frequency will drop after the first "echo 200 >". It's like the 
> gen6_set_rps() is behind.  A slightly more interesting result
> is:
> 
> echo 500 > min  --> cur = 500
> echo 200 > min  --> cur = 500
> echo 400 > min  --> cur = 200
> 
> Something doesn't seem right. 

All the above can be explained by two things:
1. gt_min_freq_mhz_store() will not change cur-freq if cur-freq is
already within the new min-freq..max-freq change. Otherwise it will
raise cur-freq to the new min-freq value.
2. gt_min_freq_mhz_store() will access the HW whenever there is a valid
min-freq passed in (so even if it just needs to reset the old cur-freq
based on point 1.), so that the RPS interrupt mask is reprogrammed
according to the new min-freq value. This HW access in turn can
generate RPS down interrupts (even though the GPU is already idle)
which can lower cur-freq as low as the min-freq value
gen6_pm_rps_work() currently sees.

All-in-all what we care about here is that cur-freq drops to idle-freq
after an idle->not-idle transition and we can ignore the cur-freq value
right after we wrote to the sysfs entry. Based on your tests the patch
above would ensure that.

--Imre
On Mon, 7 Dec 2015 17:00:20 +0200
Imre Deak <imre.deak@intel.com> wrote:

> On pe, 2015-12-04 at 14:41 -0800, Bob Paauwe wrote:
> > On Fri, 4 Dec 2015 22:58:50 +0200
> > Imre Deak <imre.deak@intel.com> wrote:
> > [...]
> > So we want the policy to be that we'll only drop below min to idle
> > when
> > the GPU transitions from not idle to idle.  Without that transition,
> > we'll stay at the user specified min.
> 
> I would put it, that after an idle->not-idle transition we require that
> the frequency drops to the idle frequency. What happens right after
> writing a valid value to the min-freq sysfs entry is more loose, it can
> end up anywhere between the idle-freq..max(new-min-freq,old-cur-freq)
> range.
> 
> > > It's done already in most
> > > of the places in pm_rps, except for few. The following should fix
> > > up
> > > those:
> > > 
> > > diff --git a/tests/pm_rps.c b/tests/pm_rps.c
> > > index 74f08f4..ad06ef0 100644
> > > --- a/tests/pm_rps.c
> > > +++ b/tests/pm_rps.c
> > > @@ -388,10 +388,14 @@ static void min_max_config(void
> > > (*check)(void),
> > > bool load_gpu)
> > >  
> > >  	igt_debug("\nIncrease min to midpoint...\n");
> > >  	writeval(stuff[MIN].filp, fmid);
> > > +	if (load_gpu)
> > > +		do_load_gpu();
> > >  	check();
> > >  
> > >  	igt_debug("\nIncrease min to RP0...\n");
> > >  	writeval(stuff[MIN].filp, origfreqs[RP0]);
> > > +	if (load_gpu)
> > > +		do_load_gpu();
> > >  	check();
> > >  
> > >  	igt_debug("\nIncrease min above RP0 (invalid)...\n");
> > > @@ -416,6 +420,8 @@ static void min_max_config(void (*check)(void),
> > > bool load_gpu)
> > >  
> > >  	igt_debug("\nDecrease min below RPn (invalid)...\n");
> > >  	writeval_inval(stuff[MIN].filp, 0);
> > > +	if (load_gpu)
> > > +		do_load_gpu();
> > >  	check();
> > >  
> > >  	igt_debug("\nDecrease max to midpoint...\n");
> > 
> > Yes, this does make the test pass.
> 
> Ok, this is according to expectations then. We don't actually need the
> last chunk, since for invalid settings there should be no change to
> cur-freq. So could you follow up with a cleaned up patch for this?
> 
> > However, what is the expected behavior in the following:
> > 
> > echo 500 > gt_min_freq_mhz
> > echo 200 > gt_min_freq_mhz
> > 
> > With no GPU load?
> > 
> > Right now, the current frequency will stay stuck at 500 until there
> > is
> > GPU load or until I double set the min freq to 200.  For example:
> > 
> >   bxt-test:drm root $ echo 500 > card0/gt_min_freq_mhz 
> >   bxt-test:drm root $ cat card0/gt_cur_freq_mhz 
> >   500
> >   bxt-test:drm root $ echo 200 >
> > card0/gt_min_freq_mhz                         
> >   bxt-test:drm root $ cat card0/gt_cur_freq_mhz 
> >   500
> >   bxt-test:drm root $ echo 200 >
> > card0/gt_min_freq_mhz                         
> >   bxt-test:drm root $ cat card0/gt_cur_freq_mhz 
> >   200
> > 
> > If I add a delay before the posting read in gen6_set_rps() then the 
> > frequency will drop after the first "echo 200 >". It's like the 
> > gen6_set_rps() is behind.  A slightly more interesting result
> > is:
> > 
> > echo 500 > min  --> cur = 500
> > echo 200 > min  --> cur = 500
> > echo 400 > min  --> cur = 200
> > 
> > Something doesn't seem right. 
> 
> All the above can be explained by two things:
> 1. gt_min_freq_mhz_store() will not change cur-freq if cur-freq is
> already within the new min-freq..max-freq change. Otherwise it will
> raise cur-freq to the new min-freq value.
> 2. gt_min_freq_mhz_store() will access the HW whenever there is a valid
> min-freq passed in (so even if it just needs to reset the old cur-freq
> based on point 1.), so that the RPS interrupt mask is reprogrammed
> according to the new min-freq value. This HW access in turn can
> generate RPS down interrupts (even though the GPU is already idle)
> which can lower cur-freq as low as the min-freq value
> gen6_pm_rps_work() currently sees.
> 
> All-in-all what we care about here is that cur-freq drops to idle-freq
> after an idle->not-idle transition and we can ignore the cur-freq value
> right after we wrote to the sysfs entry. Based on your tests the patch
> above would ensure that.
> 
> --Imre

OK, that makes sense.  Thanks for taking the time to explain all this.
I'll send out the patch to pm_rps.c shortly.

Bob
On 12/4/2015 8:52 PM, Imre Deak wrote:
> On to, 2015-12-03 at 16:43 -0800, Bob Paauwe wrote:
>> On Tue, 1 Dec 2015 19:43:05 +0200
>> Imre Deak <imre.deak@intel.com> wrote:
>>
>>> On ti, 2015-12-01 at 09:22 -0800, Bob Paauwe wrote:
>>>> On Tue, 1 Dec 2015 15:56:55 +0200
>>>> Imre Deak <imre.deak@intel.com> wrote:
>>>>
>>>>> On ma, 2015-11-30 at 16:23 -0800, Bob Paauwe wrote:
>>>>>> Now that the frequency can drop below the user specified
>>>>>> minimum
>>>>>> when
>>>>>> the gpu is idle, add some checking to verify that it really
>>>>>> does
>>>>>> drop
>>>>>> down to the RPn frequency in specific cases.
>>>>>>
>>>>>> To do this, modify the main test flow to take two 'check'
>>>>>> routines
>>>>>> instead of one. When running the config-min-max-idle subtest
>>>>>> make
>>>>>> use of the second check routine to verify that the frequency
>>>>>> drops
>>>>>> to RPn instead of simply <= user min frequency.  For all
>>>>>> other
>>>>>> subtests, use the original check routines for both checks.
>>>>>>
>>>>>> Signed-off-by: Bob Paauwe <bob.j.paauwe@intel.com>
>>>>> I don't see the point. The frequency should always drop to the
>>>>> idle
>>>>> frequency if the GPU is idle, it's not enough that it's below
>>>>> MIN-
>>>>> freq.
>>>>> So why do we need separate CUR-freq<=MIN-freq and CUR-
>>>>> freq==idle-
>>>>> freq
>>>>> checks?
>>>> I started from the premise that it should always drop to idle but
>>>> that's just not the case.
>>> It is the correct premise and if it doesn't hold then there is a
>>> real
>>> bug either in the testcase or the kernel which needs to be
>>> addressed
>>> differently. I haven't found anything concrete but one suspect is
>>> the
>>> logic that waits for the GPU idle condition, maybe the timeout
>>> value idle_check() or the hard-coded duration in do_load_gpu() is
>>> incorrect. In any case let's not paper over this issue, the very
>>> reason
>>> we have test cases is to uncover such bugs.
>>>
>>>> The min_max_config() function is used for
>>>> both the idle and loaded subtests.  If you try to check for
>>>> freq=idle
>>>> when doing the loaded subtest it will fail since it never goes
>>>> idle.
>>>> Even in the idle subtest there are cases where it doesn't drop
>>>> down
>>>> to
>>>> idle.
>>> The only place we should check for freq==idle is in idle_check()
>>> and
>>> that one is not called during the loaded subtest, it wouldn't even
>>> make
>>> sense to do so. So I'm not sure how this subtest fails for you.
>>>
>>>> I suppose I could duplicate min_max_config and have a
>>>> min_max_config_idle() and min_max_conifg_not_idle() for use by
>>>> the
>>>> different subtests.
>>> The point of the "check" argument of min_max_config() is to
>>> distinguish
>>> between the idle and loaded cases. The check functions passed in
>>> know
>>> already if they can expect the frequency to reach the idle
>>> frequency
>>> or not.
>>>
>>>> The real problem is that the test was not designed to handle the
>>>> case
>>>> where the freq could drop below min and probably needs to be
>>>> re-designed.  I've been trying to find a quick fix vs. a complete
>>>> overhaul as this isn't really a priority for me.
>>> I think we only need your first patch and if there is any failure
>>> after
>>> that we have to root cause that and fix it properly.
>>>
>>> --Imre
>> You're right.  I'm working with BXT and it seems like it's got some
>> unique issues with RPS.  I just sent a new patch based on the first
>> one
>> but with the changes you suggested to check for ==idle instead of
>> <=min.
>>
>> Maybe you have some insights into what I'm seeing with BXT?  The first
>> problem I had was that I would see threshold up interrupts but not any
>> threshold down interrupts.  The missing down interrupts was related to
>> the BIOS setting.  I had runtime PM disabled so it seems strange that I
>> was getting the up interrupts.
How was runtime pM disabled? Think RPM and RPS are not related.
> Yes, I saw this too. I wonder if we could check this somehow and not
> enable RPS if BIOS hasn't set things up properly. Sagar has a patch
> that checks the RC6 setup [1]; that's not exactly RPS, but since they
> are setup at the same place I think we could use that for now for RPS
> too.
FYI Turbo is disabled until A1 due to issues with RC6 enabled. Which 
registers exactly do we need to check
from BIOS RPS programming perspective? I see that RP control is set by 
BIOS ... Is that check enough?
>
>> With the BIOS setting corrected, the driver started disabling the down
>> interrupts once the freq dropped to min or just below because of the min
>> vs. idle difference.  I have a patch for this that I'll send shortly.
> Hm, that's not necessarily a problem. To reach the idle frequency we
> don't depend on the up/down interrupts. The idle frequency gets set
> explicitly from intel_mark_idle(), so if you don't reach the idle freq
> then this function doesn't get called for some reason. Or as I said
> earlier we just don't wait enough in do_load_gpu() or idle_check() in
> the test, so we read out cur-freq before intel_mark_idle() would get
> called.
>
>> The remaining issue seems to be some type of timing issue. I've had
>> to
>> add a 35000us sleep between updating the interrupt enable register
>> (0xA168) and the posting read of freq. register (0xA008), otherwise
>> it
>> acts like the change to the interrupt enable register never happened.
>> None of the documentation I have indicates that this is needed.
> What happens exactly? The frequency gets stuck above min-freq and you
> never get a down interrupt? Not sure how this could happen, but there
> is an interesting comment in intel_rps_limits() about this scenario.
>
> --Imre
>
> [1] http://lists.freedesktop.org/archives/intel-gfx/2015-November/07938
> 6.html
>
>
>> Bob
>>
>>
>>>>>> ---
>>>>>>   tests/pm_rps.c | 47 ++++++++++++++++++++++++++++++++++----
>>>>>> ----
>>>>>> -----
>>>>>>   1 file changed, 34 insertions(+), 13 deletions(-)
>>>>>>
>>>>>> diff --git a/tests/pm_rps.c b/tests/pm_rps.c
>>>>>> index f919625..96225ce 100644
>>>>>> --- a/tests/pm_rps.c
>>>>>> +++ b/tests/pm_rps.c
>>>>>> @@ -364,7 +364,7 @@ static int get_hw_rounded_freq(int
>>>>>> target)
>>>>>>   	return ret;
>>>>>>   }
>>>>>>   
>>>>>> -static void min_max_config(void (*check)(void), bool
>>>>>> load_gpu)
>>>>>> +static void min_max_config(void (*check)(void), void
>>>>>> (*check2)(void), bool load_gpu)
>>>>>>   {
>>>>>>   	int fmid = (origfreqs[RPn] + origfreqs[RP0]) / 2;
>>>>>>   
>>>>>> @@ -384,7 +384,7 @@ static void min_max_config(void
>>>>>> (*check)(void),
>>>>>> bool load_gpu)
>>>>>>   	writeval(stuff[MAX].filp, origfreqs[RP0]);
>>>>>>   	if (load_gpu)
>>>>>>   		do_load_gpu();
>>>>>> -	check();
>>>>>> +	check2();
>>>>>>   
>>>>>>   	igt_debug("\nIncrease min to midpoint...\n");
>>>>>>   	writeval(stuff[MIN].filp, fmid);
>>>>>> @@ -412,7 +412,7 @@ static void min_max_config(void
>>>>>> (*check)(void),
>>>>>> bool load_gpu)
>>>>>>   	writeval(stuff[MIN].filp, origfreqs[RPn]);
>>>>>>   	if (load_gpu)
>>>>>>   		do_load_gpu();
>>>>>> -	check();
>>>>>> +	check2();
>>>>>>   
>>>>>>   	igt_debug("\nDecrease min below RPn
>>>>>> (invalid)...\n");
>>>>>>   	writeval_inval(stuff[MIN].filp, 0);
>>>>>> @@ -420,11 +420,11 @@ static void min_max_config(void
>>>>>> (*check)(void),
>>>>>> bool load_gpu)
>>>>>>   
>>>>>>   	igt_debug("\nDecrease max to midpoint...\n");
>>>>>>   	writeval(stuff[MAX].filp, fmid);
>>>>>> -	check();
>>>>>> +	check2();
>>>>>>   
>>>>>>   	igt_debug("\nDecrease max to RPn...\n");
>>>>>>   	writeval(stuff[MAX].filp, origfreqs[RPn]);
>>>>>> -	check();
>>>>>> +	check2();
>>>>>>   
>>>>>>   	igt_debug("\nDecrease max below RPn
>>>>>> (invalid)...\n");
>>>>>>   	writeval_inval(stuff[MAX].filp, 0);
>>>>>> @@ -436,11 +436,11 @@ static void min_max_config(void
>>>>>> (*check)(void),
>>>>>> bool load_gpu)
>>>>>>   
>>>>>>   	igt_debug("\nIncrease max to midpoint...\n");
>>>>>>   	writeval(stuff[MAX].filp, fmid);
>>>>>> -	check();
>>>>>> +	check2();
>>>>>>   
>>>>>>   	igt_debug("\nIncrease max to RP0...\n");
>>>>>>   	writeval(stuff[MAX].filp, origfreqs[RP0]);
>>>>>> -	check();
>>>>>> +	check2();
>>>>>>   
>>>>>>   	igt_debug("\nIncrease max above RP0
>>>>>> (invalid)...\n");
>>>>>>   	writeval_inval(stuff[MAX].filp, origfreqs[RP0] +
>>>>>> 1000);
>>>>>> @@ -461,7 +461,7 @@ static void basic_check(void)
>>>>>>   
>>>>>>   #define IDLE_WAIT_TIMESTEP_MSEC 100
>>>>>>   #define IDLE_WAIT_TIMEOUT_MSEC 10000
>>>>>> -static void idle_check(void)
>>>>>> +static void idle_check_min(void)
>>>>>>   {
>>>>>>   	int freqs[NUMFREQ];
>>>>>>   	int wait = 0;
>>>>>> @@ -482,6 +482,27 @@ static void idle_check(void)
>>>>>>   	igt_debug("Required %d msec to reach cur<=min\n",
>>>>>> wait);
>>>>>>   }
>>>>>>   
>>>>>> +static void idle_check_idle(void)
>>>>>> +{
>>>>>> +	int freqs[NUMFREQ];
>>>>>> +	int wait = 0;
>>>>>> +
>>>>>> +	/* Monitor frequencies until cur settles down to
>>>>>> min,
>>>>>> which
>>>>>> should
>>>>>> +	 * happen within the allotted time */
>>>>>> +	do {
>>>>>> +		read_freqs(freqs);
>>>>>> +		dump(freqs);
>>>>>> +		checkit(freqs);
>>>>>> +		if (freqs[CUR] == freqs[RPn])
>>>>>> +			break;
>>>>>> +		usleep(1000 * IDLE_WAIT_TIMESTEP_MSEC);
>>>>>> +		wait += IDLE_WAIT_TIMESTEP_MSEC;
>>>>>> +	} while (wait < IDLE_WAIT_TIMEOUT_MSEC);
>>>>>> +
>>>>>> +	igt_assert_eq(freqs[CUR], freqs[RPn]);
>>>>>> +	igt_debug("Required %d msec to reach cur=idle\n",
>>>>>> wait);
>>>>>> +}
>>>>>> +
>>>>>>   #define LOADED_WAIT_TIMESTEP_MSEC 100
>>>>>>   #define LOADED_WAIT_TIMEOUT_MSEC 3000
>>>>>>   static void loaded_check(void)
>>>>>> @@ -577,7 +598,7 @@ static void reset(void)
>>>>>>   
>>>>>>   	igt_debug("Removing load...\n");
>>>>>>   	load_helper_stop();
>>>>>> -	idle_check();
>>>>>> +	idle_check_min();
>>>>>>   }
>>>>>>   
>>>>>>   /*
>>>>>> @@ -635,7 +656,7 @@ static void blocking(void)
>>>>>>   	matchit(pre_freqs, post_freqs);
>>>>>>   
>>>>>>   	igt_debug("Removing load...\n");
>>>>>> -	idle_check();
>>>>>> +	idle_check_min();
>>>>>>   }
>>>>>>   
>>>>>>   static void pm_rps_exit_handler(int sig)
>>>>>> @@ -686,14 +707,14 @@ igt_main
>>>>>>   	}
>>>>>>   
>>>>>>   	igt_subtest("basic-api")
>>>>>> -		min_max_config(basic_check, false);
>>>>>> +		min_max_config(basic_check, basic_check,
>>>>>> false);
>>>>>>   
>>>>>>   	igt_subtest("min-max-config-idle")
>>>>>> -		min_max_config(idle_check, true);
>>>>>> +		min_max_config(idle_check_min,
>>>>>> idle_check_idle,
>>>>>> true);
>>>>>>   
>>>>>>   	igt_subtest("min-max-config-loaded") {
>>>>>>   		load_helper_run(HIGH);
>>>>>> -		min_max_config(loaded_check, false);
>>>>>> +		min_max_config(loaded_check, loaded_check,
>>>>>> false);
>>>>>>   		load_helper_stop();
>>>>>>   	}
>>>>>>   
>>>>
>>>>
>>
>>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx