[v7,6/7] drm/i915/tgl: switch between dc3co and dc5 based on display idleness

Submitted by Anshuamn Gupta on Sept. 7, 2019, 5:14 p.m.

Details

Message ID 20190907171443.16181-7-anshuman.gupta@intel.com
State New
Headers show
Series "DC3CO Support for TGL" ( rev: 8 7 ) in Intel GFX

Browsing this patch as part of:
"DC3CO Support for TGL" rev 7 in Intel GFX
<< prev patch [6/7] next patch >>

Commit Message

Anshuamn Gupta Sept. 7, 2019, 5:14 p.m.
DC3CO is useful power state, when DMC detects PSR2 idle frame
while an active video playback, playing 30fps video on 60hz panel
is the classic example of this use case.
DC5 and DC6 saves more power, but can't be entered during video
playback because there are not enough idle frames in a row to meet
most PSR2 panel deep sleep entry requirement typically 4 frames.

It will be worthy to enable DC3CO after completion of each flip
and switch back to DC5 when display is idle, as driver doesn't
differentiate between video playback and a normal flip.
It is safer to allow DC5 after 6 idle frame, as PSR2 requires
minimum 6 idle frame.

v2: calculated s/w state to switch over dc3co when there is an
    update. [Imre]
    used cancel_delayed_work_sync() in order to avoid any race
    with already scheduled delayed work. [Imre]
v3: cancel_delayed_work_sync() may blocked the commit work.
    Hence dropping it, dc5_idle_thread() checks the valid wakeref before
    putting the reference count, which avoids any chances of dropping
    a zero wakeref. [Imre (IRC)]
v4: use frontbuffer flush mechanism. [Imre]

Cc: Jani Nikula <jani.nikula@intel.com>
Cc: Imre Deak <imre.deak@intel.com>
Cc: Animesh Manna <animesh.manna@intel.com>
Signed-off-by: Anshuman Gupta <anshuman.gupta@intel.com>
---
 drivers/gpu/drm/i915/display/intel_display.c  |  2 +
 .../drm/i915/display/intel_display_power.c    | 79 +++++++++++++++++++
 .../drm/i915/display/intel_display_power.h    |  6 ++
 .../gpu/drm/i915/display/intel_frontbuffer.c  |  1 +
 drivers/gpu/drm/i915/i915_drv.h               |  1 +
 5 files changed, 89 insertions(+)

Patch hide | download patch | download mbox

diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
index 84488f87d058..2754e8ee693f 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -16206,6 +16206,7 @@  int intel_modeset_init(struct drm_device *dev)
 	init_llist_head(&dev_priv->atomic_helper.free_list);
 	INIT_WORK(&dev_priv->atomic_helper.free_work,
 		  intel_atomic_helper_free_state_worker);
+	INIT_DELAYED_WORK(&dev_priv->csr.idle_work, tgl_dc5_idle_thread);
 
 	intel_init_quirks(dev_priv);
 
@@ -17147,6 +17148,7 @@  void intel_modeset_driver_remove(struct drm_device *dev)
 	flush_workqueue(dev_priv->modeset_wq);
 
 	flush_work(&dev_priv->atomic_helper.free_work);
+	flush_delayed_work(&dev_priv->csr.idle_work);
 	WARN_ON(!llist_empty(&dev_priv->atomic_helper.free_list));
 
 	/*
diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c
index ecce118b5b0e..c110f04c9733 100644
--- a/drivers/gpu/drm/i915/display/intel_display_power.c
+++ b/drivers/gpu/drm/i915/display/intel_display_power.c
@@ -20,6 +20,7 @@ 
 #include "intel_sideband.h"
 #include "intel_tc.h"
 #include "intel_pm.h"
+#include "intel_psr.h"
 
 bool intel_display_power_well_is_enabled(struct drm_i915_private *dev_priv,
 					 enum i915_power_well_id power_well_id);
@@ -773,6 +774,27 @@  static void gen9_set_dc_state(struct drm_i915_private *dev_priv, u32 state)
 	dev_priv->csr.dc_state = val & mask;
 }
 
+static u32 intel_get_frame_time_us(const struct intel_crtc_state *cstate)
+{
+	u32 pixel_rate, crtc_htotal, crtc_vtotal;
+	u32 frametime_us;
+
+	if (!cstate || !cstate->base.active)
+		return 0;
+
+	pixel_rate = cstate->pixel_rate;
+
+	if (WARN_ON(pixel_rate == 0))
+		return 0;
+
+	crtc_htotal = cstate->base.adjusted_mode.crtc_htotal;
+	crtc_vtotal = cstate->base.adjusted_mode.crtc_vtotal;
+	frametime_us = DIV_ROUND_UP(crtc_htotal * crtc_vtotal * 1000ULL,
+				    pixel_rate);
+
+	return frametime_us;
+}
+
 void tgl_disable_psr2_transcoder_exitline(const struct intel_crtc_state *cstate)
 {
 	struct drm_i915_private *dev_priv = to_i915(cstate->base.crtc->dev);
@@ -817,6 +839,49 @@  void tgl_enable_psr2_transcoder_exitline(const struct intel_crtc_state *cstate)
 	I915_WRITE(EXITLINE(cstate->cpu_transcoder), val);
 }
 
+void tgl_dc3co_flush(struct drm_i915_private *dev_priv,
+		     unsigned int frontbuffer_bits, enum fb_op_origin origin)
+{
+	struct intel_crtc_state *cstate;
+	u32 delay;
+	unsigned int busy_frontbuffer_bits;
+
+	if (!IS_TIGERLAKE(dev_priv))
+		return;
+
+	if (origin != ORIGIN_FLIP)
+		return;
+
+	if (!dev_priv->csr.dc3co_crtc)
+		return;
+
+	cstate = to_intel_crtc_state(dev_priv->csr.dc3co_crtc->base.state);
+	frontbuffer_bits &=
+		INTEL_FRONTBUFFER_ALL_MASK(dev_priv->csr.dc3co_crtc->pipe);
+	busy_frontbuffer_bits &= ~frontbuffer_bits;
+
+	mutex_lock(&dev_priv->psr.lock);
+
+	if (!dev_priv->psr.psr2_enabled || !dev_priv->psr.active)
+		goto unlock;
+
+	/*
+	 * At every flip frontbuffer flush first cancel the delayed work,
+	 * when delayed schedules that means display has been idle
+	 * for the 6 idle frame.
+	 */
+	if (!busy_frontbuffer_bits) {
+		cancel_delayed_work(&dev_priv->csr.idle_work);
+		tgl_set_target_dc_state(dev_priv, DC_STATE_EN_DC3CO, false);
+		delay = DC5_REQ_IDLE_FRAMES * intel_get_frame_time_us(cstate);
+		schedule_delayed_work(&dev_priv->csr.idle_work,
+				      usecs_to_jiffies(delay));
+	}
+
+unlock:
+	mutex_unlock(&dev_priv->psr.lock);
+}
+
 static bool tgl_dc3co_is_edp_connected(struct intel_crtc_state  *crtc_state)
 {
 	struct drm_atomic_state *state = crtc_state->base.state;
@@ -880,6 +945,14 @@  void tgl_dc3co_crtc_get_config(struct intel_crtc_state *crtc_state)
 	}
 }
 
+void tgl_dc5_idle_thread(struct work_struct *work)
+{
+	struct drm_i915_private *dev_priv =
+		container_of(work, typeof(*dev_priv), csr.idle_work.work);
+
+	tgl_set_target_dc_state(dev_priv, DC_STATE_EN_UPTO_DC6, true);
+}
+
 static void tgl_allow_dc3co(struct drm_i915_private *dev_priv)
 {
 	if (!dev_priv->psr.sink_psr2_support)
@@ -1155,6 +1228,9 @@  void tgl_set_target_dc_state(struct drm_i915_private *dev_priv, u32 state,
 	if (state == dev_priv->csr.max_dc_state)
 		goto unlock;
 
+	if (!psr2_deep_sleep)
+		tgl_psr2_deep_sleep_disable(dev_priv);
+
 	if (!dc_off_enabled) {
 		/*
 		 * Need to disable the DC off power well to
@@ -1167,6 +1243,9 @@  void tgl_set_target_dc_state(struct drm_i915_private *dev_priv, u32 state,
 	}
 		dev_priv->csr.max_dc_state = state;
 
+	if (psr2_deep_sleep)
+		tgl_psr2_deep_sleep_enable(dev_priv);
+
 unlock:
 	mutex_unlock(&power_domains->lock);
 }
diff --git a/drivers/gpu/drm/i915/display/intel_display_power.h b/drivers/gpu/drm/i915/display/intel_display_power.h
index d77a5a53f543..df096d64c744 100644
--- a/drivers/gpu/drm/i915/display/intel_display_power.h
+++ b/drivers/gpu/drm/i915/display/intel_display_power.h
@@ -9,6 +9,9 @@ 
 #include "intel_display.h"
 #include "intel_runtime_pm.h"
 #include "i915_reg.h"
+#include "intel_frontbuffer.h"
+
+#define DC5_REQ_IDLE_FRAMES	6
 
 struct drm_i915_private;
 struct intel_encoder;
@@ -266,6 +269,9 @@  void tgl_dc3co_crtc_compute_config(struct drm_i915_private *dev_priv,
 void tgl_dc3co_crtc_get_config(struct intel_crtc_state *crtc_state);
 void tgl_disable_psr2_transcoder_exitline(const struct intel_crtc_state *state);
 void tgl_enable_psr2_transcoder_exitline(const struct intel_crtc_state *state);
+void tgl_dc3co_flush(struct drm_i915_private *dev_priv,
+		     unsigned int frontbuffer_bits, enum fb_op_origin origin);
+void tgl_dc5_idle_thread(struct work_struct *work);
 
 const char *
 intel_display_power_domain_str(enum intel_display_power_domain domain);
diff --git a/drivers/gpu/drm/i915/display/intel_frontbuffer.c b/drivers/gpu/drm/i915/display/intel_frontbuffer.c
index fc40dc1fdbcc..c3b10f6e4382 100644
--- a/drivers/gpu/drm/i915/display/intel_frontbuffer.c
+++ b/drivers/gpu/drm/i915/display/intel_frontbuffer.c
@@ -90,6 +90,7 @@  static void frontbuffer_flush(struct drm_i915_private *i915,
 	might_sleep();
 	intel_edp_drrs_flush(i915, frontbuffer_bits);
 	intel_psr_flush(i915, frontbuffer_bits, origin);
+	tgl_dc3co_flush(i915, frontbuffer_bits, origin);
 	intel_fbc_flush(i915, frontbuffer_bits, origin);
 }
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 3218b0319852..fe71119a458e 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -338,6 +338,7 @@  struct intel_csr {
 	u32 dc_state;
 	u32 max_dc_state;
 	u32 allowed_dc_mask;
+	struct delayed_work idle_work;
 	intel_wakeref_t wakeref;
 	/* cache the crtc on which DC3CO will be allowed */
 	struct intel_crtc *dc3co_crtc;

Comments

On Sat, Sep 07, 2019 at 10:44:42PM +0530, Anshuman Gupta wrote:
> DC3CO is useful power state, when DMC detects PSR2 idle frame
> while an active video playback, playing 30fps video on 60hz panel
> is the classic example of this use case.
> DC5 and DC6 saves more power, but can't be entered during video
> playback because there are not enough idle frames in a row to meet
> most PSR2 panel deep sleep entry requirement typically 4 frames.

Please also explain why DC3co will be enabled only for flips but not for
other frontbuffer flush events (ORIGIN_CS/DIRTYFB etc.) and that we
could enable it for those too by switching to manual PSR tracking and
programming only 1 idle frame for deep sleep (see below).

Also explaining that the frontbuffer invalidate event doesn't need to be
acted on (b/c of PSR exit) would be helpful.

> 
> It will be worthy to enable DC3CO after completion of each flip
> and switch back to DC5 when display is idle, as driver doesn't
> differentiate between video playback and a normal flip.
> It is safer to allow DC5 after 6 idle frame, as PSR2 requires
> minimum 6 idle frame.

It would be clearer to say here that after a flip we enable DC3co, after
which we wait manually 6 frames (by scheduling the idle frame work) at
which point we enable PSR deep sleep with 6 idle frames. After this 6
idle frames the HW will enter deep sleep and DC5 will be entered
after this by DMC at some point.

The claim that we _have_ to make the HW wait for 6 idle frames before it
enters deep sleep doesn't make much sense to me, would be good to see a
reference to that if it really exists. That setting seems to only serve
the purpose to avoid update lags, but in the future (as discussed with
Ville) we should switch to manual PSR tracking and for that we would
program the HW to wait only 1 idle frame before entering deep sleep and
rely only on the manual 6 idle frame wait (via the idle frame work) to
avoid update lags.

> 
> v2: calculated s/w state to switch over dc3co when there is an
>     update. [Imre]
>     used cancel_delayed_work_sync() in order to avoid any race
>     with already scheduled delayed work. [Imre]
> v3: cancel_delayed_work_sync() may blocked the commit work.
>     Hence dropping it, dc5_idle_thread() checks the valid wakeref before
>     putting the reference count, which avoids any chances of dropping
>     a zero wakeref. [Imre (IRC)]
> v4: use frontbuffer flush mechanism. [Imre]
> 
> Cc: Jani Nikula <jani.nikula@intel.com>
> Cc: Imre Deak <imre.deak@intel.com>
> Cc: Animesh Manna <animesh.manna@intel.com>
> Signed-off-by: Anshuman Gupta <anshuman.gupta@intel.com>
> ---
>  drivers/gpu/drm/i915/display/intel_display.c  |  2 +
>  .../drm/i915/display/intel_display_power.c    | 79 +++++++++++++++++++
>  .../drm/i915/display/intel_display_power.h    |  6 ++
>  .../gpu/drm/i915/display/intel_frontbuffer.c  |  1 +
>  drivers/gpu/drm/i915/i915_drv.h               |  1 +
>  5 files changed, 89 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
> index 84488f87d058..2754e8ee693f 100644
> --- a/drivers/gpu/drm/i915/display/intel_display.c
> +++ b/drivers/gpu/drm/i915/display/intel_display.c
> @@ -16206,6 +16206,7 @@ int intel_modeset_init(struct drm_device *dev)
>  	init_llist_head(&dev_priv->atomic_helper.free_list);
>  	INIT_WORK(&dev_priv->atomic_helper.free_work,
>  		  intel_atomic_helper_free_state_worker);
> +	INIT_DELAYED_WORK(&dev_priv->csr.idle_work, tgl_dc5_idle_thread);
>  
>  	intel_init_quirks(dev_priv);
>  
> @@ -17147,6 +17148,7 @@ void intel_modeset_driver_remove(struct drm_device *dev)
>  	flush_workqueue(dev_priv->modeset_wq);
>  
>  	flush_work(&dev_priv->atomic_helper.free_work);
> +	flush_delayed_work(&dev_priv->csr.idle_work);

This is racy as the work could be still running, but also would leave a
few other places with the work running like suspend, so let's just make
sure that it's not running any more after encoder disabling.

>  	WARN_ON(!llist_empty(&dev_priv->atomic_helper.free_list));
>  
>  	/*
> diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c
> index ecce118b5b0e..c110f04c9733 100644
> --- a/drivers/gpu/drm/i915/display/intel_display_power.c
> +++ b/drivers/gpu/drm/i915/display/intel_display_power.c
> @@ -20,6 +20,7 @@
>  #include "intel_sideband.h"
>  #include "intel_tc.h"
>  #include "intel_pm.h"
> +#include "intel_psr.h"
>  
>  bool intel_display_power_well_is_enabled(struct drm_i915_private *dev_priv,
>  					 enum i915_power_well_id power_well_id);
> @@ -773,6 +774,27 @@ static void gen9_set_dc_state(struct drm_i915_private *dev_priv, u32 state)
>  	dev_priv->csr.dc_state = val & mask;
>  }
>  
> +static u32 intel_get_frame_time_us(const struct intel_crtc_state *cstate)
> +{
> +	u32 pixel_rate, crtc_htotal, crtc_vtotal;
> +	u32 frametime_us;
> +
> +	if (!cstate || !cstate->base.active)
> +		return 0;
> +
> +	pixel_rate = cstate->pixel_rate;
> +
> +	if (WARN_ON(pixel_rate == 0))
> +		return 0;
> +
> +	crtc_htotal = cstate->base.adjusted_mode.crtc_htotal;
> +	crtc_vtotal = cstate->base.adjusted_mode.crtc_vtotal;
> +	frametime_us = DIV_ROUND_UP(crtc_htotal * crtc_vtotal * 1000ULL,
> +				    pixel_rate);
> +
> +	return frametime_us;
> +}
> +
>  void tgl_disable_psr2_transcoder_exitline(const struct intel_crtc_state *cstate)
>  {
>  	struct drm_i915_private *dev_priv = to_i915(cstate->base.crtc->dev);
> @@ -817,6 +839,49 @@ void tgl_enable_psr2_transcoder_exitline(const struct intel_crtc_state *cstate)
>  	I915_WRITE(EXITLINE(cstate->cpu_transcoder), val);
>  }
>  
> +void tgl_dc3co_flush(struct drm_i915_private *dev_priv,
> +		     unsigned int frontbuffer_bits, enum fb_op_origin origin)
> +{
> +	struct intel_crtc_state *cstate;
> +	u32 delay;
> +	unsigned int busy_frontbuffer_bits;
> +
> +	if (!IS_TIGERLAKE(dev_priv))
> +		return;
> +
> +	if (origin != ORIGIN_FLIP)
> +		return;
> +
> +	if (!dev_priv->csr.dc3co_crtc)
> +		return;
> +
> +	cstate = to_intel_crtc_state(dev_priv->csr.dc3co_crtc->base.state);
> +	frontbuffer_bits &=
> +		INTEL_FRONTBUFFER_ALL_MASK(dev_priv->csr.dc3co_crtc->pipe);
> +	busy_frontbuffer_bits &= ~frontbuffer_bits;

Hrm, this looks wrong. Also it depends on the PSR mechanism, so this
whole DC3co flush logic should rather be done from the PSR flush func,
using psr.pipe.

> +
> +	mutex_lock(&dev_priv->psr.lock);
> +
> +	if (!dev_priv->psr.psr2_enabled || !dev_priv->psr.active)
> +		goto unlock;
> +
> +	/*
> +	 * At every flip frontbuffer flush first cancel the delayed work,
> +	 * when delayed schedules that means display has been idle
> +	 * for the 6 idle frame.
> +	 */
> +	if (!busy_frontbuffer_bits) {
> +		cancel_delayed_work(&dev_priv->csr.idle_work);

The above is racy.

> +		tgl_set_target_dc_state(dev_priv, DC_STATE_EN_DC3CO, false);
> +		delay = DC5_REQ_IDLE_FRAMES * intel_get_frame_time_us(cstate);
> +		schedule_delayed_work(&dev_priv->csr.idle_work,
> +				      usecs_to_jiffies(delay));
> +	}
> +
> +unlock:
> +	mutex_unlock(&dev_priv->psr.lock);
> +}
> +
>  static bool tgl_dc3co_is_edp_connected(struct intel_crtc_state  *crtc_state)
>  {
>  	struct drm_atomic_state *state = crtc_state->base.state;
> @@ -880,6 +945,14 @@ void tgl_dc3co_crtc_get_config(struct intel_crtc_state *crtc_state)
>  	}
>  }
>  
> +void tgl_dc5_idle_thread(struct work_struct *work)
> +{
> +	struct drm_i915_private *dev_priv =
> +		container_of(work, typeof(*dev_priv), csr.idle_work.work);
> +
> +	tgl_set_target_dc_state(dev_priv, DC_STATE_EN_UPTO_DC6, true);

So it would result in enabling deep sleep, but without the PSR lock.
That's one reason we should really keep the PSR specific parts here.

> +}
> +
>  static void tgl_allow_dc3co(struct drm_i915_private *dev_priv)
>  {
>  	if (!dev_priv->psr.sink_psr2_support)
> @@ -1155,6 +1228,9 @@ void tgl_set_target_dc_state(struct drm_i915_private *dev_priv, u32 state,
>  	if (state == dev_priv->csr.max_dc_state)
>  		goto unlock;
>  
> +	if (!psr2_deep_sleep)
> +		tgl_psr2_deep_sleep_disable(dev_priv);
> +
>  	if (!dc_off_enabled) {
>  		/*
>  		 * Need to disable the DC off power well to
> @@ -1167,6 +1243,9 @@ void tgl_set_target_dc_state(struct drm_i915_private *dev_priv, u32 state,
>  	}
>  		dev_priv->csr.max_dc_state = state;
>  
> +	if (psr2_deep_sleep)
> +		tgl_psr2_deep_sleep_enable(dev_priv);
> +
>  unlock:
>  	mutex_unlock(&power_domains->lock);
>  }
> diff --git a/drivers/gpu/drm/i915/display/intel_display_power.h b/drivers/gpu/drm/i915/display/intel_display_power.h
> index d77a5a53f543..df096d64c744 100644
> --- a/drivers/gpu/drm/i915/display/intel_display_power.h
> +++ b/drivers/gpu/drm/i915/display/intel_display_power.h
> @@ -9,6 +9,9 @@
>  #include "intel_display.h"
>  #include "intel_runtime_pm.h"
>  #include "i915_reg.h"
> +#include "intel_frontbuffer.h"
> +
> +#define DC5_REQ_IDLE_FRAMES	6

No need for a define for this.

>  
>  struct drm_i915_private;
>  struct intel_encoder;
> @@ -266,6 +269,9 @@ void tgl_dc3co_crtc_compute_config(struct drm_i915_private *dev_priv,
>  void tgl_dc3co_crtc_get_config(struct intel_crtc_state *crtc_state);
>  void tgl_disable_psr2_transcoder_exitline(const struct intel_crtc_state *state);
>  void tgl_enable_psr2_transcoder_exitline(const struct intel_crtc_state *state);
> +void tgl_dc3co_flush(struct drm_i915_private *dev_priv,
> +		     unsigned int frontbuffer_bits, enum fb_op_origin origin);
> +void tgl_dc5_idle_thread(struct work_struct *work);
>  
>  const char *
>  intel_display_power_domain_str(enum intel_display_power_domain domain);
> diff --git a/drivers/gpu/drm/i915/display/intel_frontbuffer.c b/drivers/gpu/drm/i915/display/intel_frontbuffer.c
> index fc40dc1fdbcc..c3b10f6e4382 100644
> --- a/drivers/gpu/drm/i915/display/intel_frontbuffer.c
> +++ b/drivers/gpu/drm/i915/display/intel_frontbuffer.c
> @@ -90,6 +90,7 @@ static void frontbuffer_flush(struct drm_i915_private *i915,
>  	might_sleep();
>  	intel_edp_drrs_flush(i915, frontbuffer_bits);
>  	intel_psr_flush(i915, frontbuffer_bits, origin);
> +	tgl_dc3co_flush(i915, frontbuffer_bits, origin);
>  	intel_fbc_flush(i915, frontbuffer_bits, origin);
>  }
>  
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 3218b0319852..fe71119a458e 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -338,6 +338,7 @@ struct intel_csr {
>  	u32 dc_state;
>  	u32 max_dc_state;
>  	u32 allowed_dc_mask;
> +	struct delayed_work idle_work;
>  	intel_wakeref_t wakeref;
>  	/* cache the crtc on which DC3CO will be allowed */
>  	struct intel_crtc *dc3co_crtc;
> -- 
> 2.21.0
>
On 2019-09-08 at 20:55:17 +0300, Imre Deak wrote:
Hi Imre ,
Thanks for review, could you please provide your response on below
comments.
> On Sat, Sep 07, 2019 at 10:44:42PM +0530, Anshuman Gupta wrote:
> > DC3CO is useful power state, when DMC detects PSR2 idle frame
> > while an active video playback, playing 30fps video on 60hz panel
> > is the classic example of this use case.
> > DC5 and DC6 saves more power, but can't be entered during video
> > playback because there are not enough idle frames in a row to meet
> > most PSR2 panel deep sleep entry requirement typically 4 frames.
> 
> Please also explain why DC3co will be enabled only for flips but not for
> other frontbuffer flush events (ORIGIN_CS/DIRTYFB etc.) and that we
> could enable it for those too by switching to manual PSR tracking and
> programming only 1 idle frame for deep sleep (see below).
> 
> Also explaining that the frontbuffer invalidate event doesn't need to be
> acted on (b/c of PSR exit) would be helpful.
> 
> > 
> > It will be worthy to enable DC3CO after completion of each flip
> > and switch back to DC5 when display is idle, as driver doesn't
> > differentiate between video playback and a normal flip.
> > It is safer to allow DC5 after 6 idle frame, as PSR2 requires
> > minimum 6 idle frame.
> 
> It would be clearer to say here that after a flip we enable DC3co, after
> which we wait manually 6 frames (by scheduling the idle frame work) at
> which point we enable PSR deep sleep with 6 idle frames. After this 6
> idle frames the HW will enter deep sleep and DC5 will be entered
> after this by DMC at some point.
> 
> The claim that we _have_ to make the HW wait for 6 idle frames before it
> enters deep sleep doesn't make much sense to me, would be good to see a
> reference to that if it really exists. That setting seems to only serve
> the purpose to avoid update lags, but in the future (as discussed with
> Ville) we should switch to manual PSR tracking and for that we would
> program the HW to wait only 1 idle frame before entering deep sleep and
> rely only on the manual 6 idle frame wait (via the idle frame work) to
> avoid update lags.
> 
> > 
> > v2: calculated s/w state to switch over dc3co when there is an
> >     update. [Imre]
> >     used cancel_delayed_work_sync() in order to avoid any race
> >     with already scheduled delayed work. [Imre]
> > v3: cancel_delayed_work_sync() may blocked the commit work.
> >     Hence dropping it, dc5_idle_thread() checks the valid wakeref before
> >     putting the reference count, which avoids any chances of dropping
> >     a zero wakeref. [Imre (IRC)]
> > v4: use frontbuffer flush mechanism. [Imre]
> > 
> > Cc: Jani Nikula <jani.nikula@intel.com>
> > Cc: Imre Deak <imre.deak@intel.com>
> > Cc: Animesh Manna <animesh.manna@intel.com>
> > Signed-off-by: Anshuman Gupta <anshuman.gupta@intel.com>
> > ---
> >  drivers/gpu/drm/i915/display/intel_display.c  |  2 +
> >  .../drm/i915/display/intel_display_power.c    | 79 +++++++++++++++++++
> >  .../drm/i915/display/intel_display_power.h    |  6 ++
> >  .../gpu/drm/i915/display/intel_frontbuffer.c  |  1 +
> >  drivers/gpu/drm/i915/i915_drv.h               |  1 +
> >  5 files changed, 89 insertions(+)
> > 
> > diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
> > index 84488f87d058..2754e8ee693f 100644
> > --- a/drivers/gpu/drm/i915/display/intel_display.c
> > +++ b/drivers/gpu/drm/i915/display/intel_display.c
> > @@ -16206,6 +16206,7 @@ int intel_modeset_init(struct drm_device *dev)
> >  	init_llist_head(&dev_priv->atomic_helper.free_list);
> >  	INIT_WORK(&dev_priv->atomic_helper.free_work,
> >  		  intel_atomic_helper_free_state_worker);
> > +	INIT_DELAYED_WORK(&dev_priv->csr.idle_work, tgl_dc5_idle_thread);
> >  
> >  	intel_init_quirks(dev_priv);
> >  
> > @@ -17147,6 +17148,7 @@ void intel_modeset_driver_remove(struct drm_device *dev)
> >  	flush_workqueue(dev_priv->modeset_wq);
> >  
> >  	flush_work(&dev_priv->atomic_helper.free_work);
> > +	flush_delayed_work(&dev_priv->csr.idle_work);
> 
> This is racy as the work could be still running, but also would leave a
> few other places with the work running like suspend, so let's just make
> sure that it's not running any more after encoder disabling.
> 
> >  	WARN_ON(!llist_empty(&dev_priv->atomic_helper.free_list));
> >  
> >  	/*
> > diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c
> > index ecce118b5b0e..c110f04c9733 100644
> > --- a/drivers/gpu/drm/i915/display/intel_display_power.c
> > +++ b/drivers/gpu/drm/i915/display/intel_display_power.c
> > @@ -20,6 +20,7 @@
> >  #include "intel_sideband.h"
> >  #include "intel_tc.h"
> >  #include "intel_pm.h"
> > +#include "intel_psr.h"
> >  
> >  bool intel_display_power_well_is_enabled(struct drm_i915_private *dev_priv,
> >  					 enum i915_power_well_id power_well_id);
> > @@ -773,6 +774,27 @@ static void gen9_set_dc_state(struct drm_i915_private *dev_priv, u32 state)
> >  	dev_priv->csr.dc_state = val & mask;
> >  }
> >  
> > +static u32 intel_get_frame_time_us(const struct intel_crtc_state *cstate)
> > +{
> > +	u32 pixel_rate, crtc_htotal, crtc_vtotal;
> > +	u32 frametime_us;
> > +
> > +	if (!cstate || !cstate->base.active)
> > +		return 0;
> > +
> > +	pixel_rate = cstate->pixel_rate;
> > +
> > +	if (WARN_ON(pixel_rate == 0))
> > +		return 0;
> > +
> > +	crtc_htotal = cstate->base.adjusted_mode.crtc_htotal;
> > +	crtc_vtotal = cstate->base.adjusted_mode.crtc_vtotal;
> > +	frametime_us = DIV_ROUND_UP(crtc_htotal * crtc_vtotal * 1000ULL,
> > +				    pixel_rate);
> > +
> > +	return frametime_us;
> > +}
> > +
> >  void tgl_disable_psr2_transcoder_exitline(const struct intel_crtc_state *cstate)
> >  {
> >  	struct drm_i915_private *dev_priv = to_i915(cstate->base.crtc->dev);
> > @@ -817,6 +839,49 @@ void tgl_enable_psr2_transcoder_exitline(const struct intel_crtc_state *cstate)
> >  	I915_WRITE(EXITLINE(cstate->cpu_transcoder), val);
> >  }
> >  
> > +void tgl_dc3co_flush(struct drm_i915_private *dev_priv,
> > +		     unsigned int frontbuffer_bits, enum fb_op_origin origin)
> > +{
> > +	struct intel_crtc_state *cstate;
> > +	u32 delay;
> > +	unsigned int busy_frontbuffer_bits;
> > +
> > +	if (!IS_TIGERLAKE(dev_priv))
> > +		return;
> > +
> > +	if (origin != ORIGIN_FLIP)
> > +		return;
> > +
> > +	if (!dev_priv->csr.dc3co_crtc)
> > +		return;
> > +
> > +	cstate = to_intel_crtc_state(dev_priv->csr.dc3co_crtc->base.state);
> > +	frontbuffer_bits &=
> > +		INTEL_FRONTBUFFER_ALL_MASK(dev_priv->csr.dc3co_crtc->pipe);
> > +	busy_frontbuffer_bits &= ~frontbuffer_bits;
> 
> Hrm, this looks wrong. Also it depends on the PSR mechanism, so this
> whole DC3co flush logic should rather be done from the PSR flush func,
> using psr.pipe.
> 
Hmm initially i have planned to have entire logic under psr flush func
but PSR invalidate/flush logic for ORIGIN_FLIP relies on H/W tracking, 
PSR invalidate and flush call has early return for ORIGIN_FLIP 
unlike DC3CO case where we are only interested in ORIGIN_FLIP requests.
> > +
> > +	mutex_lock(&dev_priv->psr.lock);
> > +
> > +	if (!dev_priv->psr.psr2_enabled || !dev_priv->psr.active)
> > +		goto unlock;
> > +
> > +	/*
> > +	 * At every flip frontbuffer flush first cancel the delayed work,
> > +	 * when delayed schedules that means display has been idle
> > +	 * for the 6 idle frame.
> > +	 */
> > +	if (!busy_frontbuffer_bits) {
> > +		cancel_delayed_work(&dev_priv->csr.idle_work);
> 
> The above is racy.
Yes tgl_dc5_idle_thread() can even run after this but in that case also
tgl_set_target_state() is protected by the power domain locks.
Do u see any other harm of it apart from setting target_dc_state 
immediately to DC5/DC6 after it sets to DC3CO (IMO this would be a little 
penalty for a VBLANK in next page flip it will again set target_dc_state to DC3CO).
 
I can think of two possible solutions to avoid this race.
1. cancel_delayed_work_sync().
2. Having a flag to indicate that delayed work has been canceled
   and same can be used by tgl_dc5_idle_thread() to have an early return.
Please suggest your opinion on it. 
> 
> > +		tgl_set_target_dc_state(dev_priv, DC_STATE_EN_DC3CO, false);
> > +		delay = DC5_REQ_IDLE_FRAMES * intel_get_frame_time_us(cstate);
> > +		schedule_delayed_work(&dev_priv->csr.idle_work,
> > +				      usecs_to_jiffies(delay));
> > +	}
> > +
> > +unlock:
> > +	mutex_unlock(&dev_priv->psr.lock);
> > +}
> > +
> >  static bool tgl_dc3co_is_edp_connected(struct intel_crtc_state  *crtc_state)
> >  {
> >  	struct drm_atomic_state *state = crtc_state->base.state;
> > @@ -880,6 +945,14 @@ void tgl_dc3co_crtc_get_config(struct intel_crtc_state *crtc_state)
> >  	}
> >  }
> >  
> > +void tgl_dc5_idle_thread(struct work_struct *work)
> > +{
> > +	struct drm_i915_private *dev_priv =
> > +		container_of(work, typeof(*dev_priv), csr.idle_work.work);
> > +
> > +	tgl_set_target_dc_state(dev_priv, DC_STATE_EN_UPTO_DC6, true);
> 
> So it would result in enabling deep sleep, but without the PSR lock.
> That's one reason we should really keep the PSR specific parts here.
> 
> > +}
> > +
> >  static void tgl_allow_dc3co(struct drm_i915_private *dev_priv)
> >  {
> >  	if (!dev_priv->psr.sink_psr2_support)
> > @@ -1155,6 +1228,9 @@ void tgl_set_target_dc_state(struct drm_i915_private *dev_priv, u32 state,
> >  	if (state == dev_priv->csr.max_dc_state)
> >  		goto unlock;
> >  
> > +	if (!psr2_deep_sleep)
> > +		tgl_psr2_deep_sleep_disable(dev_priv);
> > +
> >  	if (!dc_off_enabled) {
> >  		/*
> >  		 * Need to disable the DC off power well to
> > @@ -1167,6 +1243,9 @@ void tgl_set_target_dc_state(struct drm_i915_private *dev_priv, u32 state,
> >  	}
> >  		dev_priv->csr.max_dc_state = state;
> >  
> > +	if (psr2_deep_sleep)
> > +		tgl_psr2_deep_sleep_enable(dev_priv);
> > +
> >  unlock:
> >  	mutex_unlock(&power_domains->lock);
> >  }
> > diff --git a/drivers/gpu/drm/i915/display/intel_display_power.h b/drivers/gpu/drm/i915/display/intel_display_power.h
> > index d77a5a53f543..df096d64c744 100644
> > --- a/drivers/gpu/drm/i915/display/intel_display_power.h
> > +++ b/drivers/gpu/drm/i915/display/intel_display_power.h
> > @@ -9,6 +9,9 @@
> >  #include "intel_display.h"
> >  #include "intel_runtime_pm.h"
> >  #include "i915_reg.h"
> > +#include "intel_frontbuffer.h"
> > +
> > +#define DC5_REQ_IDLE_FRAMES	6
> 
> No need for a define for this.
> 
> >  
> >  struct drm_i915_private;
> >  struct intel_encoder;
> > @@ -266,6 +269,9 @@ void tgl_dc3co_crtc_compute_config(struct drm_i915_private *dev_priv,
> >  void tgl_dc3co_crtc_get_config(struct intel_crtc_state *crtc_state);
> >  void tgl_disable_psr2_transcoder_exitline(const struct intel_crtc_state *state);
> >  void tgl_enable_psr2_transcoder_exitline(const struct intel_crtc_state *state);
> > +void tgl_dc3co_flush(struct drm_i915_private *dev_priv,
> > +		     unsigned int frontbuffer_bits, enum fb_op_origin origin);
> > +void tgl_dc5_idle_thread(struct work_struct *work);
> >  
> >  const char *
> >  intel_display_power_domain_str(enum intel_display_power_domain domain);
> > diff --git a/drivers/gpu/drm/i915/display/intel_frontbuffer.c b/drivers/gpu/drm/i915/display/intel_frontbuffer.c
> > index fc40dc1fdbcc..c3b10f6e4382 100644
> > --- a/drivers/gpu/drm/i915/display/intel_frontbuffer.c
> > +++ b/drivers/gpu/drm/i915/display/intel_frontbuffer.c
> > @@ -90,6 +90,7 @@ static void frontbuffer_flush(struct drm_i915_private *i915,
> >  	might_sleep();
> >  	intel_edp_drrs_flush(i915, frontbuffer_bits);
> >  	intel_psr_flush(i915, frontbuffer_bits, origin);
> > +	tgl_dc3co_flush(i915, frontbuffer_bits, origin);
> >  	intel_fbc_flush(i915, frontbuffer_bits, origin);
> >  }
> >  
> > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> > index 3218b0319852..fe71119a458e 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.h
> > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > @@ -338,6 +338,7 @@ struct intel_csr {
> >  	u32 dc_state;
> >  	u32 max_dc_state;
> >  	u32 allowed_dc_mask;
> > +	struct delayed_work idle_work;
> >  	intel_wakeref_t wakeref;
> >  	/* cache the crtc on which DC3CO will be allowed */
> >  	struct intel_crtc *dc3co_crtc;
> > -- 
> > 2.21.0
> >
On Tue, Sep 10, 2019 at 03:26:20PM +0530, Anshuman Gupta wrote:
> On 2019-09-08 at 20:55:17 +0300, Imre Deak wrote:
> Hi Imre ,
> Thanks for review, could you please provide your response on below
> comments.
> > On Sat, Sep 07, 2019 at 10:44:42PM +0530, Anshuman Gupta wrote:
> > > DC3CO is useful power state, when DMC detects PSR2 idle frame
> > > while an active video playback, playing 30fps video on 60hz panel
> > > is the classic example of this use case.
> > > DC5 and DC6 saves more power, but can't be entered during video
> > > playback because there are not enough idle frames in a row to meet
> > > most PSR2 panel deep sleep entry requirement typically 4 frames.
> > 
> > Please also explain why DC3co will be enabled only for flips but not for
> > other frontbuffer flush events (ORIGIN_CS/DIRTYFB etc.) and that we
> > could enable it for those too by switching to manual PSR tracking and
> > programming only 1 idle frame for deep sleep (see below).
> > 
> > Also explaining that the frontbuffer invalidate event doesn't need to be
> > acted on (b/c of PSR exit) would be helpful.
> > 
> > > 
> > > It will be worthy to enable DC3CO after completion of each flip
> > > and switch back to DC5 when display is idle, as driver doesn't
> > > differentiate between video playback and a normal flip.
> > > It is safer to allow DC5 after 6 idle frame, as PSR2 requires
> > > minimum 6 idle frame.
> > 
> > It would be clearer to say here that after a flip we enable DC3co, after
> > which we wait manually 6 frames (by scheduling the idle frame work) at
> > which point we enable PSR deep sleep with 6 idle frames. After this 6
> > idle frames the HW will enter deep sleep and DC5 will be entered
> > after this by DMC at some point.
> > 
> > The claim that we _have_ to make the HW wait for 6 idle frames before it
> > enters deep sleep doesn't make much sense to me, would be good to see a
> > reference to that if it really exists. That setting seems to only serve
> > the purpose to avoid update lags, but in the future (as discussed with
> > Ville) we should switch to manual PSR tracking and for that we would
> > program the HW to wait only 1 idle frame before entering deep sleep and
> > rely only on the manual 6 idle frame wait (via the idle frame work) to
> > avoid update lags.
> > 
> > > 
> > > v2: calculated s/w state to switch over dc3co when there is an
> > >     update. [Imre]
> > >     used cancel_delayed_work_sync() in order to avoid any race
> > >     with already scheduled delayed work. [Imre]
> > > v3: cancel_delayed_work_sync() may blocked the commit work.
> > >     Hence dropping it, dc5_idle_thread() checks the valid wakeref before
> > >     putting the reference count, which avoids any chances of dropping
> > >     a zero wakeref. [Imre (IRC)]
> > > v4: use frontbuffer flush mechanism. [Imre]
> > > 
> > > Cc: Jani Nikula <jani.nikula@intel.com>
> > > Cc: Imre Deak <imre.deak@intel.com>
> > > Cc: Animesh Manna <animesh.manna@intel.com>
> > > Signed-off-by: Anshuman Gupta <anshuman.gupta@intel.com>
> > > ---
> > >  drivers/gpu/drm/i915/display/intel_display.c  |  2 +
> > >  .../drm/i915/display/intel_display_power.c    | 79 +++++++++++++++++++
> > >  .../drm/i915/display/intel_display_power.h    |  6 ++
> > >  .../gpu/drm/i915/display/intel_frontbuffer.c  |  1 +
> > >  drivers/gpu/drm/i915/i915_drv.h               |  1 +
> > >  5 files changed, 89 insertions(+)
> > > 
> > > diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
> > > index 84488f87d058..2754e8ee693f 100644
> > > --- a/drivers/gpu/drm/i915/display/intel_display.c
> > > +++ b/drivers/gpu/drm/i915/display/intel_display.c
> > > @@ -16206,6 +16206,7 @@ int intel_modeset_init(struct drm_device *dev)
> > >  	init_llist_head(&dev_priv->atomic_helper.free_list);
> > >  	INIT_WORK(&dev_priv->atomic_helper.free_work,
> > >  		  intel_atomic_helper_free_state_worker);
> > > +	INIT_DELAYED_WORK(&dev_priv->csr.idle_work, tgl_dc5_idle_thread);
> > >  
> > >  	intel_init_quirks(dev_priv);
> > >  
> > > @@ -17147,6 +17148,7 @@ void intel_modeset_driver_remove(struct drm_device *dev)
> > >  	flush_workqueue(dev_priv->modeset_wq);
> > >  
> > >  	flush_work(&dev_priv->atomic_helper.free_work);
> > > +	flush_delayed_work(&dev_priv->csr.idle_work);
> > 
> > This is racy as the work could be still running, but also would leave a
> > few other places with the work running like suspend, so let's just make
> > sure that it's not running any more after encoder disabling.
> > 
> > >  	WARN_ON(!llist_empty(&dev_priv->atomic_helper.free_list));
> > >  
> > >  	/*
> > > diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c
> > > index ecce118b5b0e..c110f04c9733 100644
> > > --- a/drivers/gpu/drm/i915/display/intel_display_power.c
> > > +++ b/drivers/gpu/drm/i915/display/intel_display_power.c
> > > @@ -20,6 +20,7 @@
> > >  #include "intel_sideband.h"
> > >  #include "intel_tc.h"
> > >  #include "intel_pm.h"
> > > +#include "intel_psr.h"
> > >  
> > >  bool intel_display_power_well_is_enabled(struct drm_i915_private *dev_priv,
> > >  					 enum i915_power_well_id power_well_id);
> > > @@ -773,6 +774,27 @@ static void gen9_set_dc_state(struct drm_i915_private *dev_priv, u32 state)
> > >  	dev_priv->csr.dc_state = val & mask;
> > >  }
> > >  
> > > +static u32 intel_get_frame_time_us(const struct intel_crtc_state *cstate)
> > > +{
> > > +	u32 pixel_rate, crtc_htotal, crtc_vtotal;
> > > +	u32 frametime_us;
> > > +
> > > +	if (!cstate || !cstate->base.active)
> > > +		return 0;
> > > +
> > > +	pixel_rate = cstate->pixel_rate;
> > > +
> > > +	if (WARN_ON(pixel_rate == 0))
> > > +		return 0;
> > > +
> > > +	crtc_htotal = cstate->base.adjusted_mode.crtc_htotal;
> > > +	crtc_vtotal = cstate->base.adjusted_mode.crtc_vtotal;
> > > +	frametime_us = DIV_ROUND_UP(crtc_htotal * crtc_vtotal * 1000ULL,
> > > +				    pixel_rate);
> > > +
> > > +	return frametime_us;
> > > +}
> > > +
> > >  void tgl_disable_psr2_transcoder_exitline(const struct intel_crtc_state *cstate)
> > >  {
> > >  	struct drm_i915_private *dev_priv = to_i915(cstate->base.crtc->dev);
> > > @@ -817,6 +839,49 @@ void tgl_enable_psr2_transcoder_exitline(const struct intel_crtc_state *cstate)
> > >  	I915_WRITE(EXITLINE(cstate->cpu_transcoder), val);
> > >  }
> > >  
> > > +void tgl_dc3co_flush(struct drm_i915_private *dev_priv,
> > > +		     unsigned int frontbuffer_bits, enum fb_op_origin origin)
> > > +{
> > > +	struct intel_crtc_state *cstate;
> > > +	u32 delay;
> > > +	unsigned int busy_frontbuffer_bits;
> > > +
> > > +	if (!IS_TIGERLAKE(dev_priv))
> > > +		return;
> > > +
> > > +	if (origin != ORIGIN_FLIP)
> > > +		return;
> > > +
> > > +	if (!dev_priv->csr.dc3co_crtc)
> > > +		return;
> > > +
> > > +	cstate = to_intel_crtc_state(dev_priv->csr.dc3co_crtc->base.state);
> > > +	frontbuffer_bits &=
> > > +		INTEL_FRONTBUFFER_ALL_MASK(dev_priv->csr.dc3co_crtc->pipe);
> > > +	busy_frontbuffer_bits &= ~frontbuffer_bits;
> > 
> > Hrm, this looks wrong. Also it depends on the PSR mechanism, so this
> > whole DC3co flush logic should rather be done from the PSR flush func,
> > using psr.pipe.
> > 
> Hmm initially i have planned to have entire logic under psr flush func
> but PSR invalidate/flush logic for ORIGIN_FLIP relies on H/W tracking, 
> PSR invalidate and flush call has early return for ORIGIN_FLIP 
> unlike DC3CO case where we are only interested in ORIGIN_FLIP requests.

intel_psr_flush() would be very similar to what we need to do for
DC3co/ORIGIN_FLIP, except adjusting psr.busy_frontbuffer_bits. I think
eventually we could switch to full manual tracking as mentioned earlier,
so we'd also enable/disable PSR manually in case of ORIGIN_FLIP.

> > > +
> > > +	mutex_lock(&dev_priv->psr.lock);
> > > +
> > > +	if (!dev_priv->psr.psr2_enabled || !dev_priv->psr.active)
> > > +		goto unlock;
> > > +
> > > +	/*
> > > +	 * At every flip frontbuffer flush first cancel the delayed work,
> > > +	 * when delayed schedules that means display has been idle
> > > +	 * for the 6 idle frame.
> > > +	 */
> > > +	if (!busy_frontbuffer_bits) {
> > > +		cancel_delayed_work(&dev_priv->csr.idle_work);
> > 
> > The above is racy.
> Yes tgl_dc5_idle_thread() can even run after this but in that case also
> tgl_set_target_state() is protected by the power domain locks.
> Do u see any other harm of it apart from setting target_dc_state 
> immediately to DC5/DC6 after it sets to DC3CO (IMO this would be a little 
> penalty for a VBLANK in next page flip it will again set target_dc_state to DC3CO).
>  
> I can think of two possible solutions to avoid this race.
> 1. cancel_delayed_work_sync().
> 2. Having a flag to indicate that delayed work has been canceled
>    and same can be used by tgl_dc5_idle_thread() to have an early return.
> Please suggest your opinion on it. 

One way would be, instead of canceling the work and scheduling a new one,
extend the timer of it and do an early return in the work if another one
is already pending (due to the extension).

During PSR disabling (as part of encoder disabling)
cancel_delayed_work_sync() could be used then.

> > 
> > > +		tgl_set_target_dc_state(dev_priv, DC_STATE_EN_DC3CO, false);
> > > +		delay = DC5_REQ_IDLE_FRAMES * intel_get_frame_time_us(cstate);
> > > +		schedule_delayed_work(&dev_priv->csr.idle_work,
> > > +				      usecs_to_jiffies(delay));
> > > +	}
> > > +
> > > +unlock:
> > > +	mutex_unlock(&dev_priv->psr.lock);
> > > +}
> > > +
> > >  static bool tgl_dc3co_is_edp_connected(struct intel_crtc_state  *crtc_state)
> > >  {
> > >  	struct drm_atomic_state *state = crtc_state->base.state;
> > > @@ -880,6 +945,14 @@ void tgl_dc3co_crtc_get_config(struct intel_crtc_state *crtc_state)
> > >  	}
> > >  }
> > >  
> > > +void tgl_dc5_idle_thread(struct work_struct *work)
> > > +{
> > > +	struct drm_i915_private *dev_priv =
> > > +		container_of(work, typeof(*dev_priv), csr.idle_work.work);
> > > +
> > > +	tgl_set_target_dc_state(dev_priv, DC_STATE_EN_UPTO_DC6, true);
> > 
> > So it would result in enabling deep sleep, but without the PSR lock.
> > That's one reason we should really keep the PSR specific parts here.
> > 
> > > +}
> > > +
> > >  static void tgl_allow_dc3co(struct drm_i915_private *dev_priv)
> > >  {
> > >  	if (!dev_priv->psr.sink_psr2_support)
> > > @@ -1155,6 +1228,9 @@ void tgl_set_target_dc_state(struct drm_i915_private *dev_priv, u32 state,
> > >  	if (state == dev_priv->csr.max_dc_state)
> > >  		goto unlock;
> > >  
> > > +	if (!psr2_deep_sleep)
> > > +		tgl_psr2_deep_sleep_disable(dev_priv);
> > > +
> > >  	if (!dc_off_enabled) {
> > >  		/*
> > >  		 * Need to disable the DC off power well to
> > > @@ -1167,6 +1243,9 @@ void tgl_set_target_dc_state(struct drm_i915_private *dev_priv, u32 state,
> > >  	}
> > >  		dev_priv->csr.max_dc_state = state;
> > >  
> > > +	if (psr2_deep_sleep)
> > > +		tgl_psr2_deep_sleep_enable(dev_priv);
> > > +
> > >  unlock:
> > >  	mutex_unlock(&power_domains->lock);
> > >  }
> > > diff --git a/drivers/gpu/drm/i915/display/intel_display_power.h b/drivers/gpu/drm/i915/display/intel_display_power.h
> > > index d77a5a53f543..df096d64c744 100644
> > > --- a/drivers/gpu/drm/i915/display/intel_display_power.h
> > > +++ b/drivers/gpu/drm/i915/display/intel_display_power.h
> > > @@ -9,6 +9,9 @@
> > >  #include "intel_display.h"
> > >  #include "intel_runtime_pm.h"
> > >  #include "i915_reg.h"
> > > +#include "intel_frontbuffer.h"
> > > +
> > > +#define DC5_REQ_IDLE_FRAMES	6
> > 
> > No need for a define for this.
> > 
> > >  
> > >  struct drm_i915_private;
> > >  struct intel_encoder;
> > > @@ -266,6 +269,9 @@ void tgl_dc3co_crtc_compute_config(struct drm_i915_private *dev_priv,
> > >  void tgl_dc3co_crtc_get_config(struct intel_crtc_state *crtc_state);
> > >  void tgl_disable_psr2_transcoder_exitline(const struct intel_crtc_state *state);
> > >  void tgl_enable_psr2_transcoder_exitline(const struct intel_crtc_state *state);
> > > +void tgl_dc3co_flush(struct drm_i915_private *dev_priv,
> > > +		     unsigned int frontbuffer_bits, enum fb_op_origin origin);
> > > +void tgl_dc5_idle_thread(struct work_struct *work);
> > >  
> > >  const char *
> > >  intel_display_power_domain_str(enum intel_display_power_domain domain);
> > > diff --git a/drivers/gpu/drm/i915/display/intel_frontbuffer.c b/drivers/gpu/drm/i915/display/intel_frontbuffer.c
> > > index fc40dc1fdbcc..c3b10f6e4382 100644
> > > --- a/drivers/gpu/drm/i915/display/intel_frontbuffer.c
> > > +++ b/drivers/gpu/drm/i915/display/intel_frontbuffer.c
> > > @@ -90,6 +90,7 @@ static void frontbuffer_flush(struct drm_i915_private *i915,
> > >  	might_sleep();
> > >  	intel_edp_drrs_flush(i915, frontbuffer_bits);
> > >  	intel_psr_flush(i915, frontbuffer_bits, origin);
> > > +	tgl_dc3co_flush(i915, frontbuffer_bits, origin);
> > >  	intel_fbc_flush(i915, frontbuffer_bits, origin);
> > >  }
> > >  
> > > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> > > index 3218b0319852..fe71119a458e 100644
> > > --- a/drivers/gpu/drm/i915/i915_drv.h
> > > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > > @@ -338,6 +338,7 @@ struct intel_csr {
> > >  	u32 dc_state;
> > >  	u32 max_dc_state;
> > >  	u32 allowed_dc_mask;
> > > +	struct delayed_work idle_work;
> > >  	intel_wakeref_t wakeref;
> > >  	/* cache the crtc on which DC3CO will be allowed */
> > >  	struct intel_crtc *dc3co_crtc;
> > > -- 
> > > 2.21.0
> > >
On 2019-09-11 at 11:50:26 +0300, Imre Deak wrote:
> On Tue, Sep 10, 2019 at 03:26:20PM +0530, Anshuman Gupta wrote:
> > On 2019-09-08 at 20:55:17 +0300, Imre Deak wrote:
> > Hi Imre ,
> > Thanks for review, could you please provide your response on below
> > comments.
> > > On Sat, Sep 07, 2019 at 10:44:42PM +0530, Anshuman Gupta wrote:
> > > > DC3CO is useful power state, when DMC detects PSR2 idle frame
> > > > while an active video playback, playing 30fps video on 60hz panel
> > > > is the classic example of this use case.
> > > > DC5 and DC6 saves more power, but can't be entered during video
> > > > playback because there are not enough idle frames in a row to meet
> > > > most PSR2 panel deep sleep entry requirement typically 4 frames.
> > > 
> > > Please also explain why DC3co will be enabled only for flips but not for
> > > other frontbuffer flush events (ORIGIN_CS/DIRTYFB etc.) and that we
> > > could enable it for those too by switching to manual PSR tracking and
> > > programming only 1 idle frame for deep sleep (see below).
> > > 
> > > Also explaining that the frontbuffer invalidate event doesn't need to be
> > > acted on (b/c of PSR exit) would be helpful.
> > > 
> > > > 
> > > > It will be worthy to enable DC3CO after completion of each flip
> > > > and switch back to DC5 when display is idle, as driver doesn't
> > > > differentiate between video playback and a normal flip.
> > > > It is safer to allow DC5 after 6 idle frame, as PSR2 requires
> > > > minimum 6 idle frame.
> > > 
> > > It would be clearer to say here that after a flip we enable DC3co, after
> > > which we wait manually 6 frames (by scheduling the idle frame work) at
> > > which point we enable PSR deep sleep with 6 idle frames. After this 6
> > > idle frames the HW will enter deep sleep and DC5 will be entered
> > > after this by DMC at some point.
> > > 
> > > The claim that we _have_ to make the HW wait for 6 idle frames before it
> > > enters deep sleep doesn't make much sense to me, would be good to see a
> > > reference to that if it really exists. That setting seems to only serve
> > > the purpose to avoid update lags, but in the future (as discussed with
> > > Ville) we should switch to manual PSR tracking and for that we would
> > > program the HW to wait only 1 idle frame before entering deep sleep and
> > > rely only on the manual 6 idle frame wait (via the idle frame work) to
> > > avoid update lags.
> > > 
> > > > 
> > > > v2: calculated s/w state to switch over dc3co when there is an
> > > >     update. [Imre]
> > > >     used cancel_delayed_work_sync() in order to avoid any race
> > > >     with already scheduled delayed work. [Imre]
> > > > v3: cancel_delayed_work_sync() may blocked the commit work.
> > > >     Hence dropping it, dc5_idle_thread() checks the valid wakeref before
> > > >     putting the reference count, which avoids any chances of dropping
> > > >     a zero wakeref. [Imre (IRC)]
> > > > v4: use frontbuffer flush mechanism. [Imre]
> > > > 
> > > > Cc: Jani Nikula <jani.nikula@intel.com>
> > > > Cc: Imre Deak <imre.deak@intel.com>
> > > > Cc: Animesh Manna <animesh.manna@intel.com>
> > > > Signed-off-by: Anshuman Gupta <anshuman.gupta@intel.com>
> > > > ---
> > > >  drivers/gpu/drm/i915/display/intel_display.c  |  2 +
> > > >  .../drm/i915/display/intel_display_power.c    | 79 +++++++++++++++++++
> > > >  .../drm/i915/display/intel_display_power.h    |  6 ++
> > > >  .../gpu/drm/i915/display/intel_frontbuffer.c  |  1 +
> > > >  drivers/gpu/drm/i915/i915_drv.h               |  1 +
> > > >  5 files changed, 89 insertions(+)
> > > > 
> > > > diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
> > > > index 84488f87d058..2754e8ee693f 100644
> > > > --- a/drivers/gpu/drm/i915/display/intel_display.c
> > > > +++ b/drivers/gpu/drm/i915/display/intel_display.c
> > > > @@ -16206,6 +16206,7 @@ int intel_modeset_init(struct drm_device *dev)
> > > >  	init_llist_head(&dev_priv->atomic_helper.free_list);
> > > >  	INIT_WORK(&dev_priv->atomic_helper.free_work,
> > > >  		  intel_atomic_helper_free_state_worker);
> > > > +	INIT_DELAYED_WORK(&dev_priv->csr.idle_work, tgl_dc5_idle_thread);
> > > >  
> > > >  	intel_init_quirks(dev_priv);
> > > >  
> > > > @@ -17147,6 +17148,7 @@ void intel_modeset_driver_remove(struct drm_device *dev)
> > > >  	flush_workqueue(dev_priv->modeset_wq);
> > > >  
> > > >  	flush_work(&dev_priv->atomic_helper.free_work);
> > > > +	flush_delayed_work(&dev_priv->csr.idle_work);
> > > 
> > > This is racy as the work could be still running, but also would leave a
> > > few other places with the work running like suspend, so let's just make
> > > sure that it's not running any more after encoder disabling.
> > > 
> > > >  	WARN_ON(!llist_empty(&dev_priv->atomic_helper.free_list));
> > > >  
> > > >  	/*
> > > > diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c
> > > > index ecce118b5b0e..c110f04c9733 100644
> > > > --- a/drivers/gpu/drm/i915/display/intel_display_power.c
> > > > +++ b/drivers/gpu/drm/i915/display/intel_display_power.c
> > > > @@ -20,6 +20,7 @@
> > > >  #include "intel_sideband.h"
> > > >  #include "intel_tc.h"
> > > >  #include "intel_pm.h"
> > > > +#include "intel_psr.h"
> > > >  
> > > >  bool intel_display_power_well_is_enabled(struct drm_i915_private *dev_priv,
> > > >  					 enum i915_power_well_id power_well_id);
> > > > @@ -773,6 +774,27 @@ static void gen9_set_dc_state(struct drm_i915_private *dev_priv, u32 state)
> > > >  	dev_priv->csr.dc_state = val & mask;
> > > >  }
> > > >  
> > > > +static u32 intel_get_frame_time_us(const struct intel_crtc_state *cstate)
> > > > +{
> > > > +	u32 pixel_rate, crtc_htotal, crtc_vtotal;
> > > > +	u32 frametime_us;
> > > > +
> > > > +	if (!cstate || !cstate->base.active)
> > > > +		return 0;
> > > > +
> > > > +	pixel_rate = cstate->pixel_rate;
> > > > +
> > > > +	if (WARN_ON(pixel_rate == 0))
> > > > +		return 0;
> > > > +
> > > > +	crtc_htotal = cstate->base.adjusted_mode.crtc_htotal;
> > > > +	crtc_vtotal = cstate->base.adjusted_mode.crtc_vtotal;
> > > > +	frametime_us = DIV_ROUND_UP(crtc_htotal * crtc_vtotal * 1000ULL,
> > > > +				    pixel_rate);
> > > > +
> > > > +	return frametime_us;
> > > > +}
> > > > +
> > > >  void tgl_disable_psr2_transcoder_exitline(const struct intel_crtc_state *cstate)
> > > >  {
> > > >  	struct drm_i915_private *dev_priv = to_i915(cstate->base.crtc->dev);
> > > > @@ -817,6 +839,49 @@ void tgl_enable_psr2_transcoder_exitline(const struct intel_crtc_state *cstate)
> > > >  	I915_WRITE(EXITLINE(cstate->cpu_transcoder), val);
> > > >  }
> > > >  
> > > > +void tgl_dc3co_flush(struct drm_i915_private *dev_priv,
> > > > +		     unsigned int frontbuffer_bits, enum fb_op_origin origin)
> > > > +{
> > > > +	struct intel_crtc_state *cstate;
> > > > +	u32 delay;
> > > > +	unsigned int busy_frontbuffer_bits;
> > > > +
> > > > +	if (!IS_TIGERLAKE(dev_priv))
> > > > +		return;
> > > > +
> > > > +	if (origin != ORIGIN_FLIP)
> > > > +		return;
> > > > +
> > > > +	if (!dev_priv->csr.dc3co_crtc)
> > > > +		return;
> > > > +
> > > > +	cstate = to_intel_crtc_state(dev_priv->csr.dc3co_crtc->base.state);
> > > > +	frontbuffer_bits &=
> > > > +		INTEL_FRONTBUFFER_ALL_MASK(dev_priv->csr.dc3co_crtc->pipe);
> > > > +	busy_frontbuffer_bits &= ~frontbuffer_bits;
> > > 
> > > Hrm, this looks wrong. Also it depends on the PSR mechanism, so this
> > > whole DC3co flush logic should rather be done from the PSR flush func,
> > > using psr.pipe.
> > > 
> > Hmm initially i have planned to have entire logic under psr flush func
> > but PSR invalidate/flush logic for ORIGIN_FLIP relies on H/W tracking, 
> > PSR invalidate and flush call has early return for ORIGIN_FLIP 
> > unlike DC3CO case where we are only interested in ORIGIN_FLIP requests.
> 
> intel_psr_flush() would be very similar to what we need to do for
> DC3co/ORIGIN_FLIP, except adjusting psr.busy_frontbuffer_bits. I think
> eventually we could switch to full manual tracking as mentioned earlier,
> so we'd also enable/disable PSR manually in case of ORIGIN_FLIP.
  So i will use the different function tgl_dc3co_flush() with a comment "in
  future we will use intel_psr_flush when we will switch to full manual tracking
  for PSR" and will use the psr.pipe.
> 
> > > > +
> > > > +	mutex_lock(&dev_priv->psr.lock);
> > > > +
> > > > +	if (!dev_priv->psr.psr2_enabled || !dev_priv->psr.active)
> > > > +		goto unlock;
> > > > +
> > > > +	/*
> > > > +	 * At every flip frontbuffer flush first cancel the delayed work,
> > > > +	 * when delayed schedules that means display has been idle
> > > > +	 * for the 6 idle frame.
> > > > +	 */
> > > > +	if (!busy_frontbuffer_bits) {
> > > > +		cancel_delayed_work(&dev_priv->csr.idle_work);
> > > 
> > > The above is racy.
> > Yes tgl_dc5_idle_thread() can even run after this but in that case also
> > tgl_set_target_state() is protected by the power domain locks.
> > Do u see any other harm of it apart from setting target_dc_state 
> > immediately to DC5/DC6 after it sets to DC3CO (IMO this would be a little 
> > penalty for a VBLANK in next page flip it will again set target_dc_state to DC3CO).
> >  
> > I can think of two possible solutions to avoid this race.
> > 1. cancel_delayed_work_sync().
> > 2. Having a flag to indicate that delayed work has been canceled
> >    and same can be used by tgl_dc5_idle_thread() to have an early return.
> > Please suggest your opinion on it. 
> 
> One way would be, instead of canceling the work and scheduling a new one,
> extend the timer of it and do an early return in the work if another one
> is already pending (due to the extension).
> 
> During PSR disabling (as part of encoder disabling)
> cancel_delayed_work_sync() could be used then.
  I am using cancel_delayed_work_sync() for next version
  in tgl_disable_psr2_transcoder_exitline() func which calls from 
  intel_psr_disable() as part of encoder disable.

Thanks,
Anshuman Gupta.
> 
> > > 
> > > > +		tgl_set_target_dc_state(dev_priv, DC_STATE_EN_DC3CO, false);
> > > > +		delay = DC5_REQ_IDLE_FRAMES * intel_get_frame_time_us(cstate);
> > > > +		schedule_delayed_work(&dev_priv->csr.idle_work,
> > > > +				      usecs_to_jiffies(delay));
> > > > +	}
> > > > +
> > > > +unlock:
> > > > +	mutex_unlock(&dev_priv->psr.lock);
> > > > +}
> > > > +
> > > >  static bool tgl_dc3co_is_edp_connected(struct intel_crtc_state  *crtc_state)
> > > >  {
> > > >  	struct drm_atomic_state *state = crtc_state->base.state;
> > > > @@ -880,6 +945,14 @@ void tgl_dc3co_crtc_get_config(struct intel_crtc_state *crtc_state)
> > > >  	}
> > > >  }
> > > >  
> > > > +void tgl_dc5_idle_thread(struct work_struct *work)
> > > > +{
> > > > +	struct drm_i915_private *dev_priv =
> > > > +		container_of(work, typeof(*dev_priv), csr.idle_work.work);
> > > > +
> > > > +	tgl_set_target_dc_state(dev_priv, DC_STATE_EN_UPTO_DC6, true);
> > > 
> > > So it would result in enabling deep sleep, but without the PSR lock.
> > > That's one reason we should really keep the PSR specific parts here.
> > > 
> > > > +}
> > > > +
> > > >  static void tgl_allow_dc3co(struct drm_i915_private *dev_priv)
> > > >  {
> > > >  	if (!dev_priv->psr.sink_psr2_support)
> > > > @@ -1155,6 +1228,9 @@ void tgl_set_target_dc_state(struct drm_i915_private *dev_priv, u32 state,
> > > >  	if (state == dev_priv->csr.max_dc_state)
> > > >  		goto unlock;
> > > >  
> > > > +	if (!psr2_deep_sleep)
> > > > +		tgl_psr2_deep_sleep_disable(dev_priv);
> > > > +
> > > >  	if (!dc_off_enabled) {
> > > >  		/*
> > > >  		 * Need to disable the DC off power well to
> > > > @@ -1167,6 +1243,9 @@ void tgl_set_target_dc_state(struct drm_i915_private *dev_priv, u32 state,
> > > >  	}
> > > >  		dev_priv->csr.max_dc_state = state;
> > > >  
> > > > +	if (psr2_deep_sleep)
> > > > +		tgl_psr2_deep_sleep_enable(dev_priv);
> > > > +
> > > >  unlock:
> > > >  	mutex_unlock(&power_domains->lock);
> > > >  }
> > > > diff --git a/drivers/gpu/drm/i915/display/intel_display_power.h b/drivers/gpu/drm/i915/display/intel_display_power.h
> > > > index d77a5a53f543..df096d64c744 100644
> > > > --- a/drivers/gpu/drm/i915/display/intel_display_power.h
> > > > +++ b/drivers/gpu/drm/i915/display/intel_display_power.h
> > > > @@ -9,6 +9,9 @@
> > > >  #include "intel_display.h"
> > > >  #include "intel_runtime_pm.h"
> > > >  #include "i915_reg.h"
> > > > +#include "intel_frontbuffer.h"
> > > > +
> > > > +#define DC5_REQ_IDLE_FRAMES	6
> > > 
> > > No need for a define for this.
> > > 
> > > >  
> > > >  struct drm_i915_private;
> > > >  struct intel_encoder;
> > > > @@ -266,6 +269,9 @@ void tgl_dc3co_crtc_compute_config(struct drm_i915_private *dev_priv,
> > > >  void tgl_dc3co_crtc_get_config(struct intel_crtc_state *crtc_state);
> > > >  void tgl_disable_psr2_transcoder_exitline(const struct intel_crtc_state *state);
> > > >  void tgl_enable_psr2_transcoder_exitline(const struct intel_crtc_state *state);
> > > > +void tgl_dc3co_flush(struct drm_i915_private *dev_priv,
> > > > +		     unsigned int frontbuffer_bits, enum fb_op_origin origin);
> > > > +void tgl_dc5_idle_thread(struct work_struct *work);
> > > >  
> > > >  const char *
> > > >  intel_display_power_domain_str(enum intel_display_power_domain domain);
> > > > diff --git a/drivers/gpu/drm/i915/display/intel_frontbuffer.c b/drivers/gpu/drm/i915/display/intel_frontbuffer.c
> > > > index fc40dc1fdbcc..c3b10f6e4382 100644
> > > > --- a/drivers/gpu/drm/i915/display/intel_frontbuffer.c
> > > > +++ b/drivers/gpu/drm/i915/display/intel_frontbuffer.c
> > > > @@ -90,6 +90,7 @@ static void frontbuffer_flush(struct drm_i915_private *i915,
> > > >  	might_sleep();
> > > >  	intel_edp_drrs_flush(i915, frontbuffer_bits);
> > > >  	intel_psr_flush(i915, frontbuffer_bits, origin);
> > > > +	tgl_dc3co_flush(i915, frontbuffer_bits, origin);
> > > >  	intel_fbc_flush(i915, frontbuffer_bits, origin);
> > > >  }
> > > >  
> > > > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> > > > index 3218b0319852..fe71119a458e 100644
> > > > --- a/drivers/gpu/drm/i915/i915_drv.h
> > > > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > > > @@ -338,6 +338,7 @@ struct intel_csr {
> > > >  	u32 dc_state;
> > > >  	u32 max_dc_state;
> > > >  	u32 allowed_dc_mask;
> > > > +	struct delayed_work idle_work;
> > > >  	intel_wakeref_t wakeref;
> > > >  	/* cache the crtc on which DC3CO will be allowed */
> > > >  	struct intel_crtc *dc3co_crtc;
> > > > -- 
> > > > 2.21.0
> > > >