[15/16] drm/i915: Use vblank workers for gamma updates

Submitted by Ville Syrjälä on Sept. 6, 2019, 7:02 p.m.

Details

Message ID 20190906190247.5878-15-ville.syrjala@linux.intel.com
State New
Headers show
Series "Series without cover letter" ( rev: 1 ) in Intel GFX - Try Bot

Not browsing as part of any series.

Commit Message

Ville Syrjälä Sept. 6, 2019, 7:02 p.m.
From: Ville Syrjälä <ville.syrjala@linux.intel.com>

Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
---
 drivers/gpu/drm/i915/display/intel_atomic.c   |   5 +
 drivers/gpu/drm/i915/display/intel_display.c  | 194 ++++++++++++------
 drivers/gpu/drm/i915/display/intel_display.h  |  11 +
 .../drm/i915/display/intel_display_types.h    |   6 +
 drivers/gpu/drm/i915/display/intel_sprite.c   |  54 ++++-
 drivers/gpu/drm/i915/display/intel_sprite.h   |   3 +-
 6 files changed, 209 insertions(+), 64 deletions(-)

Patch hide | download patch | download mbox

diff --git a/drivers/gpu/drm/i915/display/intel_atomic.c b/drivers/gpu/drm/i915/display/intel_atomic.c
index a8ce0e16a2fa..66b6ac4bdaad 100644
--- a/drivers/gpu/drm/i915/display/intel_atomic.c
+++ b/drivers/gpu/drm/i915/display/intel_atomic.c
@@ -206,6 +206,11 @@  intel_crtc_duplicate_state(struct drm_crtc *crtc)
 	crtc_state->update_planes = 0;
 	crtc_state->put_domains = 0;
 
+	crtc_state->put_domains = 0;
+	crtc_state->state = NULL;
+	drm_vblank_work_init(&crtc_state->vblank_work,
+			     crtc, intel_crtc_vblank_work);
+
 	return &crtc_state->base;
 }
 
diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
index 34bca50570b7..488d026aab76 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -11466,6 +11466,7 @@  static void intel_crtc_destroy(struct drm_crtc *crtc)
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 
 	drm_crtc_cleanup(crtc);
+	pm_qos_remove_request(&intel_crtc->pm_qos);
 	kfree(intel_crtc);
 }
 
@@ -12278,6 +12279,7 @@  clear_intel_crtc_state(struct intel_crtc_state *crtc_state)
 	memcpy(saved_state->icl_port_dplls, crtc_state->icl_port_dplls,
 	       sizeof(saved_state->icl_port_dplls));
 	saved_state->crc_enabled = crtc_state->crc_enabled;
+	saved_state->vblank_work = crtc_state->vblank_work;
 	if (IS_G4X(dev_priv) ||
 	    IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
 		saved_state->wm = crtc_state->wm;
@@ -14025,6 +14027,84 @@  static void intel_atomic_cleanup_work(struct work_struct *work)
 	intel_atomic_helper_free_state(i915);
 }
 
+static void intel_atomic_commit_done(struct intel_atomic_state *state)
+{
+	struct drm_i915_private *i915 = to_i915(state->base.dev);
+
+	drm_atomic_helper_wait_for_flip_done(&i915->drm, &state->base);
+
+	if (state->modeset)
+		intel_set_cdclk_post_plane_update(i915, &state->cdclk.actual,
+						  &i915->cdclk.actual,
+						  state->cdclk.pipe);
+
+	if (intel_can_enable_sagv(state))
+		intel_enable_sagv(i915);
+
+	queue_work(system_highpri_wq, &state->base.commit_work);
+}
+
+bool intel_crtc_need_lut_load(const struct intel_crtc_state *crtc_state)
+{
+	return !needs_modeset(crtc_state) &&
+		(crtc_state->base.color_mgmt_changed ||
+		 crtc_state->update_pipe);
+}
+
+static void intel_crtc_do_vblank_work(struct intel_atomic_state *state,
+				      struct intel_crtc_state *new_crtc_state,
+				      u64 count)
+{
+	struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->base.crtc);
+	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+	struct intel_crtc_state *old_crtc_state =
+		intel_atomic_get_old_crtc_state(state, crtc);
+
+	if (new_crtc_state->base.active &&
+	    intel_crtc_need_lut_load(new_crtc_state))
+		intel_color_load_luts(new_crtc_state);
+
+	/*
+	 * Now that the vblank has passed, we can go ahead and program the
+	 * optimal watermarks on platforms that need two-step watermark
+	 * programming.
+	 */
+	if (dev_priv->display.optimize_watermarks)
+		dev_priv->display.optimize_watermarks(state, new_crtc_state);
+
+	intel_post_plane_update(old_crtc_state);
+
+	if (new_crtc_state->put_domains)
+		modeset_put_power_domains(dev_priv, new_crtc_state->put_domains);
+
+	pm_qos_update_request(&dev_priv->pm_qos, PM_QOS_DEFAULT_VALUE);
+}
+
+void intel_crtc_vblank_work(struct drm_vblank_work *work,
+			    u64 count)
+{
+	struct intel_crtc_state *new_crtc_state =
+		container_of(work, struct intel_crtc_state, vblank_work);
+	struct intel_atomic_state *state = new_crtc_state->state;
+
+	intel_crtc_do_vblank_work(state, new_crtc_state, count);
+
+	new_crtc_state->state = NULL;
+
+	if (atomic_dec_and_test(&state->cleanup_count))
+		intel_atomic_commit_done(state);
+}
+
+static void intel_atomic_flush_vblank_works(struct intel_atomic_state *state)
+{
+	struct intel_crtc_state *old_crtc_state;
+	struct intel_crtc *crtc;
+	int i;
+
+	for_each_old_intel_crtc_in_state(state, crtc, old_crtc_state, i)
+		drm_vblank_work_flush(&old_crtc_state->vblank_work);
+}
+
 static void intel_atomic_commit_tail(struct intel_atomic_state *state)
 {
 	struct drm_device *dev = state->base.dev;
@@ -14034,6 +14114,8 @@  static void intel_atomic_commit_tail(struct intel_atomic_state *state)
 	intel_wakeref_t wakeref = 0;
 	int i;
 
+	intel_atomic_flush_vblank_works(state);
+
 	intel_atomic_commit_fence_wait(state);
 
 	drm_atomic_helper_wait_for_dependencies(&state->base);
@@ -14045,7 +14127,6 @@  static void intel_atomic_commit_tail(struct intel_atomic_state *state)
 					    new_crtc_state, i) {
 		if (needs_modeset(new_crtc_state) ||
 		    new_crtc_state->update_pipe) {
-
 			new_crtc_state->put_domains =
 				modeset_get_crtc_power_domains(new_crtc_state);
 		}
@@ -14075,6 +14156,17 @@  static void intel_atomic_commit_tail(struct intel_atomic_state *state)
 		intel_modeset_verify_disabled(dev_priv, state);
 	}
 
+	/*
+	 * Defer the cleanup of the old state to a separate worker to not
+	 * impede the current task (userspace for blocking modesets) that
+	 * are executed inline. For out-of-line asynchronous modesets/flips,
+	 * deferring to a new worker seems overkill, but we would place a
+	 * schedule point (cond_resched()) here anyway to keep latencies
+	 * down.
+	 */
+	INIT_WORK(&state->base.commit_work, intel_atomic_cleanup_work);
+	atomic_set(&state->cleanup_count, 1);
+
 	/* Complete the events for pipes that have now been disabled */
 	for_each_new_intel_crtc_in_state(state, crtc, new_crtc_state, i) {
 		bool modeset = needs_modeset(new_crtc_state);
@@ -14087,6 +14179,10 @@  static void intel_atomic_commit_tail(struct intel_atomic_state *state)
 
 			new_crtc_state->base.event = NULL;
 		}
+
+		/* FIXME should we do this before or after update_crtcs()? */
+		if (!new_crtc_state->base.active)
+			intel_crtc_do_vblank_work(state, new_crtc_state, 0);
 	}
 
 	if (state->modeset)
@@ -14095,63 +14191,25 @@  static void intel_atomic_commit_tail(struct intel_atomic_state *state)
 	/* Now enable the clocks, plane, pipe, and connectors that we set up. */
 	dev_priv->display.commit_modeset_enables(state);
 
-	if (state->modeset) {
+	if (state->modeset)
 		intel_encoders_update_complete(state);
 
-		intel_set_cdclk_post_plane_update(dev_priv,
-						  &state->cdclk.actual,
-						  &dev_priv->cdclk.actual,
-						  state->cdclk.pipe);
-	}
-
-	/* FIXME: We should call drm_atomic_helper_commit_hw_done() here
-	 * already, but still need the state for the delayed optimization. To
-	 * fix this:
-	 * - wrap the optimization/post_plane_update stuff into a per-crtc work.
-	 * - schedule that vblank worker _before_ calling hw_done
-	 * - at the start of commit_tail, cancel it _synchrously
-	 * - switch over to the vblank wait helper in the core after that since
-	 *   we don't need out special handling any more.
-	 */
-	drm_atomic_helper_wait_for_flip_done(dev, &state->base);
-
-	for_each_new_intel_crtc_in_state(state, crtc, new_crtc_state, i) {
-		if (new_crtc_state->base.active &&
-		    !needs_modeset(new_crtc_state) &&
-		    (new_crtc_state->base.color_mgmt_changed ||
-		     new_crtc_state->update_pipe))
-			intel_color_load_luts(new_crtc_state);
-	}
+	drm_atomic_helper_commit_hw_done(&state->base);
 
 	/*
-	 * Now that the vblank has passed, we can go ahead and program the
-	 * optimal watermarks on platforms that need two-step watermark
-	 * programming.
+	 * FIXME intel_modeset_verify_crtc() clobbers
+	 * old_crtc_state->commit which causes explosions
+	 * in the parallel works.
 	 *
-	 * TODO: Move this (and other cleanup) to an async worker eventually.
+	 * FIXME how does this even work in the
+	 * pre-vblank work era?
 	 */
-	for_each_new_intel_crtc_in_state(state, crtc, new_crtc_state, i) {
-		if (dev_priv->display.optimize_watermarks)
-			dev_priv->display.optimize_watermarks(state,
-							      new_crtc_state);
-	}
-
-	for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) {
-		intel_post_plane_update(old_crtc_state);
-
-		if (new_crtc_state->put_domains)
-			modeset_put_power_domains(dev_priv, new_crtc_state->put_domains);
-
-		intel_modeset_verify_crtc(crtc, state, new_crtc_state);
-	}
+	if (state->modeset) {
+		for_each_new_intel_crtc_in_state(state, crtc, new_crtc_state, i)
+			intel_modeset_verify_crtc(crtc, state, new_crtc_state);
 
-	if (state->modeset)
 		intel_verify_planes(state);
-
-	if (state->modeset && intel_can_enable_sagv(state))
-		intel_enable_sagv(dev_priv);
-
-	drm_atomic_helper_commit_hw_done(&state->base);
+	}
 
 	if (state->modeset) {
 		/* As one of the primary mmio accessors, KMS has a high
@@ -14165,16 +14223,8 @@  static void intel_atomic_commit_tail(struct intel_atomic_state *state)
 	}
 	intel_runtime_pm_put(&dev_priv->runtime_pm, state->wakeref);
 
-	/*
-	 * Defer the cleanup of the old state to a separate worker to not
-	 * impede the current task (userspace for blocking modesets) that
-	 * are executed inline. For out-of-line asynchronous modesets/flips,
-	 * deferring to a new worker seems overkill, but we would place a
-	 * schedule point (cond_resched()) here anyway to keep latencies
-	 * down.
-	 */
-	INIT_WORK(&state->base.commit_work, intel_atomic_cleanup_work);
-	queue_work(system_highpri_wq, &state->base.commit_work);
+	if (atomic_dec_and_test(&state->cleanup_count))
+		intel_atomic_commit_done(state);
 }
 
 static void intel_atomic_commit_work(struct work_struct *work)
@@ -14311,6 +14361,7 @@  static int intel_atomic_commit(struct drm_device *dev,
 		if (state->modeset)
 			flush_workqueue(dev_priv->modeset_wq);
 		intel_atomic_commit_tail(state);
+		drm_atomic_helper_wait_for_flip_done(dev, &state->base);
 	}
 
 	return 0;
@@ -14614,6 +14665,15 @@  static void intel_begin_crtc_commit(struct intel_atomic_state *state,
 		intel_atomic_get_new_crtc_state(state, crtc);
 	bool modeset = needs_modeset(new_crtc_state);
 
+	/*
+	 * Make sure we wake up as quickly as possible when
+	 * irq fires. Otherwise we risk the single buffered
+	 * register update can get delayed until we're already
+	 * in the active portion of the new frame.
+	 */
+	if (intel_crtc_need_lut_load(new_crtc_state))
+		pm_qos_update_request(&crtc->pm_qos, 0);
+
 	/* Perform vblank evasion around commit operation */
 	intel_pipe_update_start(new_crtc_state);
 
@@ -14661,13 +14721,17 @@  static void intel_finish_crtc_commit(struct intel_atomic_state *state,
 		intel_atomic_get_old_crtc_state(state, crtc);
 	struct intel_crtc_state *new_crtc_state =
 		intel_atomic_get_new_crtc_state(state, crtc);
+	int ret;
 
-	intel_pipe_update_end(new_crtc_state);
+	ret = intel_pipe_update_end(state, new_crtc_state);
 
 	if (new_crtc_state->update_pipe &&
 	    !needs_modeset(new_crtc_state) &&
 	    old_crtc_state->base.mode.private_flags & I915_MODE_FLAG_INHERITED)
 		intel_crtc_arm_fifo_underrun(crtc, new_crtc_state);
+
+	if (WARN_ON_ONCE(ret))
+		intel_crtc_do_vblank_work(state, new_crtc_state, 0);
 }
 
 /**
@@ -15272,6 +15336,11 @@  static int intel_crtc_init(struct drm_i915_private *dev_priv, enum pipe pipe)
 
 	intel_color_init(intel_crtc);
 
+	pm_qos_add_request(&intel_crtc->pm_qos, PM_QOS_CPU_DMA_LATENCY,
+			   PM_QOS_DEFAULT_VALUE);
+	drm_vblank_work_init(&crtc_state->vblank_work,
+			     &intel_crtc->base, intel_crtc_vblank_work);
+
 	WARN_ON(drm_crtc_index(&intel_crtc->base) != intel_crtc->pipe);
 
 	return 0;
@@ -16799,10 +16868,13 @@  static void intel_modeset_readout_hw_state(struct drm_device *dev)
 	for_each_intel_crtc(dev, crtc) {
 		struct intel_crtc_state *crtc_state =
 			to_intel_crtc_state(crtc->base.state);
+		struct drm_vblank_work save_vblank_work =
+			crtc_state->vblank_work;
 
 		__drm_atomic_helper_crtc_destroy_state(&crtc_state->base);
 		memset(crtc_state, 0, sizeof(*crtc_state));
 		__drm_atomic_helper_crtc_reset(&crtc->base, &crtc_state->base);
+		crtc_state->vblank_work = save_vblank_work;
 
 		crtc_state->base.active = crtc_state->base.enable =
 			dev_priv->display.get_pipe_config(crtc, crtc_state);
diff --git a/drivers/gpu/drm/i915/display/intel_display.h b/drivers/gpu/drm/i915/display/intel_display.h
index 33fd523c4622..8529687c59d6 100644
--- a/drivers/gpu/drm/i915/display/intel_display.h
+++ b/drivers/gpu/drm/i915/display/intel_display.h
@@ -41,6 +41,7 @@  struct drm_i915_private;
 struct drm_modeset_acquire_ctx;
 struct drm_plane;
 struct drm_plane_state;
+struct drm_vblank_work;
 struct i915_ggtt_view;
 struct intel_crtc;
 struct intel_crtc_state;
@@ -403,6 +404,14 @@  enum phy_fia {
 	     (__i)++) \
 		for_each_if(plane)
 
+#define for_each_old_intel_crtc_in_state(__state, crtc, old_crtc_state, __i) \
+	for ((__i) = 0; \
+	     (__i) < (__state)->base.dev->mode_config.num_crtc && \
+		     ((crtc) = to_intel_crtc((__state)->base.crtcs[__i].ptr), \
+		      (old_crtc_state) = to_intel_crtc_state((__state)->base.crtcs[__i].old_state), 1); \
+	     (__i)++) \
+		for_each_if(crtc)
+
 #define for_each_new_intel_crtc_in_state(__state, crtc, new_crtc_state, __i) \
 	for ((__i) = 0; \
 	     (__i) < (__state)->base.dev->mode_config.num_crtc && \
@@ -564,6 +573,8 @@  unsigned int i9xx_plane_max_stride(struct intel_plane *plane,
 				   u32 pixel_format, u64 modifier,
 				   unsigned int rotation);
 int bdw_get_pipemisc_bpp(struct intel_crtc *crtc);
+void intel_crtc_vblank_work(struct drm_vblank_work *work,
+			    u64 count);
 
 struct intel_display_error_state *
 intel_display_capture_error_state(struct drm_i915_private *dev_priv);
diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h
index 36da0bbe99f1..9117f0a51dde 100644
--- a/drivers/gpu/drm/i915/display/intel_display_types.h
+++ b/drivers/gpu/drm/i915/display/intel_display_types.h
@@ -512,6 +512,8 @@  struct intel_atomic_state {
 	struct i915_sw_fence commit_ready;
 
 	struct llist_node freed;
+
+	atomic_t cleanup_count;
 };
 
 struct intel_plane_state {
@@ -992,6 +994,8 @@  struct intel_crtc_state {
 	/* Forward Error correction State */
 	bool fec_enable;
 
+	struct drm_vblank_work vblank_work;
+	struct intel_atomic_state *state;
 	u64 put_domains;
 };
 
@@ -1035,6 +1039,8 @@  struct intel_crtc {
 
 	/* scalers available on this crtc */
 	int num_scalers;
+
+	struct pm_qos_request pm_qos;
 };
 
 struct intel_plane {
diff --git a/drivers/gpu/drm/i915/display/intel_sprite.c b/drivers/gpu/drm/i915/display/intel_sprite.c
index ef18649e15a0..b6c301c75b9b 100644
--- a/drivers/gpu/drm/i915/display/intel_sprite.c
+++ b/drivers/gpu/drm/i915/display/intel_sprite.c
@@ -72,6 +72,44 @@  int intel_usecs_to_scanlines(const struct drm_display_mode *adjusted_mode,
 			    1000 * adjusted_mode->crtc_htotal);
 }
 
+bool intel_crtc_need_lut_load(const struct intel_crtc_state *crtc_state);
+
+static bool intel_crtc_need_vblank_work(struct intel_atomic_state *state,
+					const struct intel_crtc_state *crtc_state)
+{
+	if (!crtc_state->base.active)
+		return false;
+
+	return intel_crtc_need_lut_load(crtc_state) ||
+		crtc_state->wm.need_postvbl_update;
+}
+
+static int intel_crtc_schedule_vblank_work(struct intel_atomic_state *state,
+					   struct intel_crtc_state *new_crtc_state,
+					   u64 vbl_count)
+{
+	int ret;
+
+	if (WARN_ON(!new_crtc_state->base.active))
+		return -EBUSY;
+
+	/*
+	 * drm_atomic_helper_swap_state() has already made
+	 * new_crtc_state->base.state NULL hence we need our own pointer.
+	 */
+	new_crtc_state->state = state;
+	atomic_inc(&state->cleanup_count);
+
+	ret = drm_vblank_work_schedule(&new_crtc_state->vblank_work,
+				       vbl_count, false);
+	if (ret) {
+		new_crtc_state->state = NULL;
+		atomic_dec(&state->cleanup_count);
+	}
+
+	return ret;
+}
+
 /* FIXME: We should instead only take spinlocks once for the entire update
  * instead of once per mmio. */
 #if IS_ENABLED(CONFIG_PROVE_LOCKING)
@@ -195,13 +233,15 @@  void intel_pipe_update_start(const struct intel_crtc_state *new_crtc_state)
 
 /**
  * intel_pipe_update_end() - end update of a set of display registers
+ * @state: the atomic state
  * @new_crtc_state: the new crtc state
  *
  * Mark the end of an update started with intel_pipe_update_start(). This
  * re-enables interrupts and verifies the update was actually completed
  * before a vblank.
  */
-void intel_pipe_update_end(struct intel_crtc_state *new_crtc_state)
+int intel_pipe_update_end(struct intel_atomic_state *state,
+			  struct intel_crtc_state *new_crtc_state)
 {
 	struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->base.crtc);
 	enum pipe pipe = crtc->pipe;
@@ -209,6 +249,8 @@  void intel_pipe_update_end(struct intel_crtc_state *new_crtc_state)
 	u32 end_vbl_count = intel_crtc_get_vblank_counter(crtc);
 	ktime_t end_vbl_time = ktime_get();
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+	u64 vbl_count;
+	int ret = 0;
 
 	trace_i915_pipe_update_end(crtc, end_vbl_count, scanline_end);
 
@@ -221,15 +263,22 @@  void intel_pipe_update_end(struct intel_crtc_state *new_crtc_state)
 
 		spin_lock(&crtc->base.dev->event_lock);
 		drm_crtc_arm_vblank_event(&crtc->base, new_crtc_state->base.event);
+		vbl_count = new_crtc_state->base.event->sequence;
 		spin_unlock(&crtc->base.dev->event_lock);
 
 		new_crtc_state->base.event = NULL;
+	} else if (intel_crtc_need_vblank_work(state, new_crtc_state)) {
+		vbl_count = drm_crtc_accurate_vblank_count(&crtc->base) + 1;
 	}
 
+	if (intel_crtc_need_vblank_work(state, new_crtc_state))
+		ret = intel_crtc_schedule_vblank_work(state, new_crtc_state,
+						      vbl_count);
+
 	local_irq_enable();
 
 	if (intel_vgpu_active(dev_priv))
-		return;
+		return ret;
 
 	if (crtc->debug.start_vbl_count &&
 	    crtc->debug.start_vbl_count != end_vbl_count) {
@@ -248,6 +297,7 @@  void intel_pipe_update_end(struct intel_crtc_state *new_crtc_state)
 			 ktime_us_delta(end_vbl_time, crtc->debug.start_vbl_time),
 			 VBLANK_EVASION_TIME_US);
 #endif
+	return ret;
 }
 
 int intel_plane_check_stride(const struct intel_plane_state *plane_state)
diff --git a/drivers/gpu/drm/i915/display/intel_sprite.h b/drivers/gpu/drm/i915/display/intel_sprite.h
index 093a2d156f1e..3e8426c0c54c 100644
--- a/drivers/gpu/drm/i915/display/intel_sprite.h
+++ b/drivers/gpu/drm/i915/display/intel_sprite.h
@@ -25,7 +25,8 @@  struct intel_plane *intel_sprite_plane_create(struct drm_i915_private *dev_priv,
 int intel_sprite_set_colorkey_ioctl(struct drm_device *dev, void *data,
 				    struct drm_file *file_priv);
 void intel_pipe_update_start(const struct intel_crtc_state *new_crtc_state);
-void intel_pipe_update_end(struct intel_crtc_state *new_crtc_state);
+int intel_pipe_update_end(struct intel_atomic_state *state,
+			  struct intel_crtc_state *new_crtc_state);
 int intel_plane_check_stride(const struct intel_plane_state *plane_state);
 int intel_plane_check_src_coordinates(struct intel_plane_state *plane_state);
 int chv_plane_check_rotation(const struct intel_plane_state *plane_state);