[07/16] drm/i915: Optimize lut loads

Submitted by Ville Syrjälä on Sept. 6, 2019, 7:02 p.m.

Details

Message ID 20190906190247.5878-7-ville.syrjala@linux.intel.com
State New
Headers show
Series "Series without cover letter" ( rev: 2 1 ) in Intel GFX - Try Bot

Not browsing as part of any series.

Commit Message

Ville Syrjälä Sept. 6, 2019, 7:02 p.m.
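From: Ville Syrjälä <ville.syrjala@linux.intel.com>

Convert the LUT load helpers in intel_color.c from I915_WRITE() to
I915_WRITE_FW(), and have the *_load_luts() functions (i9xx, ilk, ivb,
bdw, glk, icl, chv) take dev_priv->uncore.lock with spin_lock_irq() /
spin_unlock_irq() around the whole sequence of LUT register writes.

Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>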
---
 drivers/gpu/drm/i915/display/intel_color.c | 138 +++++++++++++--------
 1 file changed, 86 insertions(+), 52 deletions(-)

Patch hide | download patch | download mbox

diff --git a/drivers/gpu/drm/i915/display/intel_color.c b/drivers/gpu/drm/i915/display/intel_color.c
index ba7251fe20ac..d2f0d229716a 100644
--- a/drivers/gpu/drm/i915/display/intel_color.c
+++ b/drivers/gpu/drm/i915/display/intel_color.c
@@ -469,7 +469,7 @@  static void i9xx_load_lut_8(struct intel_crtc *crtc,
 	lut = blob->data;
 
 	for (i = 0; i < 256; i++)
-		I915_WRITE(PALETTE(pipe, i), i9xx_lut_8(&lut[i]));
+		I915_WRITE_FW(PALETTE(pipe, i), i9xx_lut_8(&lut[i]));
 }
 
 static void i965_load_lut_10p6(struct intel_crtc *crtc,
@@ -481,15 +481,15 @@  static void i965_load_lut_10p6(struct intel_crtc *crtc,
 	enum pipe pipe = crtc->pipe;
 
 	for (i = 0; i < lut_size - 1; i++) {
-		I915_WRITE(PALETTE(pipe, 2 * i + 0),
-			   i965_lut_10p6_ldw(&lut[i]));
-		I915_WRITE(PALETTE(pipe, 2 * i + 1),
-			   i965_lut_10p6_udw(&lut[i]));
+		I915_WRITE_FW(PALETTE(pipe, 2 * i + 0),
+			      i965_lut_10p6_ldw(&lut[i]));
+		I915_WRITE_FW(PALETTE(pipe, 2 * i + 1),
+			      i965_lut_10p6_udw(&lut[i]));
 	}
 
-	I915_WRITE(PIPEGCMAX(pipe, 0), lut[i].red);
-	I915_WRITE(PIPEGCMAX(pipe, 1), lut[i].green);
-	I915_WRITE(PIPEGCMAX(pipe, 2), lut[i].blue);
+	I915_WRITE_FW(PIPEGCMAX(pipe, 0), lut[i].red);
+	I915_WRITE_FW(PIPEGCMAX(pipe, 1), lut[i].green);
+	I915_WRITE_FW(PIPEGCMAX(pipe, 2), lut[i].blue);
 }
 
 static void i9xx_load_luts(const struct intel_crtc_state *crtc_state)
@@ -503,10 +503,14 @@  static void i9xx_load_luts(const struct intel_crtc_state *crtc_state)
 	else
 		assert_pll_enabled(dev_priv, crtc->pipe);
 
+	spin_lock_irq(&dev_priv->uncore.lock);
+
 	if (crtc_state->gamma_mode == GAMMA_MODE_MODE_8BIT)
 		i9xx_load_lut_8(crtc, gamma_lut);
 	else
 		i965_load_lut_10p6(crtc, gamma_lut);
+
+	spin_unlock_irq(&dev_priv->uncore.lock);
 }
 
 static void ilk_load_lut_8(struct intel_crtc *crtc,
@@ -523,7 +527,7 @@  static void ilk_load_lut_8(struct intel_crtc *crtc,
 	lut = blob->data;
 
 	for (i = 0; i < 256; i++)
-		I915_WRITE(LGC_PALETTE(pipe, i), i9xx_lut_8(&lut[i]));
+		I915_WRITE_FW(LGC_PALETTE(pipe, i), i9xx_lut_8(&lut[i]));
 }
 
 static void ilk_load_lut_10(struct intel_crtc *crtc,
@@ -535,18 +539,23 @@  static void ilk_load_lut_10(struct intel_crtc *crtc,
 	enum pipe pipe = crtc->pipe;
 
 	for (i = 0; i < lut_size; i++)
-		I915_WRITE(PREC_PALETTE(pipe, i), ilk_lut_10(&lut[i]));
+		I915_WRITE_FW(PREC_PALETTE(pipe, i), ilk_lut_10(&lut[i]));
 }
 
 static void ilk_load_luts(const struct intel_crtc_state *crtc_state)
 {
 	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 	const struct drm_property_blob *gamma_lut = crtc_state->base.gamma_lut;
 
+	spin_lock_irq(&dev_priv->uncore.lock);
+
 	if (crtc_state->gamma_mode == GAMMA_MODE_MODE_8BIT)
 		ilk_load_lut_8(crtc, gamma_lut);
 	else
 		ilk_load_lut_10(crtc, gamma_lut);
+
+	spin_unlock_irq(&dev_priv->uncore.lock);
 }
 
 static int ivb_lut_10_size(u32 prec_index)
@@ -577,15 +586,15 @@  static void ivb_load_lut_10(struct intel_crtc *crtc,
 		const struct drm_color_lut *entry =
 			&lut[i * (lut_size - 1) / (hw_lut_size - 1)];
 
-		I915_WRITE(PREC_PAL_INDEX(pipe), prec_index++);
-		I915_WRITE(PREC_PAL_DATA(pipe), ilk_lut_10(entry));
+		I915_WRITE_FW(PREC_PAL_INDEX(pipe), prec_index++);
+		I915_WRITE_FW(PREC_PAL_DATA(pipe), ilk_lut_10(entry));
 	}
 
 	/*
 	 * Reset the index, otherwise it prevents the legacy palette to be
 	 * written properly.
 	 */
-	I915_WRITE(PREC_PAL_INDEX(pipe), 0);
+	I915_WRITE_FW(PREC_PAL_INDEX(pipe), 0);
 }
 
 /* On BDW+ the index auto increment mode actually works */
@@ -599,22 +608,22 @@  static void bdw_load_lut_10(struct intel_crtc *crtc,
 	int i, lut_size = drm_color_lut_size(blob);
 	enum pipe pipe = crtc->pipe;
 
-	I915_WRITE(PREC_PAL_INDEX(pipe), prec_index |
-		   PAL_PREC_AUTO_INCREMENT);
+	I915_WRITE_FW(PREC_PAL_INDEX(pipe), prec_index |
+		      PAL_PREC_AUTO_INCREMENT);
 
 	for (i = 0; i < hw_lut_size; i++) {
 		/* We discard half the user entries in split gamma mode */
 		const struct drm_color_lut *entry =
 			&lut[i * (lut_size - 1) / (hw_lut_size - 1)];
 
-		I915_WRITE(PREC_PAL_DATA(pipe), ilk_lut_10(entry));
+		I915_WRITE_FW(PREC_PAL_DATA(pipe), ilk_lut_10(entry));
 	}
 
 	/*
 	 * Reset the index, otherwise it prevents the legacy palette to be
 	 * written properly.
 	 */
-	I915_WRITE(PREC_PAL_INDEX(pipe), 0);
+	I915_WRITE_FW(PREC_PAL_INDEX(pipe), 0);
 }
 
 static void ivb_load_lut_ext_max(struct intel_crtc *crtc)
@@ -623,9 +632,9 @@  static void ivb_load_lut_ext_max(struct intel_crtc *crtc)
 	enum pipe pipe = crtc->pipe;
 
 	/* Program the max register to clamp values > 1.0. */
-	I915_WRITE(PREC_PAL_EXT_GC_MAX(pipe, 0), 1 << 16);
-	I915_WRITE(PREC_PAL_EXT_GC_MAX(pipe, 1), 1 << 16);
-	I915_WRITE(PREC_PAL_EXT_GC_MAX(pipe, 2), 1 << 16);
+	I915_WRITE_FW(PREC_PAL_EXT_GC_MAX(pipe, 0), 1 << 16);
+	I915_WRITE_FW(PREC_PAL_EXT_GC_MAX(pipe, 1), 1 << 16);
+	I915_WRITE_FW(PREC_PAL_EXT_GC_MAX(pipe, 2), 1 << 16);
 
 	/*
 	 * Program the gc max 2 register to clamp values > 1.0.
@@ -633,18 +642,21 @@  static void ivb_load_lut_ext_max(struct intel_crtc *crtc)
 	 * from 3.0 to 7.0
 	 */
 	if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) {
-		I915_WRITE(PREC_PAL_EXT2_GC_MAX(pipe, 0), 1 << 16);
-		I915_WRITE(PREC_PAL_EXT2_GC_MAX(pipe, 1), 1 << 16);
-		I915_WRITE(PREC_PAL_EXT2_GC_MAX(pipe, 2), 1 << 16);
+		I915_WRITE_FW(PREC_PAL_EXT2_GC_MAX(pipe, 0), 1 << 16);
+		I915_WRITE_FW(PREC_PAL_EXT2_GC_MAX(pipe, 1), 1 << 16);
+		I915_WRITE_FW(PREC_PAL_EXT2_GC_MAX(pipe, 2), 1 << 16);
 	}
 }
 
 static void ivb_load_luts(const struct intel_crtc_state *crtc_state)
 {
 	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 	const struct drm_property_blob *gamma_lut = crtc_state->base.gamma_lut;
 	const struct drm_property_blob *degamma_lut = crtc_state->base.degamma_lut;
 
+	spin_lock_irq(&dev_priv->uncore.lock);
+
 	if (crtc_state->gamma_mode == GAMMA_MODE_MODE_8BIT) {
 		ilk_load_lut_8(crtc, gamma_lut);
 	} else if (crtc_state->gamma_mode == GAMMA_MODE_MODE_SPLIT) {
@@ -660,14 +672,19 @@  static void ivb_load_luts(const struct intel_crtc_state *crtc_state)
 				PAL_PREC_INDEX_VALUE(0));
 		ivb_load_lut_ext_max(crtc);
 	}
+
+	spin_unlock_irq(&dev_priv->uncore.lock);
 }
 
 static void bdw_load_luts(const struct intel_crtc_state *crtc_state)
 {
 	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 	const struct drm_property_blob *gamma_lut = crtc_state->base.gamma_lut;
 	const struct drm_property_blob *degamma_lut = crtc_state->base.degamma_lut;
 
+	spin_lock_irq(&dev_priv->uncore.lock);
+
 	if (crtc_state->gamma_mode == GAMMA_MODE_MODE_8BIT) {
 		ilk_load_lut_8(crtc, gamma_lut);
 	} else if (crtc_state->gamma_mode == GAMMA_MODE_MODE_SPLIT) {
@@ -683,6 +700,8 @@  static void bdw_load_luts(const struct intel_crtc_state *crtc_state)
 				PAL_PREC_INDEX_VALUE(0));
 		ivb_load_lut_ext_max(crtc);
 	}
+
+	spin_unlock_irq(&dev_priv->uncore.lock);
 }
 
 static void glk_load_degamma_lut(const struct intel_crtc_state *crtc_state)
@@ -699,8 +718,8 @@  static void glk_load_degamma_lut(const struct intel_crtc_state *crtc_state)
 	 * ignore the index bits, so we need to reset it to index 0
 	 * separately.
 	 */
-	I915_WRITE(PRE_CSC_GAMC_INDEX(pipe), 0);
-	I915_WRITE(PRE_CSC_GAMC_INDEX(pipe), PRE_CSC_GAMC_AUTO_INCREMENT);
+	I915_WRITE_FW(PRE_CSC_GAMC_INDEX(pipe), 0);
+	I915_WRITE_FW(PRE_CSC_GAMC_INDEX(pipe), PRE_CSC_GAMC_AUTO_INCREMENT);
 
 	for (i = 0; i < lut_size; i++) {
 		/*
@@ -716,12 +735,12 @@  static void glk_load_degamma_lut(const struct intel_crtc_state *crtc_state)
 		 * ToDo: Extend to max 7.0. Enable 32 bit input value
 		 * as compared to just 16 to achieve this.
 		 */
-		I915_WRITE(PRE_CSC_GAMC_DATA(pipe), lut[i].green);
+		I915_WRITE_FW(PRE_CSC_GAMC_DATA(pipe), lut[i].green);
 	}
 
 	/* Clamp values > 1.0. */
 	while (i++ < 35)
-		I915_WRITE(PRE_CSC_GAMC_DATA(pipe), 1 << 16);
+		I915_WRITE_FW(PRE_CSC_GAMC_DATA(pipe), 1 << 16);
 }
 
 static void glk_load_degamma_lut_linear(const struct intel_crtc_state *crtc_state)
@@ -737,24 +756,27 @@  static void glk_load_degamma_lut_linear(const struct intel_crtc_state *crtc_stat
 	 * ignore the index bits, so we need to reset it to index 0
 	 * separately.
 	 */
-	I915_WRITE(PRE_CSC_GAMC_INDEX(pipe), 0);
-	I915_WRITE(PRE_CSC_GAMC_INDEX(pipe), PRE_CSC_GAMC_AUTO_INCREMENT);
+	I915_WRITE_FW(PRE_CSC_GAMC_INDEX(pipe), 0);
+	I915_WRITE_FW(PRE_CSC_GAMC_INDEX(pipe), PRE_CSC_GAMC_AUTO_INCREMENT);
 
 	for (i = 0; i < lut_size; i++) {
 		u32 v = (i << 16) / (lut_size - 1);
 
-		I915_WRITE(PRE_CSC_GAMC_DATA(pipe), v);
+		I915_WRITE_FW(PRE_CSC_GAMC_DATA(pipe), v);
 	}
 
 	/* Clamp values > 1.0. */
 	while (i++ < 35)
-		I915_WRITE(PRE_CSC_GAMC_DATA(pipe), 1 << 16);
+		I915_WRITE_FW(PRE_CSC_GAMC_DATA(pipe), 1 << 16);
 }
 
 static void glk_load_luts(const struct intel_crtc_state *crtc_state)
 {
 	const struct drm_property_blob *gamma_lut = crtc_state->base.gamma_lut;
 	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+
+	spin_lock_irq(&dev_priv->uncore.lock);
 
 	/*
 	 * On GLK+ both pipe CSC and degamma LUT are controlled
@@ -775,6 +797,8 @@  static void glk_load_luts(const struct intel_crtc_state *crtc_state)
 		bdw_load_lut_10(crtc, gamma_lut, PAL_PREC_INDEX_VALUE(0));
 		ivb_load_lut_ext_max(crtc);
 	}
+
+	spin_unlock_irq(&dev_priv->uncore.lock);
 }
 
 /* ilk+ "12.4" interpolated format (high 10 bits) */
@@ -800,9 +824,9 @@  icl_load_gcmax(const struct intel_crtc_state *crtc_state,
 	enum pipe pipe = crtc->pipe;
 
 	/* Fixme: LUT entries are 16 bit only, so we can prog 0xFFFF max */
-	I915_WRITE(PREC_PAL_GC_MAX(pipe, 0), color->red);
-	I915_WRITE(PREC_PAL_GC_MAX(pipe, 1), color->green);
-	I915_WRITE(PREC_PAL_GC_MAX(pipe, 2), color->blue);
+	I915_WRITE_FW(PREC_PAL_GC_MAX(pipe, 0), color->red);
+	I915_WRITE_FW(PREC_PAL_GC_MAX(pipe, 1), color->green);
+	I915_WRITE_FW(PREC_PAL_GC_MAX(pipe, 2), color->blue);
 }
 
 static void
@@ -822,15 +846,15 @@  icl_program_gamma_superfine_segment(const struct intel_crtc_state *crtc_state)
 	 * Superfine segment has 9 entries, corresponding to values
 	 * 0, 1/(8 * 128 * 256), 2/(8 * 128 * 256) .... 8/(8 * 128 * 256).
 	 */
-	I915_WRITE(PREC_PAL_MULTI_SEG_INDEX(pipe), PAL_PREC_AUTO_INCREMENT);
+	I915_WRITE_FW(PREC_PAL_MULTI_SEG_INDEX(pipe), PAL_PREC_AUTO_INCREMENT);
 
 	for (i = 0; i < 9; i++) {
 		const struct drm_color_lut *entry = &lut[i];
 
-		I915_WRITE(PREC_PAL_MULTI_SEG_DATA(pipe),
-			   ilk_lut_12p4_ldw(entry));
-		I915_WRITE(PREC_PAL_MULTI_SEG_DATA(pipe),
-			   ilk_lut_12p4_udw(entry));
+		I915_WRITE_FW(PREC_PAL_MULTI_SEG_DATA(pipe),
+			      ilk_lut_12p4_ldw(entry));
+		I915_WRITE_FW(PREC_PAL_MULTI_SEG_DATA(pipe),
+			      ilk_lut_12p4_udw(entry));
 	}
 }
 
@@ -856,11 +880,11 @@  icl_program_gamma_multi_segment(const struct intel_crtc_state *crtc_state)
 	 * PAL_PREC_INDEX[0] and PAL_PREC_INDEX[1] map to seg2[1],
 	 * with seg2[0] being unused by the hardware.
 	 */
-	I915_WRITE(PREC_PAL_INDEX(pipe), PAL_PREC_AUTO_INCREMENT);
+	I915_WRITE_FW(PREC_PAL_INDEX(pipe), PAL_PREC_AUTO_INCREMENT);
 	for (i = 1; i < 257; i++) {
 		entry = &lut[i * 8];
-		I915_WRITE(PREC_PAL_DATA(pipe), ilk_lut_12p4_ldw(entry));
-		I915_WRITE(PREC_PAL_DATA(pipe), ilk_lut_12p4_udw(entry));
+		I915_WRITE_FW(PREC_PAL_DATA(pipe), ilk_lut_12p4_ldw(entry));
+		I915_WRITE_FW(PREC_PAL_DATA(pipe), ilk_lut_12p4_udw(entry));
 	}
 
 	/*
@@ -877,8 +901,8 @@  icl_program_gamma_multi_segment(const struct intel_crtc_state *crtc_state)
 	 */
 	for (i = 0; i < 256; i++) {
 		entry = &lut[i * 8 * 128];
-		I915_WRITE(PREC_PAL_DATA(pipe), ilk_lut_12p4_ldw(entry));
-		I915_WRITE(PREC_PAL_DATA(pipe), ilk_lut_12p4_udw(entry));
+		I915_WRITE_FW(PREC_PAL_DATA(pipe), ilk_lut_12p4_ldw(entry));
+		I915_WRITE_FW(PREC_PAL_DATA(pipe), ilk_lut_12p4_udw(entry));
 	}
 
 	/* The last entry in the LUT is to be programmed in GCMAX */
@@ -891,6 +915,9 @@  static void icl_load_luts(const struct intel_crtc_state *crtc_state)
 {
 	const struct drm_property_blob *gamma_lut = crtc_state->base.gamma_lut;
 	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+
+	spin_lock_irq(&dev_priv->uncore.lock);
 
 	if (crtc_state->base.degamma_lut)
 		glk_load_degamma_lut(crtc_state);
@@ -909,6 +936,8 @@  static void icl_load_luts(const struct intel_crtc_state *crtc_state)
 		bdw_load_lut_10(crtc, gamma_lut, PAL_PREC_INDEX_VALUE(0));
 		ivb_load_lut_ext_max(crtc);
 	}
+
+	spin_unlock_irq(&dev_priv->uncore.lock);
 }
 
 static u32 chv_cgm_degamma_ldw(const struct drm_color_lut *color)
@@ -931,10 +960,10 @@  static void chv_load_cgm_degamma(struct intel_crtc *crtc,
 	enum pipe pipe = crtc->pipe;
 
 	for (i = 0; i < lut_size; i++) {
-		I915_WRITE(CGM_PIPE_DEGAMMA(pipe, i, 0),
-			   chv_cgm_degamma_ldw(&lut[i]));
-		I915_WRITE(CGM_PIPE_DEGAMMA(pipe, i, 1),
-			   chv_cgm_degamma_udw(&lut[i]));
+		I915_WRITE_FW(CGM_PIPE_DEGAMMA(pipe, i, 0),
+			      chv_cgm_degamma_ldw(&lut[i]));
+		I915_WRITE_FW(CGM_PIPE_DEGAMMA(pipe, i, 1),
+			      chv_cgm_degamma_udw(&lut[i]));
 	}
 }
 
@@ -958,16 +987,17 @@  static void chv_load_cgm_gamma(struct intel_crtc *crtc,
 	enum pipe pipe = crtc->pipe;
 
 	for (i = 0; i < lut_size; i++) {
-		I915_WRITE(CGM_PIPE_GAMMA(pipe, i, 0),
-			   chv_cgm_gamma_ldw(&lut[i]));
-		I915_WRITE(CGM_PIPE_GAMMA(pipe, i, 1),
-			   chv_cgm_gamma_udw(&lut[i]));
+		I915_WRITE_FW(CGM_PIPE_GAMMA(pipe, i, 0),
+			      chv_cgm_gamma_ldw(&lut[i]));
+		I915_WRITE_FW(CGM_PIPE_GAMMA(pipe, i, 1),
+			      chv_cgm_gamma_udw(&lut[i]));
 	}
 }
 
 static void chv_load_luts(const struct intel_crtc_state *crtc_state)
 {
 	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 	const struct drm_property_blob *gamma_lut = crtc_state->base.gamma_lut;
 	const struct drm_property_blob *degamma_lut = crtc_state->base.degamma_lut;
 
@@ -978,11 +1008,15 @@  static void chv_load_luts(const struct intel_crtc_state *crtc_state)
 		return;
 	}
 
+	spin_lock_irq(&dev_priv->uncore.lock);
+
 	if (degamma_lut)
 		chv_load_cgm_degamma(crtc, degamma_lut);
 
 	if (gamma_lut)
 		chv_load_cgm_gamma(crtc, gamma_lut);
+
+	spin_unlock_irq(&dev_priv->uncore.lock);
 }
 
 void intel_color_load_luts(const struct intel_crtc_state *crtc_state)