[4/6] radeonsi: optimizing SET_CONTEXT_REG for shaders PS

Submitted by Jiang, Sonny on Sept. 18, 2018, 8:21 p.m.

Details

Message ID 20180918202115.9125-4-sonny.jiang@amd.com
State New
Headers show
Series "Series without cover letter" ( rev: 1 ) in Mesa

Browsing this patch as part of:
"Series without cover letter" rev 1 in Mesa
<< prev patch [4/6] next patch >>

Commit Message

Jiang, Sonny Sept. 18, 2018, 8:21 p.m.
Signed-off-by: Sonny Jiang <sonny.jiang@amd.com>
---
 src/gallium/drivers/radeonsi/si_gfx_cs.c      |   7 +
 src/gallium/drivers/radeonsi/si_state.h       |  11 ++
 .../drivers/radeonsi/si_state_shaders.c       | 144 ++++++++++--------
 3 files changed, 98 insertions(+), 64 deletions(-)

Patch hide | download patch | download mbox

diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c b/src/gallium/drivers/radeonsi/si_gfx_cs.c
index 2e10d766a6..8c1bee8ed6 100644
--- a/src/gallium/drivers/radeonsi/si_gfx_cs.c
+++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c
@@ -371,6 +371,13 @@  void si_begin_new_gfx_cs(struct si_context *ctx)
 		ctx->tracked_regs.reg_value[SI_TRACKED_SPI_VS_OUT_CONFIG]  = 0x00000000;
 		ctx->tracked_regs.reg_value[SI_TRACKED_SPI_SHADER_POS_FORMAT]  = 0x00000000;
 		ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_VTE_CNTL]  = 0x00000000;
+		ctx->tracked_regs.reg_value[SI_TRACKED_SPI_PS_INPUT_ENA]  = 0x00000000;
+		ctx->tracked_regs.reg_value[SI_TRACKED_SPI_PS_INPUT_ADDR]  = 0x00000000;
+		ctx->tracked_regs.reg_value[SI_TRACKED_SPI_BARYC_CNTL]  = 0x00000000;
+		ctx->tracked_regs.reg_value[SI_TRACKED_SPI_PS_IN_CONTROL]  = 0x00000002;
+		ctx->tracked_regs.reg_value[SI_TRACKED_SPI_SHADER_Z_FORMAT]  = 0x00000000;
+		ctx->tracked_regs.reg_value[SI_TRACKED_SPI_SHADER_COL_FORMAT]  = 0x00000000;
+		ctx->tracked_regs.reg_value[SI_TRACKED_CB_SHADER_MASK]  = 0xffffffff;
 
 		/* Set all saved registers state to saved. */
 		ctx->tracked_regs.reg_saved = 0xffffffffffffffff;
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index bf1ae9f18f..878b67f0ed 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -302,6 +302,17 @@  enum si_tracked_reg {
 	SI_TRACKED_SPI_SHADER_POS_FORMAT,
 	SI_TRACKED_PA_CL_VTE_CNTL,
 
+	SI_TRACKED_SPI_PS_INPUT_ENA, /* 2 consecutive registers */
+	SI_TRACKED_SPI_PS_INPUT_ADDR,
+
+	SI_TRACKED_SPI_BARYC_CNTL,
+	SI_TRACKED_SPI_PS_IN_CONTROL,
+
+	SI_TRACKED_SPI_SHADER_Z_FORMAT, /* 2 consecutive registers */
+	SI_TRACKED_SPI_SHADER_COL_FORMAT,
+
+	SI_TRACKED_CB_SHADER_MASK,
+
 	SI_NUM_TRACKED_REGS,
 };
 
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 332fdae3b3..e5732f7920 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -1100,12 +1100,88 @@  static unsigned si_get_spi_shader_col_format(struct si_shader *shader)
 	return value;
 }
 
-static void si_shader_ps(struct si_shader *shader)
+static void si_emit_shader_ps(struct si_context *sctx)
 {
+	struct si_shader *shader = sctx->queued.named.ps->shader;
+	if (!shader)
+		return;
+
 	struct tgsi_shader_info *info = &shader->selector->info;
-	struct si_pm4_state *pm4;
-	unsigned spi_ps_in_control, spi_shader_col_format, cb_shader_mask;
 	unsigned spi_baryc_cntl = S_0286E0_FRONT_FACE_ALL_BITS(1);
+	unsigned spi_ps_in_control, spi_shader_col_format, cb_shader_mask;
+
+	/* R_0286CC_SPI_PS_INPUT_ENA, R_0286D0_SPI_PS_INPUT_ADDR */
+	radeon_opt_set_context_reg2(sctx, R_0286CC_SPI_PS_INPUT_ENA,
+				    SI_TRACKED_SPI_PS_INPUT_ENA,
+				    shader->config.spi_ps_input_ena,
+				    shader->config.spi_ps_input_addr);
+
+	/* SPI_BARYC_CNTL.POS_FLOAT_LOCATION
+	 * Possible values:
+	 * 0 -> Position = pixel center
+	 * 1 -> Position = pixel centroid
+	 * 2 -> Position = at sample position
+	 *
+	 * From GLSL 4.5 specification, section 7.1:
+	 *   "The variable gl_FragCoord is available as an input variable from
+	 *    within fragment shaders and it holds the window relative coordinates
+	 *    (x, y, z, 1/w) values for the fragment. If multi-sampling, this
+	 *    value can be for any location within the pixel, or one of the
+	 *    fragment samples. The use of centroid does not further restrict
+	 *    this value to be inside the current primitive."
+	 *
+	 * Meaning that centroid has no effect and we can return anything within
+	 * the pixel. Thus, return the value at sample position, because that's
+	 * the most accurate one shaders can get.
+	 */
+	spi_baryc_cntl |= S_0286E0_POS_FLOAT_LOCATION(2);
+
+	if (info->properties[TGSI_PROPERTY_FS_COORD_PIXEL_CENTER] ==
+	    TGSI_FS_COORD_PIXEL_CENTER_INTEGER)
+		spi_baryc_cntl |= S_0286E0_POS_FLOAT_ULC(1);
+
+	/* Set interpolation controls. */
+	spi_ps_in_control = S_0286D8_NUM_INTERP(si_get_ps_num_interp(shader));
+
+	radeon_opt_set_context_reg(sctx, R_0286E0_SPI_BARYC_CNTL,
+				   SI_TRACKED_SPI_BARYC_CNTL, spi_baryc_cntl);
+	radeon_opt_set_context_reg(sctx, R_0286D8_SPI_PS_IN_CONTROL,
+				   SI_TRACKED_SPI_PS_IN_CONTROL,
+				   spi_ps_in_control);
+
+	spi_shader_col_format = si_get_spi_shader_col_format(shader);
+	cb_shader_mask = ac_get_cb_shader_mask(spi_shader_col_format);
+
+	/* Ensure that some export memory is always allocated, for two reasons:
+	 *
+	 * 1) Correctness: The hardware ignores the EXEC mask if no export
+	 *    memory is allocated, so KILL and alpha test do not work correctly
+	 *    without this.
+	 * 2) Performance: Every shader needs at least a NULL export, even when
+	 *    it writes no color/depth output. The NULL export instruction
+	 *    stalls without this setting.
+	 *
+	 * Don't add this to CB_SHADER_MASK.
+	 */
+	if (!spi_shader_col_format &&
+	    !info->writes_z && !info->writes_stencil && !info->writes_samplemask)
+		spi_shader_col_format = V_028714_SPI_SHADER_32_R;
+
+	/* R_028710_SPI_SHADER_Z_FORMAT, R_028714_SPI_SHADER_COL_FORMAT */
+	radeon_opt_set_context_reg2(sctx, R_028710_SPI_SHADER_Z_FORMAT,
+			SI_TRACKED_SPI_SHADER_Z_FORMAT,
+			ac_get_spi_shader_z_format(info->writes_z,
+						   info->writes_stencil,
+						   info->writes_samplemask),
+			spi_shader_col_format);
+
+	radeon_opt_set_context_reg(sctx, R_02823C_CB_SHADER_MASK,
+				   SI_TRACKED_CB_SHADER_MASK, cb_shader_mask);
+}
+
+static void si_shader_ps(struct si_shader *shader)
+{
+	struct si_pm4_state *pm4;
 	uint64_t va;
 	unsigned input_ena = shader->config.spi_ps_input_ena;
 
@@ -1157,67 +1233,7 @@  static void si_shader_ps(struct si_shader *shader)
 	if (!pm4)
 		return;
 
-	/* SPI_BARYC_CNTL.POS_FLOAT_LOCATION
-	 * Possible vaules:
-	 * 0 -> Position = pixel center
-	 * 1 -> Position = pixel centroid
-	 * 2 -> Position = at sample position
-	 *
-	 * From GLSL 4.5 specification, section 7.1:
-	 *   "The variable gl_FragCoord is available as an input variable from
-	 *    within fragment shaders and it holds the window relative coordinates
-	 *    (x, y, z, 1/w) values for the fragment. If multi-sampling, this
-	 *    value can be for any location within the pixel, or one of the
-	 *    fragment samples. The use of centroid does not further restrict
-	 *    this value to be inside the current primitive."
-	 *
-	 * Meaning that centroid has no effect and we can return anything within
-	 * the pixel. Thus, return the value at sample position, because that's
-	 * the most accurate one shaders can get.
-	 */
-	spi_baryc_cntl |= S_0286E0_POS_FLOAT_LOCATION(2);
-
-	if (info->properties[TGSI_PROPERTY_FS_COORD_PIXEL_CENTER] ==
-	    TGSI_FS_COORD_PIXEL_CENTER_INTEGER)
-		spi_baryc_cntl |= S_0286E0_POS_FLOAT_ULC(1);
-
-	spi_shader_col_format = si_get_spi_shader_col_format(shader);
-	cb_shader_mask = ac_get_cb_shader_mask(spi_shader_col_format);
-
-	/* Ensure that some export memory is always allocated, for two reasons:
-	 *
-	 * 1) Correctness: The hardware ignores the EXEC mask if no export
-	 *    memory is allocated, so KILL and alpha test do not work correctly
-	 *    without this.
-	 * 2) Performance: Every shader needs at least a NULL export, even when
-	 *    it writes no color/depth output. The NULL export instruction
-	 *    stalls without this setting.
-	 *
-	 * Don't add this to CB_SHADER_MASK.
-	 */
-	if (!spi_shader_col_format &&
-	    !info->writes_z && !info->writes_stencil && !info->writes_samplemask)
-		spi_shader_col_format = V_028714_SPI_SHADER_32_R;
-
-	si_pm4_set_reg(pm4, R_0286CC_SPI_PS_INPUT_ENA, input_ena);
-	si_pm4_set_reg(pm4, R_0286D0_SPI_PS_INPUT_ADDR,
-		       shader->config.spi_ps_input_addr);
-
-	/* Set interpolation controls. */
-	spi_ps_in_control = S_0286D8_NUM_INTERP(si_get_ps_num_interp(shader));
-
-	/* Set registers. */
-	si_pm4_set_reg(pm4, R_0286E0_SPI_BARYC_CNTL, spi_baryc_cntl);
-	si_pm4_set_reg(pm4, R_0286D8_SPI_PS_IN_CONTROL, spi_ps_in_control);
-
-	si_pm4_set_reg(pm4, R_028710_SPI_SHADER_Z_FORMAT,
-		       ac_get_spi_shader_z_format(info->writes_z,
-						  info->writes_stencil,
-						  info->writes_samplemask));
-
-	si_pm4_set_reg(pm4, R_028714_SPI_SHADER_COL_FORMAT, spi_shader_col_format);
-	si_pm4_set_reg(pm4, R_02823C_CB_SHADER_MASK, cb_shader_mask);
-
+	pm4->atom.emit = si_emit_shader_ps;
 	va = shader->bo->gpu_address;
 	si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_BINARY);
 	si_pm4_set_reg(pm4, R_00B020_SPI_SHADER_PGM_LO_PS, va >> 8);