[6/6] radeonsi: optimizing SET_CONTEXT_REG for shaders vgt_vertex_reuse

Submitted by Jiang, Sonny on Sept. 18, 2018, 8:21 p.m.

Details

Message ID 20180918202115.9125-6-sonny.jiang@amd.com
State New
Headers show
Series "Series without cover letter" ( rev: 1 ) in Mesa

Browsing this patch as part of:
"Series without cover letter" rev 1 in Mesa
<< prev patch [6/6] next patch >>

Commit Message

Jiang, Sonny Sept. 18, 2018, 8:21 p.m.
Signed-off-by: Sonny Jiang <sonny.jiang@amd.com>
---
 src/gallium/drivers/radeonsi/si_gfx_cs.c      |  1 +
 src/gallium/drivers/radeonsi/si_state.h       |  1 +
 .../drivers/radeonsi/si_state_shaders.c       | 25 +++++++++----------
 3 files changed, 14 insertions(+), 13 deletions(-)

Patch hide | download patch | download mbox

diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c b/src/gallium/drivers/radeonsi/si_gfx_cs.c
index 7cf1f6f4b7..77b7bf8f76 100644
--- a/src/gallium/drivers/radeonsi/si_gfx_cs.c
+++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c
@@ -379,6 +379,7 @@  void si_begin_new_gfx_cs(struct si_context *ctx)
 		ctx->tracked_regs.reg_value[SI_TRACKED_SPI_SHADER_COL_FORMAT]  = 0x00000000;
 		ctx->tracked_regs.reg_value[SI_TRACKED_CB_SHADER_MASK]  = 0xffffffff;
 		ctx->tracked_regs.reg_value[SI_TRACKED_VGT_TF_PARAM]  = 0x00000000;
+		ctx->tracked_regs.reg_value[SI_TRACKED_VGT_VERTEX_REUSE_BLOCK_CNTL]  = 0x0000001e; /* From VI */
 
 		/* Set all saved registers state to saved. */
 		ctx->tracked_regs.reg_saved = 0xffffffffffffffff;
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index 54b03e0992..fffc63680d 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -313,6 +313,7 @@  enum si_tracked_reg {
 
 	SI_TRACKED_CB_SHADER_MASK,
 	SI_TRACKED_VGT_TF_PARAM,
+	SI_TRACKED_VGT_VERTEX_REUSE_BLOCK_CNTL,
 
 	SI_NUM_TRACKED_REGS,
 };
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index a8d2769475..21564ff186 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -417,14 +417,13 @@  static void si_set_tesseval_regs(struct si_context *sctx,
  *
  * If "shader" is NULL, it's assumed it's not LS or GS copy shader.
  */
-static void polaris_set_vgt_vertex_reuse(struct si_screen *sscreen,
+static void polaris_set_vgt_vertex_reuse(struct si_context *sctx,
 					 struct si_shader_selector *sel,
-					 struct si_shader *shader,
-					 struct si_pm4_state *pm4)
+					 struct si_shader *shader)
 {
 	unsigned type = sel->type;
 
-	if (sscreen->info.family < CHIP_POLARIS10)
+	if (sctx->family < CHIP_POLARIS10)
 		return;
 
 	/* VS as VS, or VS as ES: */
@@ -440,8 +439,10 @@  static void polaris_set_vgt_vertex_reuse(struct si_screen *sscreen,
 		    PIPE_TESS_SPACING_FRACTIONAL_ODD)
 			vtx_reuse_depth = 14;
 
-		si_pm4_set_reg(pm4, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL,
-			       vtx_reuse_depth);
+		radeon_opt_set_context_reg(sctx,
+				R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL,
+				SI_TRACKED_VGT_VERTEX_REUSE_BLOCK_CNTL,
+				vtx_reuse_depth);
 	}
 }
 
@@ -572,6 +573,7 @@  static void si_emit_shader_es(struct si_context *sctx)
 	if (shader->selector->type == PIPE_SHADER_TESS_EVAL)
 		si_set_tesseval_regs(sctx, shader->selector);
 
+	polaris_set_vgt_vertex_reuse(sctx, shader->selector, shader);
 }
 
 static void si_shader_es(struct si_screen *sscreen, struct si_shader *shader)
@@ -616,8 +618,6 @@  static void si_shader_es(struct si_screen *sscreen, struct si_shader *shader)
 		       S_00B32C_USER_SGPR(num_user_sgprs) |
 		       S_00B32C_OC_LDS_EN(oc_lds_en) |
 		       S_00B32C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0));
-
-	polaris_set_vgt_vertex_reuse(sscreen, shader->selector, shader, pm4);
 }
 
 static unsigned si_conv_prim_to_gs_out(unsigned mode)
@@ -832,6 +832,8 @@  static void si_emit_shader_gs(struct si_context *sctx)
 
 		if (shader->key.part.gs.es->type == PIPE_SHADER_TESS_EVAL)
 			si_set_tesseval_regs(sctx, shader->key.part.gs.es);
+
+		polaris_set_vgt_vertex_reuse(sctx, shader->key.part.gs.es, NULL);
 	}
 }
 
@@ -899,9 +901,6 @@  static void si_shader_gs(struct si_screen *sscreen, struct si_shader *shader)
 			       S_00B22C_OC_LDS_EN(es_type == PIPE_SHADER_TESS_EVAL) |
 			       S_00B22C_LDS_SIZE(gs_info.lds_size) |
 			       S_00B22C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0));
-
-		polaris_set_vgt_vertex_reuse(sscreen, shader->key.part.gs.es,
-					     NULL, pm4);
 	} else {
 		si_pm4_set_reg(pm4, R_00B220_SPI_SHADER_PGM_LO_GS, va >> 8);
 		si_pm4_set_reg(pm4, R_00B224_SPI_SHADER_PGM_HI_GS, S_00B224_MEM_BASE(va >> 40));
@@ -1004,6 +1003,8 @@  static void si_emit_shader_vs(struct si_context *sctx)
 
 	if (shader->selector->type == PIPE_SHADER_TESS_EVAL)
 		si_set_tesseval_regs(sctx, shader->selector);
+
+	polaris_set_vgt_vertex_reuse(sctx, shader->selector, shader);
 }
 
 /**
@@ -1072,8 +1073,6 @@  static void si_shader_vs(struct si_screen *sscreen, struct si_shader *shader,
 		       S_00B12C_SO_BASE3_EN(!!shader->selector->so.stride[3]) |
 		       S_00B12C_SO_EN(!!shader->selector->so.num_outputs) |
 		       S_00B12C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0));
-
-	polaris_set_vgt_vertex_reuse(sscreen, shader->selector, shader, pm4);
 }
 
 static unsigned si_get_ps_num_interp(struct si_shader *ps)