[3/8] radeonsi: refactor si_update_vgt_shader_config

Submitted by Marek Olšák on June 20, 2019, 4:19 a.m.

Details

Message ID 20190620041941.14001-3-maraeo@gmail.com
State New
Headers show
Series "Series without cover letter" ( rev: 1 ) in Mesa

Not browsing as part of any series.

Commit Message

Marek Olšák June 20, 2019, 4:19 a.m.
From: Nicolai Hähnle <nicolai.haehnle@amd.com>

We'll have to extend this at some point, and using a bitfield union in
this way makes it easier to get the right index without excessive
branching.
---
 src/gallium/drivers/radeonsi/si_pipe.h        | 23 ++++++-
 .../drivers/radeonsi/si_state_shaders.c       | 65 +++++++++++--------
 2 files changed, 60 insertions(+), 28 deletions(-)

Patch hide | download patch | download mbox

diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index d32feab52c2..368cb4e473d 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -760,20 +760,41 @@  union si_vgt_param_key {
 		unsigned count_from_stream_output:1;
 		unsigned primitive_restart:1;
 		unsigned multi_instances_smaller_than_primgroup:1;
 		unsigned uses_instancing:1;
 		unsigned prim:4;
 #endif
 	} u;
 	uint32_t index;
 };
 
+#define SI_NUM_VGT_STAGES_KEY_BITS 2
+#define SI_NUM_VGT_STAGES_STATES (1 << SI_NUM_VGT_STAGES_KEY_BITS)
+
+/* The VGT_SHADER_STAGES key used to index the table of precomputed values.
+ * The fields are set by si_update_shaders from the currently bound shaders.
+ */
+union si_vgt_stages_key {
+	struct {
+#ifdef PIPE_ARCH_LITTLE_ENDIAN
+		unsigned tess:1;
+		unsigned gs:1;
+		unsigned _pad:32 - SI_NUM_VGT_STAGES_KEY_BITS;
+#else /* PIPE_ARCH_BIG_ENDIAN */
+		unsigned _pad:32 - SI_NUM_VGT_STAGES_KEY_BITS;
+		unsigned gs:1;
+		unsigned tess:1;
+#endif
+	} u;
+	uint32_t index;
+};
+
 struct si_texture_handle
 {
 	unsigned			desc_slot;
 	bool				desc_dirty;
 	struct pipe_sampler_view	*view;
 	struct si_sampler_state		sstate;
 };
 
 struct si_image_handle
 {
@@ -914,21 +935,21 @@  struct si_context {
 	struct si_streamout		streamout;
 	struct si_viewports		viewports;
 	unsigned			num_window_rectangles;
 	bool				window_rectangles_include;
 	struct pipe_scissor_state	window_rectangles[4];
 
 	/* Precomputed states. */
 	struct si_pm4_state		*init_config;
 	struct si_pm4_state		*init_config_gs_rings;
 	bool				init_config_has_vgt_flush;
-	struct si_pm4_state		*vgt_shader_config[4];
+	struct si_pm4_state		*vgt_shader_config[SI_NUM_VGT_STAGES_STATES];
 
 	/* shaders */
 	struct si_shader_ctx_state	ps_shader;
 	struct si_shader_ctx_state	gs_shader;
 	struct si_shader_ctx_state	vs_shader;
 	struct si_shader_ctx_state	tcs_shader;
 	struct si_shader_ctx_state	tes_shader;
 	struct si_shader_ctx_state	cs_prim_discard_state;
 	struct si_cs_shader_state	cs_shader_state;
 
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index fab2e255742..0e3a1c3a776 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -3311,74 +3311,83 @@  static void si_init_tess_factor_ring(struct si_context *sctx)
 	}
 
 	/* Flush the context to re-emit the init_config state.
 	 * This is done only once in a lifetime of a context.
 	 */
 	si_pm4_upload_indirect_buffer(sctx, sctx->init_config);
 	sctx->initial_gfx_cs_size = 0; /* force flush */
 	si_flush_gfx_cs(sctx, RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW, NULL);
 }
 
-static void si_update_vgt_shader_config(struct si_context *sctx)
+static struct si_pm4_state *si_build_vgt_shader_config(struct si_screen *screen,
+						       union si_vgt_stages_key key)
 {
-	/* Calculate the index of the config.
-	 * 0 = VS, 1 = VS+GS, 2 = VS+Tess, 3 = VS+Tess+GS */
-	unsigned index = 2*!!sctx->tes_shader.cso + !!sctx->gs_shader.cso;
-	struct si_pm4_state **pm4 = &sctx->vgt_shader_config[index];
+	struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
+	uint32_t stages = 0;
 
-	if (!*pm4) {
-		uint32_t stages = 0;
+	if (key.u.tess) {
+		stages |= S_028B54_LS_EN(V_028B54_LS_STAGE_ON) |
+		          S_028B54_HS_EN(1) | S_028B54_DYNAMIC_HS(1);
 
-		*pm4 = CALLOC_STRUCT(si_pm4_state);
+		if (key.u.gs)
+			stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_DS) |
+				  S_028B54_GS_EN(1);
+		else
+			stages |= S_028B54_VS_EN(V_028B54_VS_STAGE_DS);
+	} else if (key.u.gs) {
+		stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_REAL) |
+			  S_028B54_GS_EN(1);
+	}
 
-		if (sctx->tes_shader.cso) {
-			stages |= S_028B54_LS_EN(V_028B54_LS_STAGE_ON) |
-				  S_028B54_HS_EN(1) | S_028B54_DYNAMIC_HS(1);
+	if (key.u.gs)
+		stages |= S_028B54_VS_EN(V_028B54_VS_STAGE_COPY_SHADER);
 
-			if (sctx->gs_shader.cso)
-				stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_DS) |
-					  S_028B54_GS_EN(1) |
-				          S_028B54_VS_EN(V_028B54_VS_STAGE_COPY_SHADER);
-			else
-				stages |= S_028B54_VS_EN(V_028B54_VS_STAGE_DS);
-		} else if (sctx->gs_shader.cso) {
-			stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_REAL) |
-				  S_028B54_GS_EN(1) |
-			          S_028B54_VS_EN(V_028B54_VS_STAGE_COPY_SHADER);
-		}
+	if (screen->info.chip_class >= GFX9)
+		stages |= S_028B54_MAX_PRIMGRP_IN_WAVE(2);
 
-		if (sctx->chip_class >= GFX9)
-			stages |= S_028B54_MAX_PRIMGRP_IN_WAVE(2);
+	si_pm4_set_reg(pm4, R_028B54_VGT_SHADER_STAGES_EN, stages);
+	return pm4;
+}
 
-		si_pm4_set_reg(*pm4, R_028B54_VGT_SHADER_STAGES_EN, stages);
-	}
+static void si_update_vgt_shader_config(struct si_context *sctx,
+					union si_vgt_stages_key key)
+{
+	struct si_pm4_state **pm4 = &sctx->vgt_shader_config[key.index];
+
+	if (unlikely(!*pm4))
+		*pm4 = si_build_vgt_shader_config(sctx->screen, key);
 	si_pm4_bind_state(sctx, vgt_shader_config, *pm4);
 }
 
 bool si_update_shaders(struct si_context *sctx)
 {
 	struct pipe_context *ctx = (struct pipe_context*)sctx;
 	struct si_compiler_ctx_state compiler_state;
 	struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
 	struct si_shader *old_vs = si_get_vs_state(sctx);
 	bool old_clip_disable = old_vs ? old_vs->key.opt.clip_disable : false;
 	struct si_shader *old_ps = sctx->ps_shader.current;
+	union si_vgt_stages_key key;
 	unsigned old_spi_shader_col_format =
 		old_ps ? old_ps->key.part.ps.epilog.spi_shader_col_format : 0;
 	int r;
 
 	compiler_state.compiler = &sctx->compiler;
 	compiler_state.debug = sctx->debug;
 	compiler_state.is_debug_context = sctx->is_debug;
 
+	key.index = 0;
+
 	/* Update stages before GS. */
 	if (sctx->tes_shader.cso) {
+		key.u.tess = 1;
+
 		if (!sctx->tess_rings) {
 			si_init_tess_factor_ring(sctx);
 			if (!sctx->tess_rings)
 				return false;
 		}
 
 		/* VS as LS */
 		if (sctx->chip_class <= GFX8) {
 			r = si_shader_select(ctx, &sctx->vs_shader,
 					     &compiler_state);
@@ -3443,35 +3452,37 @@  bool si_update_shaders(struct si_context *sctx)
 		r = si_shader_select(ctx, &sctx->vs_shader, &compiler_state);
 		if (r)
 			return false;
 		si_pm4_bind_state(sctx, vs, sctx->vs_shader.current->pm4);
 		si_pm4_bind_state(sctx, ls, NULL);
 		si_pm4_bind_state(sctx, hs, NULL);
 	}
 
 	/* Update GS. */
 	if (sctx->gs_shader.cso) {
+		key.u.gs = 1;
+
 		r = si_shader_select(ctx, &sctx->gs_shader, &compiler_state);
 		if (r)
 			return false;
 		si_pm4_bind_state(sctx, gs, sctx->gs_shader.current->pm4);
 		si_pm4_bind_state(sctx, vs, sctx->gs_shader.cso->gs_copy_shader->pm4);
 
 		if (!si_update_gs_ring_buffers(sctx))
 			return false;
 	} else {
 		si_pm4_bind_state(sctx, gs, NULL);
 		if (sctx->chip_class <= GFX8)
 			si_pm4_bind_state(sctx, es, NULL);
 	}
 
-	si_update_vgt_shader_config(sctx);
+	si_update_vgt_shader_config(sctx, key);
 
 	if (old_clip_disable != si_get_vs_state(sctx)->key.opt.clip_disable)
 		si_mark_atom_dirty(sctx, &sctx->atoms.s.clip_regs);
 
 	if (sctx->ps_shader.cso) {
 		unsigned db_shader_control;
 
 		r = si_shader_select(ctx, &sctx->ps_shader, &compiler_state);
 		if (r)
 			return false;