[2/4] radeonsi/gfx9: apply the scissor bug workaround to si_emit_streamout_end

Submitted by Marek Olšák on April 17, 2019, 11:39 p.m.

Details

Message ID 1555544353-27275-2-git-send-email-maraeo@gmail.com
State New
Headers show
Series "Series without cover letter" ( rev: 1 ) in Mesa

Not browsing as part of any series.

Commit Message

Marek Olšák April 17, 2019, 11:39 p.m.
From: Marek Olšák <marek.olsak@amd.com>

si_emit_streamout_end is called directly, it's not a state.

Cc: 19.0 <mesa-stable@lists.freedesktop.org>
---
 src/gallium/drivers/radeonsi/si_pipe.c            |  2 ++
 src/gallium/drivers/radeonsi/si_pipe.h            |  1 +
 src/gallium/drivers/radeonsi/si_state_draw.c      |  2 +-
 src/gallium/drivers/radeonsi/si_state_streamout.c | 10 ++++++++++
 4 files changed, 14 insertions(+), 1 deletion(-)

Patch hide | download patch | download mbox

diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index 5caeb57..43c4914 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -1069,20 +1069,22 @@  struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws,
 
 	sscreen->has_out_of_order_rast = sscreen->info.chip_class >= VI &&
 					 sscreen->info.max_se >= 2 &&
 					 !(sscreen->debug_flags & DBG(NO_OUT_OF_ORDER));
 	sscreen->assume_no_z_fights =
 		driQueryOptionb(config->options, "radeonsi_assume_no_z_fights");
 	sscreen->commutative_blend_add =
 		driQueryOptionb(config->options, "radeonsi_commutative_blend_add");
 	sscreen->clear_db_cache_before_clear =
 		driQueryOptionb(config->options, "radeonsi_clear_db_cache_before_clear");
+	sscreen->has_gfx9_scissor_bug = sscreen->info.family == CHIP_VEGA10 ||
+					sscreen->info.family == CHIP_RAVEN;
 	sscreen->has_msaa_sample_loc_bug = (sscreen->info.family >= CHIP_POLARIS10 &&
 					    sscreen->info.family <= CHIP_POLARIS12) ||
 					   sscreen->info.family == CHIP_VEGA10 ||
 					   sscreen->info.family == CHIP_RAVEN;
 	sscreen->has_ls_vgpr_init_bug = sscreen->info.family == CHIP_VEGA10 ||
 					sscreen->info.family == CHIP_RAVEN;
 	sscreen->has_dcc_constant_encode = sscreen->info.family == CHIP_RAVEN2;
 
 	/* Only enable primitive binning on APUs by default. */
 	sscreen->dpbb_allowed = sscreen->info.family == CHIP_RAVEN ||
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 301d386..ee53192 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -463,20 +463,21 @@  struct si_screen {
 	unsigned			eqaa_force_coverage_samples;
 	unsigned			eqaa_force_z_samples;
 	unsigned			eqaa_force_color_samples;
 	bool				has_clear_state;
 	bool				has_distributed_tess;
 	bool				has_draw_indirect_multi;
 	bool				has_out_of_order_rast;
 	bool				assume_no_z_fights;
 	bool				commutative_blend_add;
 	bool				clear_db_cache_before_clear;
+	bool				has_gfx9_scissor_bug;
 	bool				has_msaa_sample_loc_bug;
 	bool				has_ls_vgpr_init_bug;
 	bool				has_dcc_constant_encode;
 	bool				dpbb_allowed;
 	bool				dfsm_allowed;
 	bool				llvm_has_working_vgpr_indexing;
 
 	/* Whether shaders are monolithic (1-part) or separate (3-part). */
 	bool				use_monolithic_shaders;
 	bool				record_llvm_ir;
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index 2a514f1..e2fba41 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -1220,21 +1220,21 @@  static void si_get_draw_start_count(struct si_context *sctx,
 }
 
 static void si_emit_all_states(struct si_context *sctx, const struct pipe_draw_info *info,
 			       unsigned skip_atom_mask)
 {
 	unsigned num_patches = 0;
 	/* Vega10/Raven scissor bug workaround. When any context register is
 	 * written (i.e. the GPU rolls the context), PA_SC_VPORT_SCISSOR
 	 * registers must be written too.
 	 */
-	bool handle_scissor_bug = (sctx->family == CHIP_VEGA10 || sctx->family == CHIP_RAVEN) &&
+	bool handle_scissor_bug = sctx->screen->has_gfx9_scissor_bug &&
 				  !si_is_atom_dirty(sctx, &sctx->atoms.s.scissors);
 	bool context_roll = false; /* set correctly for GFX9 only */
 
 	context_roll |= si_emit_rasterizer_prim_state(sctx);
 	if (sctx->tes_shader.cso)
 		context_roll |= si_emit_derived_tess_state(sctx, info, &num_patches);
 
 	if (handle_scissor_bug &&
 	    (info->count_from_stream_output ||
 	     sctx->dirty_atoms & si_atoms_that_always_roll_context() ||
diff --git a/src/gallium/drivers/radeonsi/si_state_streamout.c b/src/gallium/drivers/radeonsi/si_state_streamout.c
index 2bf6862..de0b051 100644
--- a/src/gallium/drivers/radeonsi/si_state_streamout.c
+++ b/src/gallium/drivers/radeonsi/si_state_streamout.c
@@ -269,20 +269,21 @@  static void si_emit_streamout_begin(struct si_context *sctx)
 
 	sctx->streamout.begin_emitted = true;
 }
 
 void si_emit_streamout_end(struct si_context *sctx)
 {
 	struct radeon_cmdbuf *cs = sctx->gfx_cs;
 	struct si_streamout_target **t = sctx->streamout.targets;
 	unsigned i;
 	uint64_t va;
+	bool context_reg_changed = false;
 
 	si_flush_vgt_streamout(sctx);
 
 	for (i = 0; i < sctx->streamout.num_targets; i++) {
 		if (!t[i])
 			continue;
 
 		va = t[i]->buf_filled_size->gpu_address + t[i]->buf_filled_size_offset;
 		radeon_emit(cs, PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0));
 		radeon_emit(cs, STRMOUT_SELECT_BUFFER(i) |
@@ -296,25 +297,34 @@  void si_emit_streamout_end(struct si_context *sctx)
 		radeon_add_to_buffer_list(sctx,  sctx->gfx_cs,
 					  t[i]->buf_filled_size,
 					  RADEON_USAGE_WRITE,
 					  RADEON_PRIO_SO_FILLED_SIZE);
 
 		/* Zero the buffer size. The counters (primitives generated,
 		 * primitives emitted) may be enabled even if there is not
 		 * buffer bound. This ensures that the primitives-emitted query
 		 * won't increment. */
 		radeon_set_context_reg(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16*i, 0);
+		context_reg_changed = true;
 
 		t[i]->buf_filled_size_valid = true;
 	}
 
 	sctx->streamout.begin_emitted = false;
+
+	/* If we caused a context roll (= changed context registers),
+	 * we need to apply the scissor bug workaround.
+	 */
+	if (sctx->screen->has_gfx9_scissor_bug && context_reg_changed) {
+		sctx->scissors.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
+		si_mark_atom_dirty(sctx, &sctx->atoms.s.scissors);
+	}
 }
 
 /* STREAMOUT CONFIG DERIVED STATE
  *
  * Streamout must be enabled for the PRIMITIVES_GENERATED query to work.
  * The buffer mask is an independent state, so no writes occur if there
  * are no buffers bound.
  */
 
 static void si_emit_streamout_enable(struct si_context *sctx)