[03/13] radeonsi: put both tessellation rings into 1 buffer

Submitted by Marek Olšák on Feb. 17, 2018, 7:43 p.m.

Details

Message ID 1518896608-12843-4-git-send-email-maraeo@gmail.com
State New
Headers show
Series "RadeonSI: Reduce user SGPR usage" ( rev: 1 ) in Mesa

Not browsing as part of any series.

Commit Message

Marek Olšák Feb. 17, 2018, 7:43 p.m.
From: Marek Olšák <marek.olsak@amd.com>

---
 src/gallium/drivers/radeonsi/si_pipe.c          |  3 +-
 src/gallium/drivers/radeonsi/si_pipe.h          |  3 +-
 src/gallium/drivers/radeonsi/si_state_draw.c    |  2 +-
 src/gallium/drivers/radeonsi/si_state_shaders.c | 39 ++++++++++---------------
 4 files changed, 18 insertions(+), 29 deletions(-)

Patch hide | download patch | download mbox

diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index 83133cb..6deabba 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -110,22 +110,21 @@  static void si_destroy_context(struct pipe_context *context)
 	 * properly.
 	 */
 	struct pipe_framebuffer_state fb = {};
 	if (context->set_framebuffer_state)
 		context->set_framebuffer_state(context, &fb);
 
 	si_release_all_descriptors(sctx);
 
 	pipe_resource_reference(&sctx->esgs_ring, NULL);
 	pipe_resource_reference(&sctx->gsvs_ring, NULL);
-	pipe_resource_reference(&sctx->tf_ring, NULL);
-	pipe_resource_reference(&sctx->tess_offchip_ring, NULL);
+	pipe_resource_reference(&sctx->tess_rings, NULL);
 	pipe_resource_reference(&sctx->null_const_buf.buffer, NULL);
 	r600_resource_reference(&sctx->border_color_buffer, NULL);
 	free(sctx->border_color_table);
 	r600_resource_reference(&sctx->scratch_buffer, NULL);
 	r600_resource_reference(&sctx->compute_scratch_buffer, NULL);
 	r600_resource_reference(&sctx->wait_mem_scratch, NULL);
 
 	si_pm4_free_state(sctx, sctx->init_config, ~0);
 	if (sctx->init_config_gs_rings)
 		si_pm4_free_state(sctx, sctx->init_config_gs_rings, ~0);
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 7b23e8c..896b640 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -505,22 +505,21 @@  struct si_context {
 	unsigned			shader_needs_decompress_mask;
 	struct si_buffer_resources	rw_buffers;
 	struct si_buffer_resources	const_and_shader_buffers[SI_NUM_SHADERS];
 	struct si_samplers		samplers[SI_NUM_SHADERS];
 	struct si_images		images[SI_NUM_SHADERS];
 
 	/* other shader resources */
 	struct pipe_constant_buffer	null_const_buf; /* used for set_constant_buffer(NULL) on CIK */
 	struct pipe_resource		*esgs_ring;
 	struct pipe_resource		*gsvs_ring;
-	struct pipe_resource		*tf_ring;
-	struct pipe_resource		*tess_offchip_ring;
+	struct pipe_resource		*tess_rings;
 	union pipe_color_union		*border_color_table; /* in CPU memory, any endian */
 	struct r600_resource		*border_color_buffer;
 	union pipe_color_union		*border_color_map; /* in VRAM (slow access), little endian */
 	unsigned			border_color_count;
 	unsigned			num_vs_blit_sgprs;
 	uint32_t			vs_blit_sh_data[SI_VS_BLIT_SGPRS_POS_TEXCOORD];
 
 	/* Vertex and index buffers. */
 	bool				vertex_buffers_dirty;
 	bool				vertex_buffer_pointer_dirty;
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index b245a38..3881e3f 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -289,21 +289,21 @@  static void si_emit_derived_tess_state(struct si_context *sctx,
 			R_00B430_SPI_SHADER_USER_DATA_HS_0 + GFX6_SGPR_TCS_OFFCHIP_LAYOUT * 4, 4);
 		radeon_emit(cs, offchip_layout);
 		radeon_emit(cs, tcs_out_offsets);
 		radeon_emit(cs, tcs_out_layout);
 		radeon_emit(cs, tcs_in_layout);
 	}
 
 	/* Set userdata SGPRs for TES. */
 	radeon_set_sh_reg_seq(cs, tes_sh_base + SI_SGPR_TES_OFFCHIP_LAYOUT * 4, 2);
 	radeon_emit(cs, offchip_layout);
-	radeon_emit(cs, r600_resource(sctx->tess_offchip_ring)->gpu_address >> 16);
+	radeon_emit(cs, r600_resource(sctx->tess_rings)->gpu_address >> 16);
 
 	ls_hs_config = S_028B58_NUM_PATCHES(*num_patches) |
 		       S_028B58_HS_NUM_INPUT_CP(num_tcs_input_cp) |
 		       S_028B58_HS_NUM_OUTPUT_CP(num_tcs_output_cp);
 
 	if (sctx->b.chip_class >= CIK)
 		radeon_set_context_reg_idx(cs, R_028B58_VGT_LS_HS_CONFIG, 2,
 					   ls_hs_config);
 	else
 		radeon_set_context_reg(cs, R_028B58_VGT_LS_HS_CONFIG,
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 9c505ff..57bf622 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -2945,52 +2945,43 @@  static bool si_update_spi_tmpring_size(struct si_context *sctx)
 			   S_0286E8_WAVESIZE(scratch_bytes_per_wave >> 10);
 	if (spi_tmpring_size != sctx->spi_tmpring_size) {
 		sctx->spi_tmpring_size = spi_tmpring_size;
 		si_mark_atom_dirty(sctx, &sctx->scratch_state);
 	}
 	return true;
 }
 
 static void si_init_tess_factor_ring(struct si_context *sctx)
 {
-	assert(!sctx->tf_ring);
+	assert(!sctx->tess_rings);
 
 	/* Use 64K alignment for both rings, so that we can pass the address
 	 * to shaders as one SGPR containing bits [16:47].
 	 */
-	sctx->tf_ring = si_aligned_buffer_create(sctx->b.b.screen,
-						 R600_RESOURCE_FLAG_UNMAPPABLE,
-						 PIPE_USAGE_DEFAULT,
-						 sctx->screen->tess_factor_ring_size,
-						 64 * 1024);
-	if (!sctx->tf_ring)
-		return;
-
-	sctx->tess_offchip_ring =
-		si_aligned_buffer_create(sctx->b.b.screen,
-					 R600_RESOURCE_FLAG_UNMAPPABLE,
-					 PIPE_USAGE_DEFAULT,
-					 sctx->screen->tess_offchip_ring_size,
-					 64 * 1024);
-	if (!sctx->tess_offchip_ring)
+	sctx->tess_rings = si_aligned_buffer_create(sctx->b.b.screen,
+						    R600_RESOURCE_FLAG_UNMAPPABLE,
+						    PIPE_USAGE_DEFAULT,
+						    align(sctx->screen->tess_offchip_ring_size,
+							  64 * 1024) +
+						    sctx->screen->tess_factor_ring_size,
+						    64 * 1024);
+	if (!sctx->tess_rings)
 		return;
 
 	si_init_config_add_vgt_flush(sctx);
 
-	uint64_t offchip_va = r600_resource(sctx->tess_offchip_ring)->gpu_address;
-	uint64_t factor_va = r600_resource(sctx->tf_ring)->gpu_address;
+	uint64_t offchip_va = r600_resource(sctx->tess_rings)->gpu_address;
 	assert((offchip_va & 0xffff) == 0);
-	assert((factor_va & 0xffff) == 0);
+	uint64_t factor_va = offchip_va +
+			     align(sctx->screen->tess_offchip_ring_size, 64 * 1024);
 
-	si_pm4_add_bo(sctx->init_config, r600_resource(sctx->tess_offchip_ring),
-		      RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_RINGS);
-	si_pm4_add_bo(sctx->init_config, r600_resource(sctx->tf_ring),
+	si_pm4_add_bo(sctx->init_config, r600_resource(sctx->tess_rings),
 		      RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_RINGS);
 
 	/* Append these registers to the init config state. */
 	if (sctx->b.chip_class >= CIK) {
 		si_pm4_set_reg(sctx->init_config, R_030938_VGT_TF_RING_SIZE,
 			       S_030938_SIZE(sctx->screen->tess_factor_ring_size / 4));
 		si_pm4_set_reg(sctx->init_config, R_030940_VGT_TF_MEMORY_BASE,
 			       factor_va >> 8);
 		if (sctx->b.chip_class >= GFX9)
 			si_pm4_set_reg(sctx->init_config, R_030944_VGT_TF_MEMORY_BASE_HI,
@@ -3113,23 +3104,23 @@  bool si_update_shaders(struct si_context *sctx)
 	unsigned old_spi_shader_col_format =
 		old_ps ? old_ps->key.part.ps.epilog.spi_shader_col_format : 0;
 	int r;
 
 	compiler_state.tm = sctx->tm;
 	compiler_state.debug = sctx->debug;
 	compiler_state.is_debug_context = sctx->is_debug;
 
 	/* Update stages before GS. */
 	if (sctx->tes_shader.cso) {
-		if (!sctx->tf_ring) {
+		if (!sctx->tess_rings) {
 			si_init_tess_factor_ring(sctx);
-			if (!sctx->tf_ring)
+			if (!sctx->tess_rings)
 				return false;
 		}
 
 		/* VS as LS */
 		if (sctx->b.chip_class <= VI) {
 			r = si_shader_select(ctx, &sctx->vs_shader,
 					     &compiler_state);
 			if (r)
 				return false;
 			si_pm4_bind_state(sctx, ls, sctx->vs_shader.current->pm4);