[24/26] radeonsi: add si_vs_prolog_bits::unpack_instance_id_from_vertex_id:1

Submitted by Marek Olšák on Feb. 13, 2019, 5:16 a.m.

Details

Message ID 20190213051621.6235-25-maraeo@gmail.com
State Accepted
Commit 4eb377d1c3363b3a372fb2c812fc741730c2cef7
Headers show
Series "RadeonSI: Primitive culling with async compute" ( rev: 1 ) in Mesa

Not browsing as part of any series.

Commit Message

Marek Olšák Feb. 13, 2019, 5:16 a.m.
From: Marek Olšák <marek.olsak@amd.com>

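The prim discard compute shader bakes InstanceID into the output index buffer,
so when this key bit is set, the VS prolog unpacks InstanceID from the upper
16 bits of VertexID and keeps only the lower 16 bits as VertexID.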
---
 src/gallium/drivers/radeonsi/si_shader.c | 25 ++++++++++++++++++++++--
 src/gallium/drivers/radeonsi/si_shader.h |  1 +
 2 files changed, 24 insertions(+), 2 deletions(-)

Patch hide | download patch | download mbox

diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 6e3019a9f6c..ba43f0ff902 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -5788,6 +5788,8 @@  static void si_dump_shader_key_vs(const struct si_shader_key *key,
 		prefix, prolog->instance_divisor_is_one);
 	fprintf(f, "  %s.instance_divisor_is_fetched = %u\n",
 		prefix, prolog->instance_divisor_is_fetched);
+	fprintf(f, "  %s.unpack_instance_id_from_vertex_id = %u\n",
+		prefix, prolog->unpack_instance_id_from_vertex_id);
 	fprintf(f, "  %s.ls_vgpr_fix = %u\n",
 		prefix, prolog->ls_vgpr_fix);
 
@@ -7214,8 +7216,21 @@  static void si_build_vs_prolog_function(struct si_shader_context *ctx,
 		}
 	}
 
-	ctx->abi.vertex_id = input_vgprs[first_vs_vgpr];
-	ctx->abi.instance_id = input_vgprs[first_vs_vgpr + (key->vs_prolog.as_ls ? 2 : 1)];
+	unsigned vertex_id_vgpr = first_vs_vgpr;
+	unsigned instance_id_vgpr = first_vs_vgpr + (key->vs_prolog.as_ls ? 2 : 1);
+
+	ctx->abi.vertex_id = input_vgprs[vertex_id_vgpr];
+	ctx->abi.instance_id = input_vgprs[instance_id_vgpr];
+
+	/* InstanceID = VertexID >> 16;
+	 * VertexID   = VertexID & 0xffff;
+	 */
+	if (key->vs_prolog.states.unpack_instance_id_from_vertex_id) {
+		ctx->abi.instance_id = LLVMBuildLShr(ctx->ac.builder, ctx->abi.vertex_id,
+						     LLVMConstInt(ctx->i32, 16, 0), "");
+		ctx->abi.vertex_id = LLVMBuildAnd(ctx->ac.builder, ctx->abi.vertex_id,
+						  LLVMConstInt(ctx->i32, 0xffff, 0), "");
+	}
 
 	/* Copy inputs to outputs. This should be no-op, as the registers match,
 	 * but it will prevent the compiler from overwriting them unintentionally.
@@ -7227,6 +7242,12 @@  static void si_build_vs_prolog_function(struct si_shader_context *ctx,
 	}
 	for (i = 0; i < num_input_vgprs; i++) {
 		LLVMValueRef p = input_vgprs[i];
+
+		if (i == vertex_id_vgpr)
+			p = ctx->abi.vertex_id;
+		else if (i == instance_id_vgpr)
+			p = ctx->abi.instance_id;
+
 		p = ac_to_float(&ctx->ac, p);
 		ret = LLVMBuildInsertValue(ctx->ac.builder, ret, p,
 					   key->vs_prolog.num_input_sgprs + i, "");
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index f9f81a7bc1e..28f32be30c8 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -412,6 +412,7 @@  struct si_vs_prolog_bits {
 	uint16_t	instance_divisor_is_one;     /* bitmask of inputs */
 	uint16_t	instance_divisor_is_fetched; /* bitmask of inputs */
 	unsigned	ls_vgpr_fix:1;
+	unsigned	unpack_instance_id_from_vertex_id:1;
 };
 
 /* Common TCS bits between the shader key and the epilog key. */