[6/9] radeonsi: use the ac helper for index buffer stores in the culling shader

Submitted by Marek Olšák on June 4, 2019, 12:02 a.m.

Details

Message ID 20190604000258.9143-6-maraeo@gmail.com
State New
Headers show
Series "Series without cover letter" ( rev: 1 ) in Mesa

Not browsing as part of any series.

Commit Message

Marek Olšák June 4, 2019, 12:02 a.m.
From: Marek Olšák <marek.olsak@amd.com>

---
 src/amd/common/ac_llvm_build.c                |  5 +++--
 src/amd/common/ac_llvm_build.h                |  1 +
 src/amd/common/ac_nir_to_llvm.c               |  2 +-
 .../radeonsi/si_compute_prim_discard.c        | 21 ++++++++-----------
 .../drivers/radeonsi/si_shader_tgsi_mem.c     |  2 +-
 5 files changed, 15 insertions(+), 16 deletions(-)

Patch hide | download patch | download mbox

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 2a1a133c392..894d01ca036 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -1187,30 +1187,31 @@  ac_build_llvm8_buffer_store_common(struct ac_llvm_context *ctx,
 }
 
 void
 ac_build_buffer_store_format(struct ac_llvm_context *ctx,
 			     LLVMValueRef rsrc,
 			     LLVMValueRef data,
 			     LLVMValueRef vindex,
 			     LLVMValueRef voffset,
 			     unsigned num_channels,
 			     bool glc,
+			     bool slc,
 			     bool writeonly_memory)
 {
 	if (HAVE_LLVM >= 0x800) {
 		ac_build_llvm8_buffer_store_common(ctx, rsrc, data, vindex,
 						   voffset, NULL, num_channels,
-						   ctx->f32, glc, false,
+						   ctx->f32, glc, slc,
 						   writeonly_memory, true, true);
 	} else {
 		ac_build_llvm7_buffer_store_common(ctx, rsrc, data, vindex, voffset,
-						   num_channels, glc, false,
+						   num_channels, glc, slc,
 						   writeonly_memory, true);
 	}
 }
 
 /* TBUFFER_STORE_FORMAT_{X,XY,XYZ,XYZW} <- the suffix is selected by num_channels=1..4.
  * The type of vdata must be one of i32 (num_channels=1), v2i32 (num_channels=2),
  * or v4i32 (num_channels=3,4).
  */
 void
 ac_build_buffer_store_dword(struct ac_llvm_context *ctx,
diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
index 5ed9a112457..bbdb01184e6 100644
--- a/src/amd/common/ac_llvm_build.h
+++ b/src/amd/common/ac_llvm_build.h
@@ -275,20 +275,21 @@  ac_build_buffer_store_dword(struct ac_llvm_context *ctx,
 			    bool swizzle_enable_hint);
 
 void
 ac_build_buffer_store_format(struct ac_llvm_context *ctx,
 			     LLVMValueRef rsrc,
 			     LLVMValueRef data,
 			     LLVMValueRef vindex,
 			     LLVMValueRef voffset,
 			     unsigned num_channels,
 			     bool glc,
+			     bool slc,
 			     bool writeonly_memory);
 
 LLVMValueRef
 ac_build_buffer_load(struct ac_llvm_context *ctx,
 		     LLVMValueRef rsrc,
 		     int num_channels,
 		     LLVMValueRef vindex,
 		     LLVMValueRef voffset,
 		     LLVMValueRef soffset,
 		     unsigned inst_offset,
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 833b1e54abc..7e2e8c30306 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -2532,21 +2532,21 @@  static void visit_image_store(struct ac_nir_context *ctx,
 
 		if (src_channels == 3)
 			src = ac_build_expand_to_vec4(&ctx->ac, src, 3);
 
 		vindex = LLVMBuildExtractElement(ctx->ac.builder,
 						 get_src(ctx, instr->src[1]),
 						 ctx->ac.i32_0, "");
 
 		ac_build_buffer_store_format(&ctx->ac, rsrc, src, vindex,
 					     ctx->ac.i32_0, src_channels,
-					     args.cache_policy & ac_glc,
+					     args.cache_policy & ac_glc, false,
 					     writeonly_memory);
 	} else {
 		args.opcode = ac_image_store;
 		args.data[0] = ac_to_float(&ctx->ac, get_src(ctx, instr->src[3]));
 		get_image_coords(ctx, instr, &args, dim, is_array);
 		args.resource = get_image_descriptor(ctx, instr, AC_DESC_IMAGE, true);
 		args.dim = get_ac_image_dim(&ctx->ac, dim, is_array);
 		args.dmask = 15;
 
 		ac_build_image_opcode(&ctx->ac, &args);
diff --git a/src/gallium/drivers/radeonsi/si_compute_prim_discard.c b/src/gallium/drivers/radeonsi/si_compute_prim_discard.c
index 362c63c2e44..3bed818d5ad 100644
--- a/src/gallium/drivers/radeonsi/si_compute_prim_discard.c
+++ b/src/gallium/drivers/radeonsi/si_compute_prim_discard.c
@@ -849,32 +849,29 @@  void si_build_prim_discard_compute_shader(struct si_shader_context *ctx)
 			 * multiple subdraws, the first primitive ID is > 0
 			 * for subsequent subdraws. Each subdraw uses a different
 			 * portion of the output index buffer. Offset the store
 			 * vindex by the first primitive ID to get the correct
 			 * store address for the subdraw.
 			 */
 			start = LLVMBuildAdd(builder, start, vertex_counter, "");
 		}
 
 		/* Write indices for accepted primitives. */
-		LLVMValueRef buf_args[] = {
-			ac_to_float(&ctx->ac, ac_build_expand_to_vec4(&ctx->ac,
-						ac_build_gather_values(&ctx->ac, index, 3), 3)),
-			output_indexbuf,
-			LLVMBuildAdd(builder, start, prim_index, ""),
-			ctx->i32_0, /* voffset */
-			ctx->i1true, /* glc */
-			LLVMConstInt(ctx->i1, INDEX_STORES_USE_SLC, 0),
-		};
-		ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.buffer.store.format.v4f32",
-				   ctx->voidt, buf_args, 6,
-				   ac_get_store_intr_attribs(true));
+		LLVMValueRef vindex = LLVMBuildAdd(builder, start, prim_index, "");
+		LLVMValueRef vdata = ac_build_gather_values(&ctx->ac, index, 3);
+
+		if (!ac_has_vec3_support(ctx->ac.chip_class, true))
+			vdata = ac_build_expand_to_vec4(&ctx->ac, vdata, 3);
+
+		ac_build_buffer_store_format(&ctx->ac, output_indexbuf, vdata,
+					     vindex, ctx->i32_0, 3, true,
+					     INDEX_STORES_USE_SLC, true);
 	}
 	lp_build_endif(&if_accepted);
 
 	LLVMBuildRetVoid(builder);
 }
 
 /* Return false if the shader isn't ready. */
 static bool si_shader_select_prim_discard_cs(struct si_context *sctx,
 					     const struct pipe_draw_info *info,
 					     bool primitive_restart)
diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
index cc634f495ef..f4a988f90fa 100644
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
+++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
@@ -728,21 +728,21 @@  static void store_emit(
 		return;
 	}
 
 	if (target == TGSI_TEXTURE_BUFFER) {
 		unsigned num_channels = util_last_bit(inst->Dst[0].Register.WriteMask);
 
 		ac_build_buffer_store_format(&ctx->ac, args.resource,
 					     ac_build_gather_values(&ctx->ac, chans, num_channels),
 					     vindex, ctx->i32_0 /* voffset */,
 					     num_channels,
-					     !!(args.cache_policy & ac_glc),
+					     !!(args.cache_policy & ac_glc), false,
 					     writeonly_memory);
 	} else {
 		args.opcode = ac_image_store;
 		args.data[0] = ac_build_gather_values(&ctx->ac, chans, 4);
 		args.dim = ac_image_dim_from_tgsi_target(ctx->screen, inst->Memory.Texture);
 		args.attributes = ac_get_store_intr_attribs(writeonly_memory);
 		args.dmask = 0xf;
 
 		emit_data->output[emit_data->chan] =
 			ac_build_image_opcode(&ctx->ac, &args);