[04/11] ac: add ac_build_buffer_store_format() helper

Submitted by Samuel Pitoiset on March 12, 2019, 4:19 p.m.

Details

Message ID 20190312161947.24880-5-samuel.pitoiset@gmail.com
State New
Headers show
Series "ac: use LLVM 8 buffer intrinsics everywhere" ( rev: 1 ) in Mesa

Not browsing as part of any series.

Commit Message

Samuel Pitoiset March 12, 2019, 4:19 p.m.
Similar to ac_build_buffer_load_format().

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
---
 src/amd/common/ac_llvm_build.c  | 100 ++++++++++++++++++++++++++++++++
 src/amd/common/ac_llvm_build.h  |  11 ++++
 src/amd/common/ac_nir_to_llvm.c |  29 +++------
 3 files changed, 119 insertions(+), 21 deletions(-)

Patch hide | download patch | download mbox

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 253073e52fb..bb8a470ae1d 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -1082,6 +1082,106 @@  LLVMValueRef ac_build_load_to_sgpr_uint_wraparound(struct ac_llvm_context *ctx,
 	return ac_build_load_custom(ctx, base_ptr, index, true, true, false);
 }
 
+static void
+ac_build_buffer_store_common(struct ac_llvm_context *ctx,
+			     LLVMValueRef rsrc,
+			     LLVMValueRef data,
+			     LLVMValueRef vindex,
+			     LLVMValueRef voffset,
+			     unsigned num_channels,
+			     bool glc,
+			     bool slc,
+			     bool writeonly_memory,
+			     bool use_format)
+{
+	LLVMValueRef args[] = {
+		data,
+		LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""),
+		vindex ? vindex : ctx->i32_0,
+		voffset,
+		LLVMConstInt(ctx->i1, glc, 0),
+		LLVMConstInt(ctx->i1, slc, 0)
+	};
+	unsigned func = CLAMP(num_channels, 1, 3) - 1;
+
+	const char *type_names[] = {"f32", "v2f32", "v4f32"};
+	char name[256];
+
+	if (use_format) {
+		snprintf(name, sizeof(name), "llvm.amdgcn.buffer.store.format.%s",
+			 type_names[func]);
+	} else {
+		snprintf(name, sizeof(name), "llvm.amdgcn.buffer.store.%s",
+			 type_names[func]);
+	}
+
+	ac_build_intrinsic(ctx, name, ctx->voidt, args, ARRAY_SIZE(args),
+			   ac_get_store_intr_attribs(writeonly_memory));
+}
+
+static void
+ac_build_llvm8_buffer_store_common(struct ac_llvm_context *ctx,
+				   LLVMValueRef rsrc,
+				   LLVMValueRef data,
+				   LLVMValueRef vindex,
+				   LLVMValueRef voffset,
+				   LLVMValueRef soffset,
+				   unsigned num_channels,
+				   bool glc,
+				   bool slc,
+				   bool writeonly_memory,
+				   bool use_format,
+				   bool structurized)
+{
+	LLVMValueRef args[5];
+	int idx = 0;
+	args[idx++] = data;
+	args[idx++] = LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, "");
+	if (structurized)
+		args[idx++] = vindex ? vindex : ctx->i32_0;
+	args[idx++] = voffset ? voffset : ctx->i32_0;
+	args[idx++] = soffset ? soffset : ctx->i32_0;
+	args[idx++] = LLVMConstInt(ctx->i32, (glc ? 1 : 0) + (slc ? 2 : 0), 0);
+	unsigned func = CLAMP(num_channels, 1, 3) - 1;
+
+	const char *type_names[] = {"f32", "v2f32", "v4f32"};
+	const char *indexing_kind = structurized ? "struct" : "raw";
+	char name[256];
+
+	if (use_format) {
+		snprintf(name, sizeof(name), "llvm.amdgcn.%s.buffer.store.format.%s",
+			 indexing_kind, type_names[func]);
+	} else {
+		snprintf(name, sizeof(name), "llvm.amdgcn.%s.buffer.store.%s",
+			 indexing_kind, type_names[func]);
+	}
+
+	ac_build_intrinsic(ctx, name, ctx->voidt, args, idx,
+			   ac_get_store_intr_attribs(writeonly_memory));
+}
+
+void
+ac_build_buffer_store_format(struct ac_llvm_context *ctx,
+			     LLVMValueRef rsrc,
+			     LLVMValueRef data,
+			     LLVMValueRef vindex,
+			     LLVMValueRef voffset,
+			     unsigned num_channels,
+			     bool glc,
+			     bool writeonly_memory)
+{
+	if (HAVE_LLVM >= 0x800) {
+		ac_build_llvm8_buffer_store_common(ctx, rsrc, data, vindex,
+						   voffset, NULL, num_channels,
+						   glc, false, writeonly_memory,
+						   true, true);
+	} else {
+		ac_build_buffer_store_common(ctx, rsrc, data, vindex, voffset,
+					     num_channels, glc, false,
+					     writeonly_memory, true);
+	}
+}
+
 /* TBUFFER_STORE_FORMAT_{X,XY,XYZ,XYZW} <- the suffix is selected by num_channels=1..4.
  * The type of vdata must be one of i32 (num_channels=1), v2i32 (num_channels=2),
  * or v4i32 (num_channels=3,4).
diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
index 0fb3eb52f05..069ba7aa3c9 100644
--- a/src/amd/common/ac_llvm_build.h
+++ b/src/amd/common/ac_llvm_build.h
@@ -266,6 +266,17 @@  ac_build_buffer_store_dword(struct ac_llvm_context *ctx,
 		            bool slc,
 			    bool writeonly_memory,
 			    bool swizzle_enable_hint);
+
+void
+ac_build_buffer_store_format(struct ac_llvm_context *ctx,
+			     LLVMValueRef rsrc,
+			     LLVMValueRef data,
+			     LLVMValueRef vindex,
+			     LLVMValueRef voffset,
+			     unsigned num_channels,
+			     bool glc,
+			     bool writeonly_memory);
+
 LLVMValueRef
 ac_build_buffer_load(struct ac_llvm_context *ctx,
 		     LLVMValueRef rsrc,
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index ff29345ffe5..c10a0cce16f 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -2512,7 +2512,6 @@  static LLVMValueRef visit_image_load(struct ac_nir_context *ctx,
 static void visit_image_store(struct ac_nir_context *ctx,
 			      nir_intrinsic_instr *instr)
 {
-	LLVMValueRef params[8];
 	const nir_deref_instr *image_deref = get_image_deref(instr);
 	const struct glsl_type *type = image_deref->type;
 	const nir_variable *var = nir_deref_instr_get_variable(image_deref);
@@ -2524,34 +2523,22 @@  static void visit_image_store(struct ac_nir_context *ctx,
 					     writeonly_memory);
 
 	if (dim == GLSL_SAMPLER_DIM_BUF) {
-		char name[48];
-		const char *types[] = { "f32", "v2f32", "v4f32" };
 		LLVMValueRef rsrc = get_image_buffer_descriptor(ctx, instr, true);
 		LLVMValueRef src = ac_to_float(&ctx->ac, get_src(ctx, instr->src[3]));
 		unsigned src_channels = ac_get_llvm_num_components(src);
+		LLVMValueRef vindex;
 
 		if (src_channels == 3)
 			src = ac_build_expand_to_vec4(&ctx->ac, src, 3);
 
-		params[0] = src; /* data */
-		params[1] = rsrc;
-		params[2] = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[1]),
-						    ctx->ac.i32_0, ""); /* vindex */
-		params[3] = ctx->ac.i32_0; /* voffset */
-		snprintf(name, sizeof(name), "%s.%s",
-		         HAVE_LLVM >= 0x800 ? "llvm.amdgcn.struct.buffer.store.format"
-		                            : "llvm.amdgcn.buffer.store.format",
-		         types[CLAMP(src_channels, 1, 3) - 1]);
+		vindex = LLVMBuildExtractElement(ctx->ac.builder,
+						 get_src(ctx, instr->src[1]),
+						 ctx->ac.i32_0, "");
 
-		if (HAVE_LLVM >= 0x800) {
-			params[4] = ctx->ac.i32_0; /* soffset */
-			params[5] = (args.cache_policy & ac_glc) ? ctx->ac.i32_1 : ctx->ac.i32_0;
-		} else {
-			params[4] = LLVMConstInt(ctx->ac.i1, !!(args.cache_policy & ac_glc), 0);
-			params[5] = ctx->ac.i1false;  /* slc */
-		}
-		ac_build_intrinsic(&ctx->ac, name, ctx->ac.voidt, params, 6,
-				   ac_get_store_intr_attribs(writeonly_memory));
+		ac_build_buffer_store_format(&ctx->ac, rsrc, src, vindex,
+					     ctx->ac.i32_0, src_channels,
+					     args.cache_policy & ac_glc,
+					     writeonly_memory);
 	} else {
 		args.opcode = ac_image_store;
 		args.data[0] = ac_to_float(&ctx->ac, get_src(ctx, instr->src[3]));

Comments

On 3/12/19 5:19 PM, Samuel Pitoiset wrote:
> Similar to ac_build_buffer_load_format().
>
> Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
> ---
>   src/amd/common/ac_llvm_build.c  | 100 ++++++++++++++++++++++++++++++++
>   src/amd/common/ac_llvm_build.h  |  11 ++++
>   src/amd/common/ac_nir_to_llvm.c |  29 +++------
>   3 files changed, 119 insertions(+), 21 deletions(-)
>
> diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
> index 253073e52fb..bb8a470ae1d 100644
> --- a/src/amd/common/ac_llvm_build.c
> +++ b/src/amd/common/ac_llvm_build.c
> @@ -1082,6 +1082,106 @@ LLVMValueRef ac_build_load_to_sgpr_uint_wraparound(struct ac_llvm_context *ctx,
>   	return ac_build_load_custom(ctx, base_ptr, index, true, true, false);
>   }
>   
> +static void
> +ac_build_buffer_store_common(struct ac_llvm_context *ctx,
> +			     LLVMValueRef rsrc,
> +			     LLVMValueRef data,
> +			     LLVMValueRef vindex,
> +			     LLVMValueRef voffset,
> +			     unsigned num_channels,
> +			     bool glc,
> +			     bool slc,
> +			     bool writeonly_memory,
> +			     bool use_format)
> +{
> +	LLVMValueRef args[] = {
> +		data,
> +		LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""),
> +		vindex ? vindex : ctx->i32_0,
> +		voffset,
> +		LLVMConstInt(ctx->i1, glc, 0),
> +		LLVMConstInt(ctx->i1, slc, 0)
> +	};
> +	unsigned func = CLAMP(num_channels, 1, 3) - 1;
> +
> +	const char *type_names[] = {"f32", "v2f32", "v4f32"};
> +	char name[256];
> +
> +	if (use_format) {
> +		snprintf(name, sizeof(name), "llvm.amdgcn.buffer.store.format.%s",
> +			 type_names[func]);
> +	} else {
> +		snprintf(name, sizeof(name), "llvm.amdgcn.buffer.store.%s",
> +			 type_names[func]);
> +	}
> +
> +	ac_build_intrinsic(ctx, name, ctx->voidt, args, ARRAY_SIZE(args),
> +			   ac_get_store_intr_attribs(writeonly_memory));
> +}
> +
> +static void
> +ac_build_llvm8_buffer_store_common(struct ac_llvm_context *ctx,
> +				   LLVMValueRef rsrc,
> +				   LLVMValueRef data,
> +				   LLVMValueRef vindex,
> +				   LLVMValueRef voffset,
> +				   LLVMValueRef soffset,
> +				   unsigned num_channels,
> +				   bool glc,
> +				   bool slc,
> +				   bool writeonly_memory,
> +				   bool use_format,
> +				   bool structurized)
> +{
> +	LLVMValueRef args[5];
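This should be 6: when structurized is true, six arguments are stored (data, rsrc, vindex, voffset, soffset and the cache policy), so args[5] overflows — and ac_build_buffer_store_format() always passes structurized = true.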
> +	int idx = 0;
> +	args[idx++] = data;
> +	args[idx++] = LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, "");
> +	if (structurized)
> +		args[idx++] = vindex ? vindex : ctx->i32_0;
> +	args[idx++] = voffset ? voffset : ctx->i32_0;
> +	args[idx++] = soffset ? soffset : ctx->i32_0;
> +	args[idx++] = LLVMConstInt(ctx->i32, (glc ? 1 : 0) + (slc ? 2 : 0), 0);
> +	unsigned func = CLAMP(num_channels, 1, 3) - 1;
> +
> +	const char *type_names[] = {"f32", "v2f32", "v4f32"};
> +	const char *indexing_kind = structurized ? "struct" : "raw";
> +	char name[256];
> +
> +	if (use_format) {
> +		snprintf(name, sizeof(name), "llvm.amdgcn.%s.buffer.store.format.%s",
> +			 indexing_kind, type_names[func]);
> +	} else {
> +		snprintf(name, sizeof(name), "llvm.amdgcn.%s.buffer.store.%s",
> +			 indexing_kind, type_names[func]);
> +	}
> +
> +	ac_build_intrinsic(ctx, name, ctx->voidt, args, idx,
> +			   ac_get_store_intr_attribs(writeonly_memory));
> +}
> +
> +void
> +ac_build_buffer_store_format(struct ac_llvm_context *ctx,
> +			     LLVMValueRef rsrc,
> +			     LLVMValueRef data,
> +			     LLVMValueRef vindex,
> +			     LLVMValueRef voffset,
> +			     unsigned num_channels,
> +			     bool glc,
> +			     bool writeonly_memory)
> +{
> +	if (HAVE_LLVM >= 0x800) {
> +		ac_build_llvm8_buffer_store_common(ctx, rsrc, data, vindex,
> +						   voffset, NULL, num_channels,
> +						   glc, false, writeonly_memory,
> +						   true, true);
> +	} else {
> +		ac_build_buffer_store_common(ctx, rsrc, data, vindex, voffset,
> +					     num_channels, glc, false,
> +					     writeonly_memory, true);
> +	}
> +}
> +
>   /* TBUFFER_STORE_FORMAT_{X,XY,XYZ,XYZW} <- the suffix is selected by num_channels=1..4.
>    * The type of vdata must be one of i32 (num_channels=1), v2i32 (num_channels=2),
>    * or v4i32 (num_channels=3,4).
> diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
> index 0fb3eb52f05..069ba7aa3c9 100644
> --- a/src/amd/common/ac_llvm_build.h
> +++ b/src/amd/common/ac_llvm_build.h
> @@ -266,6 +266,17 @@ ac_build_buffer_store_dword(struct ac_llvm_context *ctx,
>   		            bool slc,
>   			    bool writeonly_memory,
>   			    bool swizzle_enable_hint);
> +
> +void
> +ac_build_buffer_store_format(struct ac_llvm_context *ctx,
> +			     LLVMValueRef rsrc,
> +			     LLVMValueRef data,
> +			     LLVMValueRef vindex,
> +			     LLVMValueRef voffset,
> +			     unsigned num_channels,
> +			     bool glc,
> +			     bool writeonly_memory);
> +
>   LLVMValueRef
>   ac_build_buffer_load(struct ac_llvm_context *ctx,
>   		     LLVMValueRef rsrc,
> diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
> index ff29345ffe5..c10a0cce16f 100644
> --- a/src/amd/common/ac_nir_to_llvm.c
> +++ b/src/amd/common/ac_nir_to_llvm.c
> @@ -2512,7 +2512,6 @@ static LLVMValueRef visit_image_load(struct ac_nir_context *ctx,
>   static void visit_image_store(struct ac_nir_context *ctx,
>   			      nir_intrinsic_instr *instr)
>   {
> -	LLVMValueRef params[8];
>   	const nir_deref_instr *image_deref = get_image_deref(instr);
>   	const struct glsl_type *type = image_deref->type;
>   	const nir_variable *var = nir_deref_instr_get_variable(image_deref);
> @@ -2524,34 +2523,22 @@ static void visit_image_store(struct ac_nir_context *ctx,
>   					     writeonly_memory);
>   
>   	if (dim == GLSL_SAMPLER_DIM_BUF) {
> -		char name[48];
> -		const char *types[] = { "f32", "v2f32", "v4f32" };
>   		LLVMValueRef rsrc = get_image_buffer_descriptor(ctx, instr, true);
>   		LLVMValueRef src = ac_to_float(&ctx->ac, get_src(ctx, instr->src[3]));
>   		unsigned src_channels = ac_get_llvm_num_components(src);
> +		LLVMValueRef vindex;
>   
>   		if (src_channels == 3)
>   			src = ac_build_expand_to_vec4(&ctx->ac, src, 3);
>   
> -		params[0] = src; /* data */
> -		params[1] = rsrc;
> -		params[2] = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[1]),
> -						    ctx->ac.i32_0, ""); /* vindex */
> -		params[3] = ctx->ac.i32_0; /* voffset */
> -		snprintf(name, sizeof(name), "%s.%s",
> -		         HAVE_LLVM >= 0x800 ? "llvm.amdgcn.struct.buffer.store.format"
> -		                            : "llvm.amdgcn.buffer.store.format",
> -		         types[CLAMP(src_channels, 1, 3) - 1]);
> +		vindex = LLVMBuildExtractElement(ctx->ac.builder,
> +						 get_src(ctx, instr->src[1]),
> +						 ctx->ac.i32_0, "");
>   
> -		if (HAVE_LLVM >= 0x800) {
> -			params[4] = ctx->ac.i32_0; /* soffset */
> -			params[5] = (args.cache_policy & ac_glc) ? ctx->ac.i32_1 : ctx->ac.i32_0;
> -		} else {
> -			params[4] = LLVMConstInt(ctx->ac.i1, !!(args.cache_policy & ac_glc), 0);
> -			params[5] = ctx->ac.i1false;  /* slc */
> -		}
> -		ac_build_intrinsic(&ctx->ac, name, ctx->ac.voidt, params, 6,
> -				   ac_get_store_intr_attribs(writeonly_memory));
> +		ac_build_buffer_store_format(&ctx->ac, rsrc, src, vindex,
> +					     ctx->ac.i32_0, src_channels,
> +					     args.cache_policy & ac_glc,
> +					     writeonly_memory);
>   	} else {
>   		args.opcode = ac_image_store;
>   		args.data[0] = ac_to_float(&ctx->ac, get_src(ctx, instr->src[3]));