ac: allow to use vec3 for typed/untyped buffer stores/loads with LLVM 9+

Submitted by Samuel Pitoiset on March 25, 2019, 5:17 p.m.

Details

Message ID 20190325171736.15101-1-samuel.pitoiset@gmail.com
State New
Headers show
Series "ac: allow to use vec3 for typed/untyped buffer stores/loads with LLVM 9+" ( rev: 1 ) in Mesa

Not browsing as part of any series.

Commit Message

Samuel Pitoiset March 25, 2019, 5:17 p.m.
27670 shaders in 14347 tests
Totals:
SGPRS: 1231173 -> 1236757 (0.45 %)
VGPRS: 866056 -> 867488 (0.17 %)
Spilled SGPRs: 24201 -> 24169 (-0.13 %)
Code Size: 46134836 -> 46115944 (-0.04 %) bytes
Max Waves: 232287 -> 232070 (-0.09 %)

Totals from affected shaders:
SGPRS: 247624 -> 253208 (2.26 %)
VGPRS: 214952 -> 216384 (0.67 %)
Spilled SGPRs: 63 -> 31 (-50.79 %)
Code Size: 7633772 -> 7614880 (-0.25 %) bytes
Max Waves: 62065 -> 61848 (-0.35 %)

This change requires LLVM r356755.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
---
 src/amd/common/ac_llvm_build.c | 21 +++++++++++----------
 src/amd/common/ac_llvm_build.h |  1 +
 2 files changed, 12 insertions(+), 10 deletions(-)

Patch hide | download patch | download mbox

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 1123dce2cc8..cd963bc008c 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -83,6 +83,7 @@  ac_llvm_context_init(struct ac_llvm_context *ctx,
 	ctx->v2i32 = LLVMVectorType(ctx->i32, 2);
 	ctx->v3i32 = LLVMVectorType(ctx->i32, 3);
 	ctx->v4i32 = LLVMVectorType(ctx->i32, 4);
+	ctx->v3f32 = LLVMVectorType(ctx->f32, 3);
 	ctx->v2f32 = LLVMVectorType(ctx->f32, 2);
 	ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
 	ctx->v8i32 = LLVMVectorType(ctx->i32, 8);
@@ -1150,9 +1151,9 @@  ac_build_llvm8_buffer_store_common(struct ac_llvm_context *ctx,
 	args[idx++] = voffset ? voffset : ctx->i32_0;
 	args[idx++] = soffset ? soffset : ctx->i32_0;
 	args[idx++] = LLVMConstInt(ctx->i32, (glc ? 1 : 0) + (slc ? 2 : 0), 0);
-	unsigned func = CLAMP(num_channels, 1, 3) - 1;
+	unsigned func = CLAMP(num_channels, 1, HAVE_LLVM >= 0x900 ? 4 : 3) - 1;
 
-	const char *type_names[] = {"f32", "v2f32", "v4f32"};
+	const char *type_names[] = {"f32", "v2f32", "v3f32", "v4f32"};
 	const char *indexing_kind = structurized ? "struct" : "raw";
 	char name[256];
 
@@ -1334,10 +1335,10 @@  ac_build_llvm8_buffer_load_common(struct ac_llvm_context *ctx,
 	args[idx++] = voffset ? voffset : ctx->i32_0;
 	args[idx++] = soffset ? soffset : ctx->i32_0;
 	args[idx++] = LLVMConstInt(ctx->i32, (glc ? 1 : 0) + (slc ? 2 : 0), 0);
-	unsigned func = CLAMP(num_channels, 1, 3) - 1;
+	unsigned func = CLAMP(num_channels, 1, HAVE_LLVM >= 0x900 ? 4 : 3) - 1;
 
-	LLVMTypeRef types[] = {ctx->f32, ctx->v2f32, ctx->v4f32};
-	const char *type_names[] = {"f32", "v2f32", "v4f32"};
+	LLVMTypeRef types[] = {ctx->f32, ctx->v2f32, ctx->v3f32, ctx->v4f32};
+	const char *type_names[] = {"f32", "v2f32", "v3f32", "v4f32"};
 	const char *indexing_kind = structurized ? "struct" : "raw";
 	char name[256];
 
@@ -1490,10 +1491,10 @@  ac_build_llvm8_tbuffer_load(struct ac_llvm_context *ctx,
 	args[idx++] = soffset ? soffset : ctx->i32_0;
 	args[idx++] = LLVMConstInt(ctx->i32, dfmt | (nfmt << 4), 0);
 	args[idx++] = LLVMConstInt(ctx->i32, (glc ? 1 : 0) + (slc ? 2 : 0), 0);
-	unsigned func = CLAMP(num_channels, 1, 3) - 1;
+	unsigned func = CLAMP(num_channels, 1, HAVE_LLVM >= 0x900 ? 4 : 3) - 1;
 
-	LLVMTypeRef types[] = {ctx->i32, ctx->v2i32, ctx->v4i32};
-	const char *type_names[] = {"i32", "v2i32", "v4i32"};
+	LLVMTypeRef types[] = {ctx->i32, ctx->v2i32, ctx->v3i32, ctx->v4i32};
+	const char *type_names[] = {"i32", "v2i32", "v3i32", "v4i32"};
 	const char *indexing_kind = structurized ? "struct" : "raw";
 	char name[256];
 
@@ -1651,9 +1652,9 @@  ac_build_llvm8_tbuffer_store(struct ac_llvm_context *ctx,
 	args[idx++] = soffset ? soffset : ctx->i32_0;
 	args[idx++] = LLVMConstInt(ctx->i32, dfmt | (nfmt << 4), 0);
 	args[idx++] = LLVMConstInt(ctx->i32, (glc ? 1 : 0) + (slc ? 2 : 0), 0);
-	unsigned func = CLAMP(num_channels, 1, 3) - 1;
+	unsigned func = CLAMP(num_channels, 1, HAVE_LLVM >= 0x900 ? 4 : 3) - 1;
 
-	const char *type_names[] = {"i32", "v2i32", "v4i32"};
+	const char *type_names[] = {"i32", "v2i32", "v3i32", "v4i32"};
 	const char *indexing_kind = structurized ? "struct" : "raw";
 	char name[256];
 
diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
index 9151c743bed..d2f8cd5e08b 100644
--- a/src/amd/common/ac_llvm_build.h
+++ b/src/amd/common/ac_llvm_build.h
@@ -71,6 +71,7 @@  struct ac_llvm_context {
 	LLVMTypeRef v3i32;
 	LLVMTypeRef v4i32;
 	LLVMTypeRef v2f32;
+	LLVMTypeRef v3f32;
 	LLVMTypeRef v4f32;
 	LLVMTypeRef v8i32;
 

Comments

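I just realized this is broken for older LLVM: with the clamp still at 3, index 2 of the extended tables now selects the vec3 entry instead of vec4. I will fix it.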

On 3/25/19 6:17 PM, Samuel Pitoiset wrote:
> 27670 shaders in 14347 tests
> Totals:
> SGPRS: 1231173 -> 1236757 (0.45 %)
> VGPRS: 866056 -> 867488 (0.17 %)
> Spilled SGPRs: 24201 -> 24169 (-0.13 %)
> Code Size: 46134836 -> 46115944 (-0.04 %) bytes
> Max Waves: 232287 -> 232070 (-0.09 %)
>
> Totals from affected shaders:
> SGPRS: 247624 -> 253208 (2.26 %)
> VGPRS: 214952 -> 216384 (0.67 %)
> Spilled SGPRs: 63 -> 31 (-50.79 %)
> Code Size: 7633772 -> 7614880 (-0.25 %) bytes
> Max Waves: 62065 -> 61848 (-0.35 %)
>
> This change requires LLVM r356755.
>
> Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
> ---
>   src/amd/common/ac_llvm_build.c | 21 +++++++++++----------
>   src/amd/common/ac_llvm_build.h |  1 +
>   2 files changed, 12 insertions(+), 10 deletions(-)
>
> diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
> index 1123dce2cc8..cd963bc008c 100644
> --- a/src/amd/common/ac_llvm_build.c
> +++ b/src/amd/common/ac_llvm_build.c
> @@ -83,6 +83,7 @@ ac_llvm_context_init(struct ac_llvm_context *ctx,
>   	ctx->v2i32 = LLVMVectorType(ctx->i32, 2);
>   	ctx->v3i32 = LLVMVectorType(ctx->i32, 3);
>   	ctx->v4i32 = LLVMVectorType(ctx->i32, 4);
> +	ctx->v3f32 = LLVMVectorType(ctx->f32, 3);
>   	ctx->v2f32 = LLVMVectorType(ctx->f32, 2);
>   	ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
>   	ctx->v8i32 = LLVMVectorType(ctx->i32, 8);
> @@ -1150,9 +1151,9 @@ ac_build_llvm8_buffer_store_common(struct ac_llvm_context *ctx,
>   	args[idx++] = voffset ? voffset : ctx->i32_0;
>   	args[idx++] = soffset ? soffset : ctx->i32_0;
>   	args[idx++] = LLVMConstInt(ctx->i32, (glc ? 1 : 0) + (slc ? 2 : 0), 0);
> -	unsigned func = CLAMP(num_channels, 1, 3) - 1;
> +	unsigned func = CLAMP(num_channels, 1, HAVE_LLVM >= 0x900 ? 4 : 3) - 1;
>   
> -	const char *type_names[] = {"f32", "v2f32", "v4f32"};
> +	const char *type_names[] = {"f32", "v2f32", "v3f32", "v4f32"};
>   	const char *indexing_kind = structurized ? "struct" : "raw";
>   	char name[256];
>   
> @@ -1334,10 +1335,10 @@ ac_build_llvm8_buffer_load_common(struct ac_llvm_context *ctx,
>   	args[idx++] = voffset ? voffset : ctx->i32_0;
>   	args[idx++] = soffset ? soffset : ctx->i32_0;
>   	args[idx++] = LLVMConstInt(ctx->i32, (glc ? 1 : 0) + (slc ? 2 : 0), 0);
> -	unsigned func = CLAMP(num_channels, 1, 3) - 1;
> +	unsigned func = CLAMP(num_channels, 1, HAVE_LLVM >= 0x900 ? 4 : 3) - 1;
>   
> -	LLVMTypeRef types[] = {ctx->f32, ctx->v2f32, ctx->v4f32};
> -	const char *type_names[] = {"f32", "v2f32", "v4f32"};
> +	LLVMTypeRef types[] = {ctx->f32, ctx->v2f32, ctx->v3f32, ctx->v4f32};
> +	const char *type_names[] = {"f32", "v2f32", "v3f32", "v4f32"};
>   	const char *indexing_kind = structurized ? "struct" : "raw";
>   	char name[256];
>   
> @@ -1490,10 +1491,10 @@ ac_build_llvm8_tbuffer_load(struct ac_llvm_context *ctx,
>   	args[idx++] = soffset ? soffset : ctx->i32_0;
>   	args[idx++] = LLVMConstInt(ctx->i32, dfmt | (nfmt << 4), 0);
>   	args[idx++] = LLVMConstInt(ctx->i32, (glc ? 1 : 0) + (slc ? 2 : 0), 0);
> -	unsigned func = CLAMP(num_channels, 1, 3) - 1;
> +	unsigned func = CLAMP(num_channels, 1, HAVE_LLVM >= 0x900 ? 4 : 3) - 1;
>   
> -	LLVMTypeRef types[] = {ctx->i32, ctx->v2i32, ctx->v4i32};
> -	const char *type_names[] = {"i32", "v2i32", "v4i32"};
> +	LLVMTypeRef types[] = {ctx->i32, ctx->v2i32, ctx->v3i32, ctx->v4i32};
> +	const char *type_names[] = {"i32", "v2i32", "v3i32", "v4i32"};
>   	const char *indexing_kind = structurized ? "struct" : "raw";
>   	char name[256];
>   
> @@ -1651,9 +1652,9 @@ ac_build_llvm8_tbuffer_store(struct ac_llvm_context *ctx,
>   	args[idx++] = soffset ? soffset : ctx->i32_0;
>   	args[idx++] = LLVMConstInt(ctx->i32, dfmt | (nfmt << 4), 0);
>   	args[idx++] = LLVMConstInt(ctx->i32, (glc ? 1 : 0) + (slc ? 2 : 0), 0);
> -	unsigned func = CLAMP(num_channels, 1, 3) - 1;
> +	unsigned func = CLAMP(num_channels, 1, HAVE_LLVM >= 0x900 ? 4 : 3) - 1;
>   
> -	const char *type_names[] = {"i32", "v2i32", "v4i32"};
> +	const char *type_names[] = {"i32", "v2i32", "v3i32", "v4i32"};
>   	const char *indexing_kind = structurized ? "struct" : "raw";
>   	char name[256];
>   
> diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
> index 9151c743bed..d2f8cd5e08b 100644
> --- a/src/amd/common/ac_llvm_build.h
> +++ b/src/amd/common/ac_llvm_build.h
> @@ -71,6 +71,7 @@ struct ac_llvm_context {
>   	LLVMTypeRef v3i32;
>   	LLVMTypeRef v4i32;
>   	LLVMTypeRef v2f32;
> +	LLVMTypeRef v3f32;
>   	LLVMTypeRef v4f32;
>   	LLVMTypeRef v8i32;
>