ac, radv: fix removing the vec3 restriction on SI

Submitted by Samuel Pitoiset on June 3, 2019, 1:13 p.m.

Details

Message ID 20190603131312.7181-1-samuel.pitoiset@gmail.com
State New
Headers show
Series "ac, radv: fix removing the vec3 restriction on SI" ( rev: 1 ) in Mesa

Not browsing as part of any series.

Commit Message

Samuel Pitoiset June 3, 2019, 1:13 p.m.
I thought LLVM was able to handle that itself but actually it
does not. That means we shouldn't try to emit vec3 on SI because
it's unsupported.

Fixes: 6970a9a6ca9 ("ac,radv: remove the vec3 restriction with LLVM 9+")"
Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
---
 src/amd/common/ac_llvm_build.c    | 12 ++++++------
 src/amd/common/ac_llvm_util.h     |  9 +++++++++
 src/amd/common/ac_nir_to_llvm.c   |  3 ++-
 src/amd/vulkan/radv_nir_to_llvm.c |  2 +-
 4 files changed, 18 insertions(+), 8 deletions(-)

Patch hide | download patch | download mbox

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 613c1eef942..7f5c8ef873c 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -1167,7 +1167,7 @@  ac_build_llvm8_buffer_store_common(struct ac_llvm_context *ctx,
 	args[idx++] = voffset ? voffset : ctx->i32_0;
 	args[idx++] = soffset ? soffset : ctx->i32_0;
 	args[idx++] = LLVMConstInt(ctx->i32, (glc ? 1 : 0) + (slc ? 2 : 0), 0);
-	unsigned func = HAVE_LLVM < 0x900 && num_channels == 3 ? 4 : num_channels;
+	unsigned func = !ac_has_vec3_support(ctx->chip_class) && num_channels == 3 ? 4 : num_channels;
 	const char *indexing_kind = structurized ? "struct" : "raw";
 	char name[256], type_name[8];
 
@@ -1227,7 +1227,7 @@  ac_build_buffer_store_dword(struct ac_llvm_context *ctx,
 {
 	/* Split 3 channel stores, because only LLVM 9+ support 3-channel
 	 * intrinsics. */
-	if (num_channels == 3 && HAVE_LLVM < 0x900) {
+	if (num_channels == 3 && !ac_has_vec3_support(ctx->chip_class)) {
 		LLVMValueRef v[3], v01;
 
 		for (int i = 0; i < 3; i++) {
@@ -1354,7 +1354,7 @@  ac_build_llvm8_buffer_load_common(struct ac_llvm_context *ctx,
 	args[idx++] = voffset ? voffset : ctx->i32_0;
 	args[idx++] = soffset ? soffset : ctx->i32_0;
 	args[idx++] = LLVMConstInt(ctx->i32, (glc ? 1 : 0) + (slc ? 2 : 0), 0);
-	unsigned func = HAVE_LLVM < 0x900 && num_channels == 3 ? 4 : num_channels;
+	unsigned func = !ac_has_vec3_support(ctx->chip_class) && num_channels == 3 ? 4 : num_channels;
 	const char *indexing_kind = structurized ? "struct" : "raw";
 	char name[256], type_name[8];
 
@@ -1420,7 +1420,7 @@  ac_build_buffer_load(struct ac_llvm_context *ctx,
 		if (num_channels == 1)
 			return result[0];
 
-		if (num_channels == 3 && HAVE_LLVM < 0x900)
+		if (num_channels == 3 && !ac_has_vec3_support(ctx->chip_class))
 			result[num_channels++] = LLVMGetUndef(ctx->f32);
 		return ac_build_gather_values(ctx, result, num_channels);
 	}
@@ -1512,7 +1512,7 @@  ac_build_llvm8_tbuffer_load(struct ac_llvm_context *ctx,
 	args[idx++] = soffset ? soffset : ctx->i32_0;
 	args[idx++] = LLVMConstInt(ctx->i32, dfmt | (nfmt << 4), 0);
 	args[idx++] = LLVMConstInt(ctx->i32, (glc ? 1 : 0) + (slc ? 2 : 0), 0);
-	unsigned func = HAVE_LLVM < 0x900 && num_channels == 3 ? 4 : num_channels;
+	unsigned func = !ac_has_vec3_support(ctx->chip_class) && num_channels == 3 ? 4 : num_channels;
 	const char *indexing_kind = structurized ? "struct" : "raw";
 	char name[256], type_name[8];
 
@@ -2011,7 +2011,7 @@  ac_build_llvm8_tbuffer_store(struct ac_llvm_context *ctx,
 	args[idx++] = soffset ? soffset : ctx->i32_0;
 	args[idx++] = LLVMConstInt(ctx->i32, dfmt | (nfmt << 4), 0);
 	args[idx++] = LLVMConstInt(ctx->i32, (glc ? 1 : 0) + (slc ? 2 : 0), 0);
-	unsigned func = HAVE_LLVM < 0x900 && num_channels == 3 ? 4 : num_channels;
+	unsigned func = !ac_has_vec3_support(ctx->chip_class) && num_channels == 3 ? 4 : num_channels;
 	const char *indexing_kind = structurized ? "struct" : "raw";
 	char name[256], type_name[8];
 
diff --git a/src/amd/common/ac_llvm_util.h b/src/amd/common/ac_llvm_util.h
index ca00540da80..a45647a3360 100644
--- a/src/amd/common/ac_llvm_util.h
+++ b/src/amd/common/ac_llvm_util.h
@@ -146,6 +146,15 @@  bool ac_compile_module_to_binary(struct ac_compiler_passes *p, LLVMModuleRef mod
 void ac_llvm_add_barrier_noop_pass(LLVMPassManagerRef passmgr);
 void ac_enable_global_isel(LLVMTargetMachineRef tm);
 
+static inline bool
+ac_has_vec3_support(enum chip_class chip)
+{
+	if (chip == GFX6)
+		return false;
+
+	return HAVE_LLVM >= 0x900;
+}
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 51f92a6b062..429dac63d63 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -1576,7 +1576,8 @@  static void visit_store_ssbo(struct ac_nir_context *ctx,
 
 		/* Due to an LLVM limitation with LLVM < 9, split 3-element
 		 * writes into a 2-element and a 1-element write. */
-		if (count == 3 && (elem_size_bytes != 4 || HAVE_LLVM < 0x900)) {
+		if (count == 3 &&
+		    (elem_size_bytes != 4 || !ac_has_vec3_support(ctx->ac.chip_class))) {
 			writemask |= 1 << (start + 2);
 			count = 2;
 		}
diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c
index dca4bebcdd1..ab552ae34ab 100644
--- a/src/amd/vulkan/radv_nir_to_llvm.c
+++ b/src/amd/vulkan/radv_nir_to_llvm.c
@@ -2768,7 +2768,7 @@  radv_emit_stream_output(struct radv_shader_context *ctx,
 		/* fall through */
 	case 4: /* as v4i32 */
 		vdata = ac_build_gather_values(&ctx->ac, out,
-					       HAVE_LLVM < 0x900 ?
+					       !ac_has_vec3_support(ctx->ac.chip_class) ?
 					       util_next_power_of_two(num_comps) :
 					       num_comps);
 		break;

Comments

> On Jun 3, 2019, at 9:13 AM, Samuel Pitoiset <samuel.pitoiset@gmail.com> wrote:
> 
> I thought LLVM was able to handle that itself but actually it
> does not. That means we shouldn't try to emit vec3 on SI because
> it's unsupported.
> 

It should. Can you file a bug with an example that doesn’t work?



> Fixes: 6970a9a6ca9 ("ac,radv: remove the vec3 restriction with LLVM 9+")"
> Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
> ---
> src/amd/common/ac_llvm_build.c    | 12 ++++++------
> src/amd/common/ac_llvm_util.h     |  9 +++++++++
> src/amd/common/ac_nir_to_llvm.c   |  3 ++-
> src/amd/vulkan/radv_nir_to_llvm.c |  2 +-
> 4 files changed, 18 insertions(+), 8 deletions(-)
> 
> diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
> index 613c1eef942..7f5c8ef873c 100644
> --- a/src/amd/common/ac_llvm_build.c
> +++ b/src/amd/common/ac_llvm_build.c
> @@ -1167,7 +1167,7 @@ ac_build_llvm8_buffer_store_common(struct ac_llvm_context *ctx,
> 	args[idx++] = voffset ? voffset : ctx->i32_0;
> 	args[idx++] = soffset ? soffset : ctx->i32_0;
> 	args[idx++] = LLVMConstInt(ctx->i32, (glc ? 1 : 0) + (slc ? 2 : 0), 0);
> -	unsigned func = HAVE_LLVM < 0x900 && num_channels == 3 ? 4 : num_channels;
> +	unsigned func = !ac_has_vec3_support(ctx->chip_class) && num_channels == 3 ? 4 : num_channels;
> 	const char *indexing_kind = structurized ? "struct" : "raw";
> 	char name[256], type_name[8];
> 
> @@ -1227,7 +1227,7 @@ ac_build_buffer_store_dword(struct ac_llvm_context *ctx,
> {
> 	/* Split 3 channel stores, because only LLVM 9+ support 3-channel
> 	 * intrinsics. */
> -	if (num_channels == 3 && HAVE_LLVM < 0x900) {
> +	if (num_channels == 3 && !ac_has_vec3_support(ctx->chip_class)) {
> 		LLVMValueRef v[3], v01;
> 
> 		for (int i = 0; i < 3; i++) {
> @@ -1354,7 +1354,7 @@ ac_build_llvm8_buffer_load_common(struct ac_llvm_context *ctx,
> 	args[idx++] = voffset ? voffset : ctx->i32_0;
> 	args[idx++] = soffset ? soffset : ctx->i32_0;
> 	args[idx++] = LLVMConstInt(ctx->i32, (glc ? 1 : 0) + (slc ? 2 : 0), 0);
> -	unsigned func = HAVE_LLVM < 0x900 && num_channels == 3 ? 4 : num_channels;
> +	unsigned func = !ac_has_vec3_support(ctx->chip_class) && num_channels == 3 ? 4 : num_channels;
> 	const char *indexing_kind = structurized ? "struct" : "raw";
> 	char name[256], type_name[8];
> 
> @@ -1420,7 +1420,7 @@ ac_build_buffer_load(struct ac_llvm_context *ctx,
> 		if (num_channels == 1)
> 			return result[0];
> 
> -		if (num_channels == 3 && HAVE_LLVM < 0x900)
> +		if (num_channels == 3 && !ac_has_vec3_support(ctx->chip_class))
> 			result[num_channels++] = LLVMGetUndef(ctx->f32);
> 		return ac_build_gather_values(ctx, result, num_channels);
> 	}
> @@ -1512,7 +1512,7 @@ ac_build_llvm8_tbuffer_load(struct ac_llvm_context *ctx,
> 	args[idx++] = soffset ? soffset : ctx->i32_0;
> 	args[idx++] = LLVMConstInt(ctx->i32, dfmt | (nfmt << 4), 0);
> 	args[idx++] = LLVMConstInt(ctx->i32, (glc ? 1 : 0) + (slc ? 2 : 0), 0);
> -	unsigned func = HAVE_LLVM < 0x900 && num_channels == 3 ? 4 : num_channels;
> +	unsigned func = !ac_has_vec3_support(ctx->chip_class) && num_channels == 3 ? 4 : num_channels;
> 	const char *indexing_kind = structurized ? "struct" : "raw";
> 	char name[256], type_name[8];
> 
> @@ -2011,7 +2011,7 @@ ac_build_llvm8_tbuffer_store(struct ac_llvm_context *ctx,
> 	args[idx++] = soffset ? soffset : ctx->i32_0;
> 	args[idx++] = LLVMConstInt(ctx->i32, dfmt | (nfmt << 4), 0);
> 	args[idx++] = LLVMConstInt(ctx->i32, (glc ? 1 : 0) + (slc ? 2 : 0), 0);
> -	unsigned func = HAVE_LLVM < 0x900 && num_channels == 3 ? 4 : num_channels;
> +	unsigned func = !ac_has_vec3_support(ctx->chip_class) && num_channels == 3 ? 4 : num_channels;
> 	const char *indexing_kind = structurized ? "struct" : "raw";
> 	char name[256], type_name[8];
> 
> diff --git a/src/amd/common/ac_llvm_util.h b/src/amd/common/ac_llvm_util.h
> index ca00540da80..a45647a3360 100644
> --- a/src/amd/common/ac_llvm_util.h
> +++ b/src/amd/common/ac_llvm_util.h
> @@ -146,6 +146,15 @@ bool ac_compile_module_to_binary(struct ac_compiler_passes *p, LLVMModuleRef mod
> void ac_llvm_add_barrier_noop_pass(LLVMPassManagerRef passmgr);
> void ac_enable_global_isel(LLVMTargetMachineRef tm);
> 
> +static inline bool
> +ac_has_vec3_support(enum chip_class chip)
> +{
> +	if (chip == GFX6)
> +		return false;
> +
> +	return HAVE_LLVM >= 0x900;
> +}
> +
> #ifdef __cplusplus
> }
> #endif
> diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
> index 51f92a6b062..429dac63d63 100644
> --- a/src/amd/common/ac_nir_to_llvm.c
> +++ b/src/amd/common/ac_nir_to_llvm.c
> @@ -1576,7 +1576,8 @@ static void visit_store_ssbo(struct ac_nir_context *ctx,
> 
> 		/* Due to an LLVM limitation with LLVM < 9, split 3-element
> 		 * writes into a 2-element and a 1-element write. */
> -		if (count == 3 && (elem_size_bytes != 4 || HAVE_LLVM < 0x900)) {
> +		if (count == 3 &&
> +		    (elem_size_bytes != 4 || !ac_has_vec3_support(ctx->ac.chip_class))) {
> 			writemask |= 1 << (start + 2);
> 			count = 2;
> 		}
> diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c
> index dca4bebcdd1..ab552ae34ab 100644
> --- a/src/amd/vulkan/radv_nir_to_llvm.c
> +++ b/src/amd/vulkan/radv_nir_to_llvm.c
> @@ -2768,7 +2768,7 @@ radv_emit_stream_output(struct radv_shader_context *ctx,
> 		/* fall through */
> 	case 4: /* as v4i32 */
> 		vdata = ac_build_gather_values(&ctx->ac, out,
> -					       HAVE_LLVM < 0x900 ?
> +					       !ac_has_vec3_support(ctx->ac.chip_class) ?
> 					       util_next_power_of_two(num_comps) :
> 					       num_comps);
> 		break;
> -- 
> 2.21.0
> 
> _______________________________________________
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
On 6/3/19 3:48 PM, Matt Arsenault wrote:
>
>> On Jun 3, 2019, at 9:13 AM, Samuel Pitoiset <samuel.pitoiset@gmail.com> wrote:
>>
>> I thought LLVM was able to handle that itself but actually it
>> does not. That means we shouldn't try to emit vec3 on SI because
>> it's unsupported.
>>
> It should. Can you file a bug with an example that doesn’t work?
https://bugs.llvm.org/show_bug.cgi?id=42116
>
>
>
>> Fixes: 6970a9a6ca9 ("ac,radv: remove the vec3 restriction with LLVM 9+")"
>> Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
>> ---
>> src/amd/common/ac_llvm_build.c    | 12 ++++++------
>> src/amd/common/ac_llvm_util.h     |  9 +++++++++
>> src/amd/common/ac_nir_to_llvm.c   |  3 ++-
>> src/amd/vulkan/radv_nir_to_llvm.c |  2 +-
>> 4 files changed, 18 insertions(+), 8 deletions(-)
>>
>> diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
>> index 613c1eef942..7f5c8ef873c 100644
>> --- a/src/amd/common/ac_llvm_build.c
>> +++ b/src/amd/common/ac_llvm_build.c
>> @@ -1167,7 +1167,7 @@ ac_build_llvm8_buffer_store_common(struct ac_llvm_context *ctx,
>> 	args[idx++] = voffset ? voffset : ctx->i32_0;
>> 	args[idx++] = soffset ? soffset : ctx->i32_0;
>> 	args[idx++] = LLVMConstInt(ctx->i32, (glc ? 1 : 0) + (slc ? 2 : 0), 0);
>> -	unsigned func = HAVE_LLVM < 0x900 && num_channels == 3 ? 4 : num_channels;
>> +	unsigned func = !ac_has_vec3_support(ctx->chip_class) && num_channels == 3 ? 4 : num_channels;
>> 	const char *indexing_kind = structurized ? "struct" : "raw";
>> 	char name[256], type_name[8];
>>
>> @@ -1227,7 +1227,7 @@ ac_build_buffer_store_dword(struct ac_llvm_context *ctx,
>> {
>> 	/* Split 3 channel stores, because only LLVM 9+ support 3-channel
>> 	 * intrinsics. */
>> -	if (num_channels == 3 && HAVE_LLVM < 0x900) {
>> +	if (num_channels == 3 && !ac_has_vec3_support(ctx->chip_class)) {
>> 		LLVMValueRef v[3], v01;
>>
>> 		for (int i = 0; i < 3; i++) {
>> @@ -1354,7 +1354,7 @@ ac_build_llvm8_buffer_load_common(struct ac_llvm_context *ctx,
>> 	args[idx++] = voffset ? voffset : ctx->i32_0;
>> 	args[idx++] = soffset ? soffset : ctx->i32_0;
>> 	args[idx++] = LLVMConstInt(ctx->i32, (glc ? 1 : 0) + (slc ? 2 : 0), 0);
>> -	unsigned func = HAVE_LLVM < 0x900 && num_channels == 3 ? 4 : num_channels;
>> +	unsigned func = !ac_has_vec3_support(ctx->chip_class) && num_channels == 3 ? 4 : num_channels;
>> 	const char *indexing_kind = structurized ? "struct" : "raw";
>> 	char name[256], type_name[8];
>>
>> @@ -1420,7 +1420,7 @@ ac_build_buffer_load(struct ac_llvm_context *ctx,
>> 		if (num_channels == 1)
>> 			return result[0];
>>
>> -		if (num_channels == 3 && HAVE_LLVM < 0x900)
>> +		if (num_channels == 3 && !ac_has_vec3_support(ctx->chip_class))
>> 			result[num_channels++] = LLVMGetUndef(ctx->f32);
>> 		return ac_build_gather_values(ctx, result, num_channels);
>> 	}
>> @@ -1512,7 +1512,7 @@ ac_build_llvm8_tbuffer_load(struct ac_llvm_context *ctx,
>> 	args[idx++] = soffset ? soffset : ctx->i32_0;
>> 	args[idx++] = LLVMConstInt(ctx->i32, dfmt | (nfmt << 4), 0);
>> 	args[idx++] = LLVMConstInt(ctx->i32, (glc ? 1 : 0) + (slc ? 2 : 0), 0);
>> -	unsigned func = HAVE_LLVM < 0x900 && num_channels == 3 ? 4 : num_channels;
>> +	unsigned func = !ac_has_vec3_support(ctx->chip_class) && num_channels == 3 ? 4 : num_channels;
>> 	const char *indexing_kind = structurized ? "struct" : "raw";
>> 	char name[256], type_name[8];
>>
>> @@ -2011,7 +2011,7 @@ ac_build_llvm8_tbuffer_store(struct ac_llvm_context *ctx,
>> 	args[idx++] = soffset ? soffset : ctx->i32_0;
>> 	args[idx++] = LLVMConstInt(ctx->i32, dfmt | (nfmt << 4), 0);
>> 	args[idx++] = LLVMConstInt(ctx->i32, (glc ? 1 : 0) + (slc ? 2 : 0), 0);
>> -	unsigned func = HAVE_LLVM < 0x900 && num_channels == 3 ? 4 : num_channels;
>> +	unsigned func = !ac_has_vec3_support(ctx->chip_class) && num_channels == 3 ? 4 : num_channels;
>> 	const char *indexing_kind = structurized ? "struct" : "raw";
>> 	char name[256], type_name[8];
>>
>> diff --git a/src/amd/common/ac_llvm_util.h b/src/amd/common/ac_llvm_util.h
>> index ca00540da80..a45647a3360 100644
>> --- a/src/amd/common/ac_llvm_util.h
>> +++ b/src/amd/common/ac_llvm_util.h
>> @@ -146,6 +146,15 @@ bool ac_compile_module_to_binary(struct ac_compiler_passes *p, LLVMModuleRef mod
>> void ac_llvm_add_barrier_noop_pass(LLVMPassManagerRef passmgr);
>> void ac_enable_global_isel(LLVMTargetMachineRef tm);
>>
>> +static inline bool
>> +ac_has_vec3_support(enum chip_class chip)
>> +{
>> +	if (chip == GFX6)
>> +		return false;
>> +
>> +	return HAVE_LLVM >= 0x900;
>> +}
>> +
>> #ifdef __cplusplus
>> }
>> #endif
>> diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
>> index 51f92a6b062..429dac63d63 100644
>> --- a/src/amd/common/ac_nir_to_llvm.c
>> +++ b/src/amd/common/ac_nir_to_llvm.c
>> @@ -1576,7 +1576,8 @@ static void visit_store_ssbo(struct ac_nir_context *ctx,
>>
>> 		/* Due to an LLVM limitation with LLVM < 9, split 3-element
>> 		 * writes into a 2-element and a 1-element write. */
>> -		if (count == 3 && (elem_size_bytes != 4 || HAVE_LLVM < 0x900)) {
>> +		if (count == 3 &&
>> +		    (elem_size_bytes != 4 || !ac_has_vec3_support(ctx->ac.chip_class))) {
>> 			writemask |= 1 << (start + 2);
>> 			count = 2;
>> 		}
>> diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c
>> index dca4bebcdd1..ab552ae34ab 100644
>> --- a/src/amd/vulkan/radv_nir_to_llvm.c
>> +++ b/src/amd/vulkan/radv_nir_to_llvm.c
>> @@ -2768,7 +2768,7 @@ radv_emit_stream_output(struct radv_shader_context *ctx,
>> 		/* fall through */
>> 	case 4: /* as v4i32 */
>> 		vdata = ac_build_gather_values(&ctx->ac, out,
>> -					       HAVE_LLVM < 0x900 ?
>> +					       !ac_has_vec3_support(ctx->ac.chip_class) ?
>> 					       util_next_power_of_two(num_comps) :
>> 					       num_comps);
>> 		break;
>> -- 
>> 2.21.0
>>
>> _______________________________________________
>> mesa-dev mailing list
>> mesa-dev@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev