[backport] radv: apply the indexing workaround for atomic buffer operations on GFX9

Submitted by Samuel Pitoiset on May 8, 2019, 9:22 a.m.

Details

Message ID 20190508092236.31053-1-samuel.pitoiset@gmail.com
State Accepted
Headers show
Series "radv: apply the indexing workaround for atomic buffer operations on GFX9" ( rev: 2 ) in Mesa

Not browsing as part of any series.

Commit Message

Samuel Pitoiset May 8, 2019, 9:22 a.m.
Because the new raw/struct intrinsics are buggy with LLVM 8
(they weren't marked as a source of divergence), we fall back to the
old intrinsics for atomic buffer operations only. This means we need
to apply the indexing workaround for GFX9. The load/store
operations still use the new LLVM 8 intrinsics.

The fact that we need another workaround is painful, but we should
be able to clean that up a bit once LLVM 7 support is dropped.

This fixes a GPU hang with AC Odyssey and some rendering problems
with Nioh.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=110573
Fixes: 31164cf5f70 ("ac/nir: only use the new raw/struct image atomic intrinsics with LLVM 9+")
Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
---
 src/amd/common/ac_nir_to_llvm.c   | 12 +++++++-----
 src/amd/common/ac_shader_abi.h    |  1 +
 src/amd/vulkan/radv_nir_to_llvm.c |  6 ++++++
 3 files changed, 14 insertions(+), 5 deletions(-)

Patch hide | download patch | download mbox

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index b96dc7f5420..a0815995b12 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -2359,10 +2359,12 @@  static void get_image_coords(struct ac_nir_context *ctx,
 }
 
 static LLVMValueRef get_image_buffer_descriptor(struct ac_nir_context *ctx,
-                                                const nir_intrinsic_instr *instr, bool write)
+                                                const nir_intrinsic_instr *instr,
+						bool write, bool atomic)
 {
 	LLVMValueRef rsrc = get_image_descriptor(ctx, instr, AC_DESC_BUFFER, write);
-	if (ctx->abi->gfx9_stride_size_workaround) {
+	if (ctx->abi->gfx9_stride_size_workaround ||
+	    (ctx->abi->gfx9_stride_size_workaround_for_atomic && atomic)) {
 		LLVMValueRef elem_count = LLVMBuildExtractElement(ctx->ac.builder, rsrc, LLVMConstInt(ctx->ac.i32, 2, 0), "");
 		LLVMValueRef stride = LLVMBuildExtractElement(ctx->ac.builder, rsrc, LLVMConstInt(ctx->ac.i32, 1, 0), "");
 		stride = LLVMBuildLShr(ctx->ac.builder, stride, LLVMConstInt(ctx->ac.i32, 16, 0), "");
@@ -2395,7 +2397,7 @@  static LLVMValueRef visit_image_load(struct ac_nir_context *ctx,
 		unsigned num_channels = util_last_bit(mask);
 		LLVMValueRef rsrc, vindex;
 
-		rsrc = get_image_buffer_descriptor(ctx, instr, false);
+		rsrc = get_image_buffer_descriptor(ctx, instr, false, false);
 		vindex = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[1]),
 						 ctx->ac.i32_0, "");
 
@@ -2439,7 +2441,7 @@  static void visit_image_store(struct ac_nir_context *ctx,
 	if (dim == GLSL_SAMPLER_DIM_BUF) {
 		char name[48];
 		const char *types[] = { "f32", "v2f32", "v4f32" };
-		LLVMValueRef rsrc = get_image_buffer_descriptor(ctx, instr, true);
+		LLVMValueRef rsrc = get_image_buffer_descriptor(ctx, instr, true, false);
 		LLVMValueRef src = ac_to_float(&ctx->ac, get_src(ctx, instr->src[3]));
 		unsigned src_channels = ac_get_llvm_num_components(src);
 
@@ -2535,7 +2537,7 @@  static LLVMValueRef visit_image_atomic(struct ac_nir_context *ctx,
 	params[param_count++] = get_src(ctx, instr->src[3]);
 
 	if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF) {
-		params[param_count++] = get_image_buffer_descriptor(ctx, instr, true);
+		params[param_count++] = get_image_buffer_descriptor(ctx, instr, true, true);
 		params[param_count++] = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[1]),
 								ctx->ac.i32_0, ""); /* vindex */
 		params[param_count++] = ctx->ac.i32_0; /* voffset */
diff --git a/src/amd/common/ac_shader_abi.h b/src/amd/common/ac_shader_abi.h
index ee18e6c1923..9eb4d37257e 100644
--- a/src/amd/common/ac_shader_abi.h
+++ b/src/amd/common/ac_shader_abi.h
@@ -195,6 +195,7 @@  struct ac_shader_abi {
 	/* Whether to workaround GFX9 ignoring the stride for the buffer size if IDXEN=0
 	* and LLVM optimizes an indexed load with constant index to IDXEN=0. */
 	bool gfx9_stride_size_workaround;
+	bool gfx9_stride_size_workaround_for_atomic;
 };
 
 #endif /* AC_SHADER_ABI_H */
diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c
index b1eeb2cc1f7..3b5381f5353 100644
--- a/src/amd/vulkan/radv_nir_to_llvm.c
+++ b/src/amd/vulkan/radv_nir_to_llvm.c
@@ -3497,6 +3497,12 @@  LLVMModuleRef ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm,
 	ctx.abi.clamp_shadow_reference = false;
 	ctx.abi.gfx9_stride_size_workaround = ctx.ac.chip_class == GFX9 && HAVE_LLVM < 0x800;
 
+	/* Because the new raw/struct atomic intrinsics are buggy with LLVM 8,
+	 * we fallback to the old intrinsics for atomic buffer image operations
+	 * and thus we need to apply the indexing workaround...
+	 */
+	ctx.abi.gfx9_stride_size_workaround_for_atomic = ctx.ac.chip_class == GFX9 && HAVE_LLVM < 0x900;
+
 	if (shader_count >= 2)
 		ac_init_exec_full_mask(&ctx.ac);