[3/3] radv: add a workaround for correct derivs after kill

Submitted by Samuel Pitoiset on May 10, 2018, 9:43 a.m.

Details

Message ID 20180510094305.22864-3-samuel.pitoiset@gmail.com
State Rejected
Headers show
Series "Series without cover letter" ( rev: 1 ) in Mesa

Not browsing as part of any series.

Commit Message

Samuel Pitoiset May 10, 2018, 9:43 a.m.
D3D behaviour and Vulkan are different. The workaround can
be enabled with RADV_DEBUG=correctderivsafterkill.

When enabled, this fixes rendering issues with DXVK. For
now, I don't want to force-enable this behaviour for DXVK
because having per-app workarounds is a bad idea.

We are probably going to draft a little extension that
explicitely enable this when needed.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
---
 src/amd/vulkan/radv_debug.h       |  1 +
 src/amd/vulkan/radv_device.c      |  1 +
 src/amd/vulkan/radv_nir_to_llvm.c | 36 +++++++++++++++++++++++++++++++
 src/amd/vulkan/radv_shader.c      |  2 ++
 src/amd/vulkan/radv_shader.h      |  2 ++
 5 files changed, 42 insertions(+)

Patch hide | download patch | download mbox

diff --git a/src/amd/vulkan/radv_debug.h b/src/amd/vulkan/radv_debug.h
index 9dda9b6b0c..d63f6538d1 100644
--- a/src/amd/vulkan/radv_debug.h
+++ b/src/amd/vulkan/radv_debug.h
@@ -45,6 +45,7 @@  enum {
 	RADV_DEBUG_PREOPTIR          = 0x8000,
 	RADV_DEBUG_NO_DYNAMIC_BOUNDS = 0x10000,
 	RADV_DEBUG_NO_OUT_OF_ORDER   = 0x20000,
+	RADV_DEBUG_CORRECT_DERIVS_AFTER_KILL = 0x40000,
 };
 
 enum {
diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index a7f4a5ab7b..d01f20617f 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -391,6 +391,7 @@  static const struct debug_control radv_debug_options[] = {
 	{"preoptir", RADV_DEBUG_PREOPTIR},
 	{"nodynamicbounds", RADV_DEBUG_NO_DYNAMIC_BOUNDS},
 	{"nooutoforder", RADV_DEBUG_NO_OUT_OF_ORDER},
+	{"correctderivsafterkill", RADV_DEBUG_CORRECT_DERIVS_AFTER_KILL},
 	{NULL, 0}
 };
 
diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c
index e2d241e495..60ee408871 100644
--- a/src/amd/vulkan/radv_nir_to_llvm.c
+++ b/src/amd/vulkan/radv_nir_to_llvm.c
@@ -85,6 +85,8 @@  struct radv_shader_context {
 	LLVMValueRef persp_sample, persp_center, persp_centroid;
 	LLVMValueRef linear_sample, linear_center, linear_centroid;
 
+	LLVMValueRef postponed_kill;
+
 	gl_shader_stage stage;
 
 	LLVMValueRef inputs[RADEON_LLVM_MAX_INPUTS * 4];
@@ -1486,6 +1488,22 @@  load_gs_input(struct ac_shader_abi *abi,
 static void radv_emit_kill(struct ac_shader_abi *abi, LLVMValueRef visible)
 {
 	struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
+
+	if (ctx->shader_info->fs.force_correct_derivs_after_kill) {
+		/* LLVM 6.0 can kill immediately while maintaining WQM. */
+		if (HAVE_LLVM >= 0x0600) {
+			ac_build_kill_if_false(&ctx->ac,
+					       ac_build_wqm_vote(&ctx->ac,
+								 visible));
+		}
+
+		LLVMValueRef mask = LLVMBuildLoad(ctx->ac.builder,
+						  ctx->postponed_kill, "");
+		mask = LLVMBuildAnd(ctx->ac.builder, mask, visible, "");
+		LLVMBuildStore(ctx->ac.builder, mask, ctx->postponed_kill);
+		return;
+	}
+
 	ac_build_kill_if_false(&ctx->ac, visible);
 }
 
@@ -2776,6 +2794,12 @@  handle_fs_outputs_post(struct radv_shader_context *ctx)
 	LLVMValueRef depth = NULL, stencil = NULL, samplemask = NULL;
 	struct ac_export_args color_args[8];
 
+	if (ctx->postponed_kill) {
+		ac_build_kill_if_false(&ctx->ac,
+				       LLVMBuildLoad(ctx->ac.builder,
+						     ctx->postponed_kill, ""));
+	}
+
 	for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) {
 		LLVMValueRef values[4];
 
@@ -3075,6 +3099,18 @@  LLVMModuleRef ac_translate_nir_to_llvm(LLVMTargetMachineRef tm,
 	create_function(&ctx, shaders[shader_count - 1]->info.stage, shader_count >= 2,
 	                shader_count >= 2 ? shaders[shader_count - 2]->info.stage  : MESA_SHADER_VERTEX);
 
+	if (shaders[0]->info.stage == MESA_SHADER_FRAGMENT &&
+	    shader_info->info.ps.uses_derivatives &&
+	    shader_info->info.ps.uses_kill &&
+	    options->force_correct_derivs_after_kill) {
+		ctx.postponed_kill =
+			ac_build_alloca_undef(&ctx.ac, ctx.ac.i1, "");
+		/* true = don't kill. */
+		LLVMBuildStore(ctx.ac.builder, ctx.ac.i1true, ctx.postponed_kill);
+
+		shader_info->fs.force_correct_derivs_after_kill = true;
+	}
+
 	ctx.abi.inputs = &ctx.inputs[0];
 	ctx.abi.emit_outputs = handle_shader_outputs_post;
 	ctx.abi.emit_vertex = visit_emit_vertex;
diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index 27b3fbed16..9bafd53b98 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -486,6 +486,8 @@  shader_variant_create(struct radv_device *device,
 				 device->instance->debug_flags & RADV_DEBUG_PREOPTIR;
 	options->record_llvm_ir = device->keep_shader_info;
 	options->tess_offchip_block_dw_size = device->tess_offchip_block_dw_size;
+	options->force_correct_derivs_after_kill =
+		device->instance->debug_flags & RADV_DEBUG_CORRECT_DERIVS_AFTER_KILL;
 
 	if (options->supports_spill)
 		tm_options |= AC_TM_SUPPORTS_SPILL;
diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h
index f8826f41ef..fd190dc537 100644
--- a/src/amd/vulkan/radv_shader.h
+++ b/src/amd/vulkan/radv_shader.h
@@ -108,6 +108,7 @@  struct radv_nir_compiler_options {
 	bool dump_shader;
 	bool dump_preoptir;
 	bool record_llvm_ir;
+	bool force_correct_derivs_after_kill;
 	enum radeon_family family;
 	enum chip_class chip_class;
 	uint32_t tess_offchip_block_dw_size;
@@ -224,6 +225,7 @@  struct radv_shader_variant_info {
 			uint32_t flat_shaded_mask;
 			bool can_discard;
 			bool early_fragment_test;
+			bool force_correct_derivs_after_kill;
 		} fs;
 		struct {
 			unsigned block_size[3];

Comments

I think this has two issues:

1) We don't properly handles stores, which should be disabled.
2) this could turn a loop which only has a discard as exit into an
infinite loop.

Also have we confirmed that LLVM 6.0+  works correctly?

On Thu, May 10, 2018 at 11:43 AM, Samuel Pitoiset
<samuel.pitoiset@gmail.com> wrote:
> D3D behaviour and Vulkan are different. The workaround can
> be enabled with RADV_DEBUG=correctderivsafterkill.
>
> When enabled, this fixes rendering issues with DXVK. For
> now, I don't want to force-enable this behaviour for DXVK
> because having per-app workarounds is a bad idea.
>
> We are probably going to draft a little extension that
> explicitely enable this when needed.
>
> Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
> ---
>  src/amd/vulkan/radv_debug.h       |  1 +
>  src/amd/vulkan/radv_device.c      |  1 +
>  src/amd/vulkan/radv_nir_to_llvm.c | 36 +++++++++++++++++++++++++++++++
>  src/amd/vulkan/radv_shader.c      |  2 ++
>  src/amd/vulkan/radv_shader.h      |  2 ++
>  5 files changed, 42 insertions(+)
>
> diff --git a/src/amd/vulkan/radv_debug.h b/src/amd/vulkan/radv_debug.h
> index 9dda9b6b0c..d63f6538d1 100644
> --- a/src/amd/vulkan/radv_debug.h
> +++ b/src/amd/vulkan/radv_debug.h
> @@ -45,6 +45,7 @@ enum {
>         RADV_DEBUG_PREOPTIR          = 0x8000,
>         RADV_DEBUG_NO_DYNAMIC_BOUNDS = 0x10000,
>         RADV_DEBUG_NO_OUT_OF_ORDER   = 0x20000,
> +       RADV_DEBUG_CORRECT_DERIVS_AFTER_KILL = 0x40000,
>  };
>
>  enum {
> diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
> index a7f4a5ab7b..d01f20617f 100644
> --- a/src/amd/vulkan/radv_device.c
> +++ b/src/amd/vulkan/radv_device.c
> @@ -391,6 +391,7 @@ static const struct debug_control radv_debug_options[] = {
>         {"preoptir", RADV_DEBUG_PREOPTIR},
>         {"nodynamicbounds", RADV_DEBUG_NO_DYNAMIC_BOUNDS},
>         {"nooutoforder", RADV_DEBUG_NO_OUT_OF_ORDER},
> +       {"correctderivsafterkill", RADV_DEBUG_CORRECT_DERIVS_AFTER_KILL},
>         {NULL, 0}
>  };
>
> diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c
> index e2d241e495..60ee408871 100644
> --- a/src/amd/vulkan/radv_nir_to_llvm.c
> +++ b/src/amd/vulkan/radv_nir_to_llvm.c
> @@ -85,6 +85,8 @@ struct radv_shader_context {
>         LLVMValueRef persp_sample, persp_center, persp_centroid;
>         LLVMValueRef linear_sample, linear_center, linear_centroid;
>
> +       LLVMValueRef postponed_kill;
> +
>         gl_shader_stage stage;
>
>         LLVMValueRef inputs[RADEON_LLVM_MAX_INPUTS * 4];
> @@ -1486,6 +1488,22 @@ load_gs_input(struct ac_shader_abi *abi,
>  static void radv_emit_kill(struct ac_shader_abi *abi, LLVMValueRef visible)
>  {
>         struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
> +
> +       if (ctx->shader_info->fs.force_correct_derivs_after_kill) {
> +               /* LLVM 6.0 can kill immediately while maintaining WQM. */
> +               if (HAVE_LLVM >= 0x0600) {
> +                       ac_build_kill_if_false(&ctx->ac,
> +                                              ac_build_wqm_vote(&ctx->ac,
> +                                                                visible));
> +               }
> +
> +               LLVMValueRef mask = LLVMBuildLoad(ctx->ac.builder,
> +                                                 ctx->postponed_kill, "");
> +               mask = LLVMBuildAnd(ctx->ac.builder, mask, visible, "");
> +               LLVMBuildStore(ctx->ac.builder, mask, ctx->postponed_kill);
> +               return;
> +       }
> +
>         ac_build_kill_if_false(&ctx->ac, visible);
>  }
>
> @@ -2776,6 +2794,12 @@ handle_fs_outputs_post(struct radv_shader_context *ctx)
>         LLVMValueRef depth = NULL, stencil = NULL, samplemask = NULL;
>         struct ac_export_args color_args[8];
>
> +       if (ctx->postponed_kill) {
> +               ac_build_kill_if_false(&ctx->ac,
> +                                      LLVMBuildLoad(ctx->ac.builder,
> +                                                    ctx->postponed_kill, ""));
> +       }
> +
>         for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) {
>                 LLVMValueRef values[4];
>
> @@ -3075,6 +3099,18 @@ LLVMModuleRef ac_translate_nir_to_llvm(LLVMTargetMachineRef tm,
>         create_function(&ctx, shaders[shader_count - 1]->info.stage, shader_count >= 2,
>                         shader_count >= 2 ? shaders[shader_count - 2]->info.stage  : MESA_SHADER_VERTEX);
>
> +       if (shaders[0]->info.stage == MESA_SHADER_FRAGMENT &&
> +           shader_info->info.ps.uses_derivatives &&
> +           shader_info->info.ps.uses_kill &&
> +           options->force_correct_derivs_after_kill) {
> +               ctx.postponed_kill =
> +                       ac_build_alloca_undef(&ctx.ac, ctx.ac.i1, "");
> +               /* true = don't kill. */
> +               LLVMBuildStore(ctx.ac.builder, ctx.ac.i1true, ctx.postponed_kill);
> +
> +               shader_info->fs.force_correct_derivs_after_kill = true;
> +       }
> +
>         ctx.abi.inputs = &ctx.inputs[0];
>         ctx.abi.emit_outputs = handle_shader_outputs_post;
>         ctx.abi.emit_vertex = visit_emit_vertex;
> diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
> index 27b3fbed16..9bafd53b98 100644
> --- a/src/amd/vulkan/radv_shader.c
> +++ b/src/amd/vulkan/radv_shader.c
> @@ -486,6 +486,8 @@ shader_variant_create(struct radv_device *device,
>                                  device->instance->debug_flags & RADV_DEBUG_PREOPTIR;
>         options->record_llvm_ir = device->keep_shader_info;
>         options->tess_offchip_block_dw_size = device->tess_offchip_block_dw_size;
> +       options->force_correct_derivs_after_kill =
> +               device->instance->debug_flags & RADV_DEBUG_CORRECT_DERIVS_AFTER_KILL;
>
>         if (options->supports_spill)
>                 tm_options |= AC_TM_SUPPORTS_SPILL;
> diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h
> index f8826f41ef..fd190dc537 100644
> --- a/src/amd/vulkan/radv_shader.h
> +++ b/src/amd/vulkan/radv_shader.h
> @@ -108,6 +108,7 @@ struct radv_nir_compiler_options {
>         bool dump_shader;
>         bool dump_preoptir;
>         bool record_llvm_ir;
> +       bool force_correct_derivs_after_kill;
>         enum radeon_family family;
>         enum chip_class chip_class;
>         uint32_t tess_offchip_block_dw_size;
> @@ -224,6 +225,7 @@ struct radv_shader_variant_info {
>                         uint32_t flat_shaded_mask;
>                         bool can_discard;
>                         bool early_fragment_test;
> +                       bool force_correct_derivs_after_kill;
>                 } fs;
>                 struct {
>                         unsigned block_size[3];
> --
> 2.17.0
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev