nir: optimize gl_SampleMaskIn to gl_HelperInvocation for radeonsi when possible

Submitted by Marek Olšák on April 10, 2019, 2:03 a.m.

Details

Message ID 20190410020321.24116-1-maraeo@gmail.com
State New
Headers show
Series "nir: optimize gl_SampleMaskIn to gl_HelperInvocation for radeonsi when possible" ( rev: 1 ) in Mesa

Not browsing as part of any series.

Commit Message

Marek Olšák April 10, 2019, 2:03 a.m.
From: Marek Olšák <marek.olsak@amd.com>

---
 src/compiler/nir/nir.h                    |  8 +++++
 src/compiler/nir/nir_opt_intrinsics.c     | 40 +++++++++++++++++++++--
 src/gallium/drivers/radeonsi/si_get.c     |  1 +
 src/mesa/state_tracker/st_glsl_to_nir.cpp |  1 +
 4 files changed, 48 insertions(+), 2 deletions(-)

Patch hide | download patch | download mbox

diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 09950bf3398..d3874705235 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -2283,20 +2283,28 @@  typedef struct nir_shader_compiler_options {
     *
     * This depends on some possibly hw implementation details, which may
     * not be true for all hw.  In particular that the FS is only executed
     * for covered samples or for helper invocations.  So, do not blindly
     * enable this option.
     *
     * Note: See also issue #22 in ARB_shader_image_load_store
     */
    bool lower_helper_invocation;
 
+   /**
+    * Convert gl_SampleMaskIn to gl_HelperInvocation as follows:
+    *
+    *   gl_SampleMaskIn == 0 ---> gl_HelperInvocation
+    *   gl_SampleMaskIn != 0 ---> !gl_HelperInvocation
+    */
+   bool optimize_sample_mask_in;
+
    bool lower_cs_local_index_from_id;
    bool lower_cs_local_id_from_index;
 
    bool lower_device_index_to_zero;
 
    /* Set if nir_lower_wpos_ytransform() should also invert gl_PointCoord. */
    bool lower_wpos_pntc;
 
    bool lower_hadd;
    bool lower_add_sat;
diff --git a/src/compiler/nir/nir_opt_intrinsics.c b/src/compiler/nir/nir_opt_intrinsics.c
index 7b054faa204..2f9e4e466c9 100644
--- a/src/compiler/nir/nir_opt_intrinsics.c
+++ b/src/compiler/nir/nir_opt_intrinsics.c
@@ -22,21 +22,22 @@ 
  */
 
 #include "nir.h"
 #include "nir_builder.h"
 
 /**
  * \file nir_opt_intrinsics.c
  */
 
 static bool
-opt_intrinsics_impl(nir_function_impl *impl)
+opt_intrinsics_impl(nir_function_impl *impl,
+                    const struct nir_shader_compiler_options *options)
 {
    nir_builder b;
    nir_builder_init(&b, impl);
    bool progress = false;
 
    nir_foreach_block(block, impl) {
       nir_foreach_instr_safe(instr, block) {
          if (instr->type != nir_instr_type_intrinsic)
             continue;
 
@@ -48,20 +49,55 @@  opt_intrinsics_impl(nir_function_impl *impl)
          case nir_intrinsic_vote_any:
          case nir_intrinsic_vote_all:
             if (nir_src_is_const(intrin->src[0]))
                replacement = nir_ssa_for_src(&b, intrin->src[0], 1);
             break;
          case nir_intrinsic_vote_feq:
          case nir_intrinsic_vote_ieq:
             if (nir_src_is_const(intrin->src[0]))
                replacement = nir_imm_true(&b);
             break;
+         case nir_intrinsic_load_sample_mask_in:
+            /* Transform:
+             *   gl_SampleMaskIn == 0 ---> gl_HelperInvocation
+             *   gl_SampleMaskIn != 0 ---> !gl_HelperInvocation
+             */
+            if (!options->optimize_sample_mask_in)
+               continue;
+
+            nir_foreach_use_safe(use_src, &intrin->dest.ssa) {
+               if (use_src->parent_instr->type == nir_instr_type_alu) {
+                  nir_alu_instr *alu = nir_instr_as_alu(use_src->parent_instr);
+
+                  if (alu->op == nir_op_ieq ||
+                      alu->op == nir_op_ine) {
+                     /* Check for 0 in either operand. */
+                     nir_const_value *const_val =
+                         nir_src_as_const_value(alu->src[0].src);
+                     if (!const_val)
+                        const_val = nir_src_as_const_value(alu->src[1].src);
+                     if (!const_val || const_val->i32[0] != 0)
+                        continue;
+
+                     nir_ssa_def *new_expr = nir_load_helper_invocation(&b, 1);
+
+                     if (alu->op == nir_op_ine32)
+                        new_expr = nir_inot(&b, new_expr);
+
+                     nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa,
+                                              nir_src_for_ssa(new_expr));
+                     nir_instr_remove(&alu->instr);
+                     continue;
+                  }
+               }
+            }
+            continue;
          default:
             break;
          }
 
          if (!replacement)
             continue;
 
          nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
                                   nir_src_for_ssa(replacement));
          nir_instr_remove(instr);
@@ -74,19 +110,19 @@  opt_intrinsics_impl(nir_function_impl *impl)
 
 bool
 nir_opt_intrinsics(nir_shader *shader)
 {
    bool progress = false;
 
    nir_foreach_function(function, shader) {
       if (!function->impl)
          continue;
 
-      if (opt_intrinsics_impl(function->impl)) {
+      if (opt_intrinsics_impl(function->impl, shader->options)) {
          progress = true;
          nir_metadata_preserve(function->impl, nir_metadata_block_index |
                                                nir_metadata_dominance);
       }
    }
 
    return progress;
 }
diff --git a/src/gallium/drivers/radeonsi/si_get.c b/src/gallium/drivers/radeonsi/si_get.c
index 58b56b34d13..2142d5a33f2 100644
--- a/src/gallium/drivers/radeonsi/si_get.c
+++ b/src/gallium/drivers/radeonsi/si_get.c
@@ -494,20 +494,21 @@  static const struct nir_shader_compiler_options nir_options = {
 	.lower_pack_snorm_2x16 = true,
 	.lower_pack_snorm_4x8 = true,
 	.lower_pack_unorm_2x16 = true,
 	.lower_pack_unorm_4x8 = true,
 	.lower_unpack_snorm_2x16 = true,
 	.lower_unpack_snorm_4x8 = true,
 	.lower_unpack_unorm_2x16 = true,
 	.lower_unpack_unorm_4x8 = true,
 	.lower_extract_byte = true,
 	.lower_extract_word = true,
+	.optimize_sample_mask_in = true,
 	.max_unroll_iterations = 32,
 	.native_integers = true,
 };
 
 static const void *
 si_get_compiler_options(struct pipe_screen *screen,
 			enum pipe_shader_ir ir,
 			enum pipe_shader_type shader)
 {
 	assert(ir == PIPE_SHADER_IR_NIR);
diff --git a/src/mesa/state_tracker/st_glsl_to_nir.cpp b/src/mesa/state_tracker/st_glsl_to_nir.cpp
index fb10869c9f9..8028ccf7110 100644
--- a/src/mesa/state_tracker/st_glsl_to_nir.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_nir.cpp
@@ -497,20 +497,21 @@  st_glsl_to_nir_post_opts(struct st_context *st, struct gl_program *prog,
    /* This has to be done last.  Any operation the can cause
     * prog->ParameterValues to get reallocated (e.g., anything that adds a
     * program constant) has to happen before creating this linkage.
     */
    _mesa_associate_uniform_storage(st->ctx, shader_program, prog, true);
 
    st_set_prog_affected_state_flags(prog);
 
    NIR_PASS_V(nir, st_nir_lower_builtin);
    NIR_PASS_V(nir, gl_nir_lower_atomics, shader_program, true);
+   NIR_PASS_V(nir, nir_opt_intrinsics);
 
    nir_variable_mode mask = nir_var_function_temp;
    nir_remove_dead_variables(nir, mask);
 
    if (st->ctx->_Shader->Flags & GLSL_DUMP) {
       _mesa_log("\n");
       _mesa_log("NIR IR for linked %s program %d:\n",
              _mesa_shader_stage_to_string(prog->info.stage),
              shader_program->Name);
       nir_print_shader(nir, _mesa_get_log_file());

Comments


On 16/4/19 7:03 am, Marek Olšák wrote:
> ping
> 
> On Tue, Apr 9, 2019 at 10:03 PM Marek Olšák <maraeo@gmail.com 
> <mailto:maraeo@gmail.com>> wrote:
> 
>     From: Marek Olšák <marek.olsak@amd.com <mailto:marek.olsak@amd.com>>
> 
>     ---
>       src/compiler/nir/nir.h                    |  8 +++++
>       src/compiler/nir/nir_opt_intrinsics.c     | 40 +++++++++++++++++++++--
>       src/gallium/drivers/radeonsi/si_get.c     |  1 +
>       src/mesa/state_tracker/st_glsl_to_nir.cpp |  1 +
>       4 files changed, 48 insertions(+), 2 deletions(-)
> 
>     diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
>     index 09950bf3398..d3874705235 100644
>     --- a/src/compiler/nir/nir.h
>     +++ b/src/compiler/nir/nir.h
>     @@ -2283,20 +2283,28 @@ typedef struct nir_shader_compiler_options {
>           *
>           * This depends on some possibly hw implementation details,
>     which may
>           * not be true for all hw.  In particular that the FS is only
>     executed
>           * for covered samples or for helper invocations.  So, do not
>     blindly
>           * enable this option.
>           *
>           * Note: See also issue #22 in ARB_shader_image_load_store
>           */
>          bool lower_helper_invocation;
> 
>     +   /**
>     +    * Convert gl_SampleMaskIn to gl_HelperInvocation as follows:
>     +    *
>     +    *   gl_SampleMaskIn == 0 ---> gl_HelperInvocation
>     +    *   gl_SampleMaskIn != 0 ---> !gl_HelperInvocation
>     +    */
>     +   bool optimize_sample_mask_in;
>     +
>          bool lower_cs_local_index_from_id;
>          bool lower_cs_local_id_from_index;
> 
>          bool lower_device_index_to_zero;
> 
>          /* Set if nir_lower_wpos_ytransform() should also invert
>     gl_PointCoord. */
>          bool lower_wpos_pntc;
> 
>          bool lower_hadd;
>          bool lower_add_sat;
>     diff --git a/src/compiler/nir/nir_opt_intrinsics.c
>     b/src/compiler/nir/nir_opt_intrinsics.c
>     index 7b054faa204..2f9e4e466c9 100644
>     --- a/src/compiler/nir/nir_opt_intrinsics.c
>     +++ b/src/compiler/nir/nir_opt_intrinsics.c
>     @@ -22,21 +22,22 @@
>        */
> 
>       #include "nir.h"
>       #include "nir_builder.h"
> 
>       /**
>        * \file nir_opt_intrinsics.c
>        */
> 
>       static bool
>     -opt_intrinsics_impl(nir_function_impl *impl)
>     +opt_intrinsics_impl(nir_function_impl *impl,
>     +                    const struct nir_shader_compiler_options *options)
>       {
>          nir_builder b;
>          nir_builder_init(&b, impl);
>          bool progress = false;
> 
>          nir_foreach_block(block, impl) {
>             nir_foreach_instr_safe(instr, block) {
>                if (instr->type != nir_instr_type_intrinsic)
>                   continue;
> 
>     @@ -48,20 +49,55 @@ opt_intrinsics_impl(nir_function_impl *impl)
>                case nir_intrinsic_vote_any:
>                case nir_intrinsic_vote_all:
>                   if (nir_src_is_const(intrin->src[0]))
>                      replacement = nir_ssa_for_src(&b, intrin->src[0], 1);
>                   break;
>                case nir_intrinsic_vote_feq:
>                case nir_intrinsic_vote_ieq:
>                   if (nir_src_is_const(intrin->src[0]))
>                      replacement = nir_imm_true(&b);
>                   break;
>     +         case nir_intrinsic_load_sample_mask_in:
>     +            /* Transform:
>     +             *   gl_SampleMaskIn == 0 ---> gl_HelperInvocation
>     +             *   gl_SampleMaskIn != 0 ---> !gl_HelperInvocation
>     +             */
>     +            if (!options->optimize_sample_mask_in)
>     +               continue;
>     +
>     +            nir_foreach_use_safe(use_src, &intrin->dest.ssa) {
>     +               if (use_src->parent_instr->type == nir_instr_type_alu) {
>     +                  nir_alu_instr *alu =
>     nir_instr_as_alu(use_src->parent_instr);
>     +
>     +                  if (alu->op == nir_op_ieq ||
>     +                      alu->op == nir_op_ine) {
>     +                     /* Check for 0 in either operand. */
>     +                     nir_const_value *const_val =
>     +                         nir_src_as_const_value(alu->src[0].src);
>     +                     if (!const_val)
>     +                        const_val =
>     nir_src_as_const_value(alu->src[1].src);
>     +                     if (!const_val || const_val->i32[0] != 0)
>     +                        continue;
>     +
>     +                     nir_ssa_def *new_expr =
>     nir_load_helper_invocation(&b, 1);
>     +
>     +                     if (alu->op == nir_op_ine32)

How can this be nir_op_ine32 when the outer if condition is:

alu->op == nir_op_ieq || alu->op == nir_op_ine

??

>     +                        new_expr = nir_inot(&b, new_expr);
>     +
>     +                     nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa,
>     +                                             
>     nir_src_for_ssa(new_expr));
>     +                     nir_instr_remove(&alu->instr);
>     +                     continue;
>     +                  }
>     +               }
>     +            }
>     +            continue;
>                default:
>                   break;
>                }
> 
>                if (!replacement)
>                   continue;
> 
>                nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
>                                         nir_src_for_ssa(replacement));
>                nir_instr_remove(instr);
>     @@ -74,19 +110,19 @@ opt_intrinsics_impl(nir_function_impl *impl)
> 
>       bool
>       nir_opt_intrinsics(nir_shader *shader)
>       {
>          bool progress = false;
> 
>          nir_foreach_function(function, shader) {
>             if (!function->impl)
>                continue;
> 
>     -      if (opt_intrinsics_impl(function->impl)) {
>     +      if (opt_intrinsics_impl(function->impl, shader->options)) {
>                progress = true;
>                nir_metadata_preserve(function->impl,
>     nir_metadata_block_index |
>                                                     
>     nir_metadata_dominance);
>             }
>          }
> 
>          return progress;
>       }
>     diff --git a/src/gallium/drivers/radeonsi/si_get.c
>     b/src/gallium/drivers/radeonsi/si_get.c
>     index 58b56b34d13..2142d5a33f2 100644
>     --- a/src/gallium/drivers/radeonsi/si_get.c
>     +++ b/src/gallium/drivers/radeonsi/si_get.c
>     @@ -494,20 +494,21 @@ static const struct
>     nir_shader_compiler_options nir_options = {
>              .lower_pack_snorm_2x16 = true,
>              .lower_pack_snorm_4x8 = true,
>              .lower_pack_unorm_2x16 = true,
>              .lower_pack_unorm_4x8 = true,
>              .lower_unpack_snorm_2x16 = true,
>              .lower_unpack_snorm_4x8 = true,
>              .lower_unpack_unorm_2x16 = true,
>              .lower_unpack_unorm_4x8 = true,
>              .lower_extract_byte = true,
>              .lower_extract_word = true,
>     +       .optimize_sample_mask_in = true,
>              .max_unroll_iterations = 32,
>              .native_integers = true,
>       };
> 
>       static const void *
>       si_get_compiler_options(struct pipe_screen *screen,
>                              enum pipe_shader_ir ir,
>                              enum pipe_shader_type shader)
>       {
>              assert(ir == PIPE_SHADER_IR_NIR);
>     diff --git a/src/mesa/state_tracker/st_glsl_to_nir.cpp
>     b/src/mesa/state_tracker/st_glsl_to_nir.cpp
>     index fb10869c9f9..8028ccf7110 100644
>     --- a/src/mesa/state_tracker/st_glsl_to_nir.cpp
>     +++ b/src/mesa/state_tracker/st_glsl_to_nir.cpp
>     @@ -497,20 +497,21 @@ st_glsl_to_nir_post_opts(struct st_context
>     *st, struct gl_program *prog,
>          /* This has to be done last.  Any operation the can cause
>           * prog->ParameterValues to get reallocated (e.g., anything
>     that adds a
>           * program constant) has to happen before creating this linkage.
>           */
>          _mesa_associate_uniform_storage(st->ctx, shader_program, prog,
>     true);
> 
>          st_set_prog_affected_state_flags(prog);
> 
>          NIR_PASS_V(nir, st_nir_lower_builtin);
>          NIR_PASS_V(nir, gl_nir_lower_atomics, shader_program, true);
>     +   NIR_PASS_V(nir, nir_opt_intrinsics);
> 
>          nir_variable_mode mask = nir_var_function_temp;
>          nir_remove_dead_variables(nir, mask);
> 
>          if (st->ctx->_Shader->Flags & GLSL_DUMP) {
>             _mesa_log("\n");
>             _mesa_log("NIR IR for linked %s program %d:\n",
>                    _mesa_shader_stage_to_string(prog->info.stage),
>                    shader_program->Name);
>             nir_print_shader(nir, _mesa_get_log_file());
>     -- 
>     2.17.1
> 
> 
> _______________________________________________
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>

On 16/4/19 1:14 pm, Marek Olšák wrote:
> 
> 
> On Mon, Apr 15, 2019, 10:41 PM Timothy Arceri <tarceri@itsqueeze.com 
> <mailto:tarceri@itsqueeze.com>> wrote:
> 
> 
> 
>     On 16/4/19 7:03 am, Marek Olšák wrote:
>      > ping
>      >
>      > On Tue, Apr 9, 2019 at 10:03 PM Marek Olšák <maraeo@gmail.com
>     <mailto:maraeo@gmail.com>
>      > <mailto:maraeo@gmail.com <mailto:maraeo@gmail.com>>> wrote:
>      >
>      >     From: Marek Olšák <marek.olsak@amd.com
>     <mailto:marek.olsak@amd.com> <mailto:marek.olsak@amd.com
>     <mailto:marek.olsak@amd.com>>>
>      >
>      >     ---
>      >       src/compiler/nir/nir.h                    |  8 +++++
>      >       src/compiler/nir/nir_opt_intrinsics.c     | 40
>     +++++++++++++++++++++--
>      >       src/gallium/drivers/radeonsi/si_get.c     |  1 +
>      >       src/mesa/state_tracker/st_glsl_to_nir.cpp |  1 +
>      >       4 files changed, 48 insertions(+), 2 deletions(-)
>      >
>      >     diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
>      >     index 09950bf3398..d3874705235 100644
>      >     --- a/src/compiler/nir/nir.h
>      >     +++ b/src/compiler/nir/nir.h
>      >     @@ -2283,20 +2283,28 @@ typedef struct
>     nir_shader_compiler_options {
>      >           *
>      >           * This depends on some possibly hw implementation details,
>      >     which may
>      >           * not be true for all hw.  In particular that the FS is
>     only
>      >     executed
>      >           * for covered samples or for helper invocations.  So,
>     do not
>      >     blindly
>      >           * enable this option.
>      >           *
>      >           * Note: See also issue #22 in ARB_shader_image_load_store
>      >           */
>      >          bool lower_helper_invocation;
>      >
>      >     +   /**
>      >     +    * Convert gl_SampleMaskIn to gl_HelperInvocation as follows:
>      >     +    *
>      >     +    *   gl_SampleMaskIn == 0 ---> gl_HelperInvocation
>      >     +    *   gl_SampleMaskIn != 0 ---> !gl_HelperInvocation
>      >     +    */
>      >     +   bool optimize_sample_mask_in;
>      >     +
>      >          bool lower_cs_local_index_from_id;
>      >          bool lower_cs_local_id_from_index;
>      >
>      >          bool lower_device_index_to_zero;
>      >
>      >          /* Set if nir_lower_wpos_ytransform() should also invert
>      >     gl_PointCoord. */
>      >          bool lower_wpos_pntc;
>      >
>      >          bool lower_hadd;
>      >          bool lower_add_sat;
>      >     diff --git a/src/compiler/nir/nir_opt_intrinsics.c
>      >     b/src/compiler/nir/nir_opt_intrinsics.c
>      >     index 7b054faa204..2f9e4e466c9 100644
>      >     --- a/src/compiler/nir/nir_opt_intrinsics.c
>      >     +++ b/src/compiler/nir/nir_opt_intrinsics.c
>      >     @@ -22,21 +22,22 @@
>      >        */
>      >
>      >       #include "nir.h"
>      >       #include "nir_builder.h"
>      >
>      >       /**
>      >        * \file nir_opt_intrinsics.c
>      >        */
>      >
>      >       static bool
>      >     -opt_intrinsics_impl(nir_function_impl *impl)
>      >     +opt_intrinsics_impl(nir_function_impl *impl,
>      >     +                    const struct nir_shader_compiler_options
>     *options)
>      >       {
>      >          nir_builder b;
>      >          nir_builder_init(&b, impl);
>      >          bool progress = false;
>      >
>      >          nir_foreach_block(block, impl) {
>      >             nir_foreach_instr_safe(instr, block) {
>      >                if (instr->type != nir_instr_type_intrinsic)
>      >                   continue;
>      >
>      >     @@ -48,20 +49,55 @@ opt_intrinsics_impl(nir_function_impl *impl)
>      >                case nir_intrinsic_vote_any:
>      >                case nir_intrinsic_vote_all:
>      >                   if (nir_src_is_const(intrin->src[0]))
>      >                      replacement = nir_ssa_for_src(&b,
>     intrin->src[0], 1);
>      >                   break;
>      >                case nir_intrinsic_vote_feq:
>      >                case nir_intrinsic_vote_ieq:
>      >                   if (nir_src_is_const(intrin->src[0]))
>      >                      replacement = nir_imm_true(&b);
>      >                   break;
>      >     +         case nir_intrinsic_load_sample_mask_in:
>      >     +            /* Transform:
>      >     +             *   gl_SampleMaskIn == 0 ---> gl_HelperInvocation
>      >     +             *   gl_SampleMaskIn != 0 ---> !gl_HelperInvocation
>      >     +             */
>      >     +            if (!options->optimize_sample_mask_in)
>      >     +               continue;
>      >     +
>      >     +            nir_foreach_use_safe(use_src, &intrin->dest.ssa) {
>      >     +               if (use_src->parent_instr->type ==
>     nir_instr_type_alu) {
>      >     +                  nir_alu_instr *alu =
>      >     nir_instr_as_alu(use_src->parent_instr);
>      >     +
>      >     +                  if (alu->op == nir_op_ieq ||
>      >     +                      alu->op == nir_op_ine) {
>      >     +                     /* Check for 0 in either operand. */
>      >     +                     nir_const_value *const_val =
>      >     +                       
>       nir_src_as_const_value(alu->src[0].src);
>      >     +                     if (!const_val)
>      >     +                        const_val =
>      >     nir_src_as_const_value(alu->src[1].src);
>      >     +                     if (!const_val || const_val->i32[0] != 0)
>      >     +                        continue;
>      >     +
>      >     +                     nir_ssa_def *new_expr =
>      >     nir_load_helper_invocation(&b, 1);
>      >     +
>      >     +                     if (alu->op == nir_op_ine32)
> 
>     How can this be nir_op_ine32 when the outer if condition is:
> 
>     alu->op == nir_op_ieq || alu->op == nir_op_ine
> 
>     ??
> 
> 
> Yeah, that's a leftover from the previous version that used the 32 
> suffix everywhere and didn't work.

Ok. Well with that fixed the rest of the logic looks correct. I can't 
say I'm informed enough to know the details of how the opt works so:

Acked-by: Timothy Arceri <tarceri@itsqueeze.com>

> 
> Marek
> 
> 
>      >     +                        new_expr = nir_inot(&b, new_expr);
>      >     +
>      >     +                   
>       nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa,
>      >     +
>      >     nir_src_for_ssa(new_expr));
>      >     +                     nir_instr_remove(&alu->instr);
>      >     +                     continue;
>      >     +                  }
>      >     +               }
>      >     +            }
>      >     +            continue;
>      >                default:
>      >                   break;
>      >                }
>      >
>      >                if (!replacement)
>      >                   continue;
>      >
>      >                nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
>      >                                       
>       nir_src_for_ssa(replacement));
>      >                nir_instr_remove(instr);
>      >     @@ -74,19 +110,19 @@ opt_intrinsics_impl(nir_function_impl *impl)
>      >
>      >       bool
>      >       nir_opt_intrinsics(nir_shader *shader)
>      >       {
>      >          bool progress = false;
>      >
>      >          nir_foreach_function(function, shader) {
>      >             if (!function->impl)
>      >                continue;
>      >
>      >     -      if (opt_intrinsics_impl(function->impl)) {
>      >     +      if (opt_intrinsics_impl(function->impl,
>     shader->options)) {
>      >                progress = true;
>      >                nir_metadata_preserve(function->impl,
>      >     nir_metadata_block_index |
>      >
>      >     nir_metadata_dominance);
>      >             }
>      >          }
>      >
>      >          return progress;
>      >       }
>      >     diff --git a/src/gallium/drivers/radeonsi/si_get.c
>      >     b/src/gallium/drivers/radeonsi/si_get.c
>      >     index 58b56b34d13..2142d5a33f2 100644
>      >     --- a/src/gallium/drivers/radeonsi/si_get.c
>      >     +++ b/src/gallium/drivers/radeonsi/si_get.c
>      >     @@ -494,20 +494,21 @@ static const struct
>      >     nir_shader_compiler_options nir_options = {
>      >              .lower_pack_snorm_2x16 = true,
>      >              .lower_pack_snorm_4x8 = true,
>      >              .lower_pack_unorm_2x16 = true,
>      >              .lower_pack_unorm_4x8 = true,
>      >              .lower_unpack_snorm_2x16 = true,
>      >              .lower_unpack_snorm_4x8 = true,
>      >              .lower_unpack_unorm_2x16 = true,
>      >              .lower_unpack_unorm_4x8 = true,
>      >              .lower_extract_byte = true,
>      >              .lower_extract_word = true,
>      >     +       .optimize_sample_mask_in = true,
>      >              .max_unroll_iterations = 32,
>      >              .native_integers = true,
>      >       };
>      >
>      >       static const void *
>      >       si_get_compiler_options(struct pipe_screen *screen,
>      >                              enum pipe_shader_ir ir,
>      >                              enum pipe_shader_type shader)
>      >       {
>      >              assert(ir == PIPE_SHADER_IR_NIR);
>      >     diff --git a/src/mesa/state_tracker/st_glsl_to_nir.cpp
>      >     b/src/mesa/state_tracker/st_glsl_to_nir.cpp
>      >     index fb10869c9f9..8028ccf7110 100644
>      >     --- a/src/mesa/state_tracker/st_glsl_to_nir.cpp
>      >     +++ b/src/mesa/state_tracker/st_glsl_to_nir.cpp
>      >     @@ -497,20 +497,21 @@ st_glsl_to_nir_post_opts(struct st_context
>      >     *st, struct gl_program *prog,
>      >          /* This has to be done last.  Any operation the can cause
>      >           * prog->ParameterValues to get reallocated (e.g., anything
>      >     that adds a
>      >           * program constant) has to happen before creating this
>     linkage.
>      >           */
>      >          _mesa_associate_uniform_storage(st->ctx, shader_program,
>     prog,
>      >     true);
>      >
>      >          st_set_prog_affected_state_flags(prog);
>      >
>      >          NIR_PASS_V(nir, st_nir_lower_builtin);
>      >          NIR_PASS_V(nir, gl_nir_lower_atomics, shader_program, true);
>      >     +   NIR_PASS_V(nir, nir_opt_intrinsics);
>      >
>      >          nir_variable_mode mask = nir_var_function_temp;
>      >          nir_remove_dead_variables(nir, mask);
>      >
>      >          if (st->ctx->_Shader->Flags & GLSL_DUMP) {
>      >             _mesa_log("\n");
>      >             _mesa_log("NIR IR for linked %s program %d:\n",
>      >                    _mesa_shader_stage_to_string(prog->info.stage),
>      >                    shader_program->Name);
>      >             nir_print_shader(nir, _mesa_get_log_file());
>      >     --
>      >     2.17.1
>      >
>      >
>      > _______________________________________________
>      > mesa-dev mailing list
>      > mesa-dev@lists.freedesktop.org
>     <mailto:mesa-dev@lists.freedesktop.org>
>      > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>      >
>