[Mesa-dev,28/46] glsl: don't lower variable indexing on non-patch tessellation inputs/outputs

Submitted by Marek Olšák on June 16, 2015, 11:01 p.m.

Details

Message ID 1434495702-27901-28-git-send-email-maraeo@gmail.com
State New
Headers show

Not browsing as part of any series.

Commit Message

Marek Olšák June 16, 2015, 11:01 p.m.
From: Marek Olšák <marek.olsak@amd.com>

There is no way to lower them, because the array sizes are unknown
at compile time.

Based on a patch from: Fabian Bieler <fabianbieler@fastmail.fm>
---
 src/glsl/ir_optimization.h                       |  5 +--
 src/glsl/lower_variable_index_to_cond_assign.cpp | 43 +++++++++++++++++-------
 src/glsl/test_optpass.cpp                        |  3 +-
 src/mesa/drivers/dri/i965/brw_shader.cpp         |  8 +++--
 src/mesa/program/ir_to_mesa.cpp                  |  2 +-
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp       |  2 +-
 6 files changed, 42 insertions(+), 21 deletions(-)

Patch hide | download patch | download mbox

diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h
index 688a5e1..a174c96 100644
--- a/src/glsl/ir_optimization.h
+++ b/src/glsl/ir_optimization.h
@@ -114,8 +114,9 @@  bool lower_discard(exec_list *instructions);
 void lower_discard_flow(exec_list *instructions);
 bool lower_instructions(exec_list *instructions, unsigned what_to_lower);
 bool lower_noise(exec_list *instructions);
-bool lower_variable_index_to_cond_assign(exec_list *instructions,
-    bool lower_input, bool lower_output, bool lower_temp, bool lower_uniform);
+bool lower_variable_index_to_cond_assign(gl_shader_stage stage,
+    exec_list *instructions, bool lower_input, bool lower_output,
+    bool lower_temp, bool lower_uniform);
 bool lower_quadop_vector(exec_list *instructions, bool dont_lower_swz);
 bool lower_const_arrays_to_uniforms(exec_list *instructions);
 bool lower_clip_distance(gl_shader *shader);
diff --git a/src/glsl/lower_variable_index_to_cond_assign.cpp b/src/glsl/lower_variable_index_to_cond_assign.cpp
index d878cb0..b6421f5 100644
--- a/src/glsl/lower_variable_index_to_cond_assign.cpp
+++ b/src/glsl/lower_variable_index_to_cond_assign.cpp
@@ -335,12 +335,14 @@  struct switch_generator
 
 class variable_index_to_cond_assign_visitor : public ir_rvalue_visitor {
 public:
-   variable_index_to_cond_assign_visitor(bool lower_input,
-					 bool lower_output,
-					 bool lower_temp,
-					 bool lower_uniform)
+   variable_index_to_cond_assign_visitor(gl_shader_stage stage,
+                                         bool lower_input,
+                                         bool lower_output,
+                                         bool lower_temp,
+                                         bool lower_uniform)
    {
       this->progress = false;
+      this->stage = stage;
       this->lower_inputs = lower_input;
       this->lower_outputs = lower_output;
       this->lower_temps = lower_temp;
@@ -348,6 +350,8 @@  public:
    }
 
    bool progress;
+
+   gl_shader_stage stage;
    bool lower_inputs;
    bool lower_outputs;
    bool lower_temps;
@@ -369,17 +373,28 @@  public:
       case ir_var_auto:
       case ir_var_temporary:
 	 return this->lower_temps;
+
       case ir_var_uniform:
 	 return this->lower_uniforms;
+
       case ir_var_function_in:
       case ir_var_const_in:
          return this->lower_temps;
+
       case ir_var_shader_in:
+         if ((stage == MESA_SHADER_TESS_CTRL ||
+              stage == MESA_SHADER_TESS_EVAL) && !var->data.patch)
+            return false;
          return this->lower_inputs;
+
       case ir_var_function_out:
+         if (stage == MESA_SHADER_TESS_CTRL && !var->data.patch)
+            return false;
          return this->lower_temps;
+
       case ir_var_shader_out:
          return this->lower_outputs;
+
       case ir_var_function_inout:
 	 return this->lower_temps;
       }
@@ -522,16 +537,18 @@  public:
 } /* anonymous namespace */
 
 bool
-lower_variable_index_to_cond_assign(exec_list *instructions,
-				    bool lower_input,
-				    bool lower_output,
-				    bool lower_temp,
-				    bool lower_uniform)
+lower_variable_index_to_cond_assign(gl_shader_stage stage,
+                                    exec_list *instructions,
+                                    bool lower_input,
+                                    bool lower_output,
+                                    bool lower_temp,
+                                    bool lower_uniform)
 {
-   variable_index_to_cond_assign_visitor v(lower_input,
-					   lower_output,
-					   lower_temp,
-					   lower_uniform);
+   variable_index_to_cond_assign_visitor v(stage,
+                                           lower_input,
+                                           lower_output,
+                                           lower_temp,
+                                           lower_uniform);
 
    /* Continue lowering until no progress is made.  If there are multiple
     * levels of indirection (e.g., non-constant indexing of array elements and
diff --git a/src/glsl/test_optpass.cpp b/src/glsl/test_optpass.cpp
index ac3e3f4..fed1fab 100644
--- a/src/glsl/test_optpass.cpp
+++ b/src/glsl/test_optpass.cpp
@@ -124,7 +124,8 @@  do_optimization(struct exec_list *ir, const char *optimization,
    } else if (sscanf(optimization, "lower_variable_index_to_cond_assign "
                      "( %d , %d , %d , %d ) ", &int_0, &int_1, &int_2,
                      &int_3) == 4) {
-      return lower_variable_index_to_cond_assign(ir, int_0 != 0, int_1 != 0,
+      return lower_variable_index_to_cond_assign(MESA_SHADER_VERTEX, ir,
+                                                 int_0 != 0, int_1 != 0,
                                                  int_2 != 0, int_3 != 0);
    } else if (sscanf(optimization, "lower_quadop_vector ( %d ) ",
                      &int_0) == 1) {
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index 545ec26..8b5bb72 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -139,7 +139,8 @@  brw_lower_packing_builtins(struct brw_context *brw,
 }
 
 static void
-process_glsl_ir(struct brw_context *brw,
+process_glsl_ir(gl_shader_stage stage,
+                struct brw_context *brw,
                 struct gl_shader_program *shader_prog,
                 struct gl_shader *shader)
 {
@@ -185,7 +186,8 @@  process_glsl_ir(struct brw_context *brw,
    lower_quadop_vector(shader->ir, false);
 
    bool lowered_variable_indexing =
-      lower_variable_index_to_cond_assign(shader->ir,
+      lower_variable_index_to_cond_assign((gl_shader_stage)stage,
+                                          shader->ir,
                                           options->EmitNoIndirectInput,
                                           options->EmitNoIndirectOutput,
                                           options->EmitNoIndirectTemp,
@@ -262,7 +264,7 @@  brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg)
 
       _mesa_copy_linked_program_data((gl_shader_stage) stage, shProg, prog);
 
-      process_glsl_ir(brw, shProg, shader);
+      process_glsl_ir((gl_shader_stage) stage, brw, shProg, shader);
 
       /* Make a pass over the IR to add state references for any built-in
        * uniforms that are used.  This has to be done now (during linking).
diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index 18e3bc5..8c7cd7d 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -2910,7 +2910,7 @@  _mesa_ir_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
 	 if (options->EmitNoIndirectInput || options->EmitNoIndirectOutput
 	     || options->EmitNoIndirectTemp || options->EmitNoIndirectUniform)
 	   progress =
-	     lower_variable_index_to_cond_assign(ir,
+	     lower_variable_index_to_cond_assign(prog->_LinkedShaders[i]->Stage, ir,
 						 options->EmitNoIndirectInput,
 						 options->EmitNoIndirectOutput,
 						 options->EmitNoIndirectTemp,
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 03834b6..e440aef 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -5798,7 +5798,7 @@  st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
        */
       if (options->EmitNoIndirectInput || options->EmitNoIndirectOutput ||
           options->EmitNoIndirectTemp || options->EmitNoIndirectUniform) {
-         lower_variable_index_to_cond_assign(ir,
+         lower_variable_index_to_cond_assign(prog->_LinkedShaders[i]->Stage, ir,
                                              options->EmitNoIndirectInput,
                                              options->EmitNoIndirectOutput,
                                              options->EmitNoIndirectTemp,

Comments

On Wednesday, June 17, 2015 01:01:24 AM Marek Olšák wrote:
> From: Marek Olšák <marek.olsak@amd.com>
> 
> There is no way to lower them, because the array sizes are unknown
> at compile time.
> 
> Based on a patch from: Fabian Bieler <fabianbieler@fastmail.fm>

I'm a bit confused by the justification given for this patch.

TCS/TES per-vertex inputs:
--------------------------

...are always fixed-size arrays of length gl_MaxPatchVertices, because:

"The length of gl_in is equal to the implementation-dependent maximum
 patch size (gl_MaxPatchVertices)."

"Similarly to the built-in inputs, each user-defined input variable has
 a value for each vertex and thus needs to be declared as arrays or
 inside input blocks declared as arrays.  Declaring an array size is
 optional.  If no size is specified, it will be taken from the
 implementation-dependent maximum patch size (gl_MaxPatchVertices).
 If a size is specified, it must match the maximum patch size;
 otherwise, a link-error will occur."

This same text exists for both TCS inputs and TES inputs.  Since we
always know the array size, I don't see why we can't do lowering in
this case.

I'm pretty new to tessellation shaders, so am I missing something?

TCS per-patch inputs:
---------------------

...don't exist AFAICT.

TES per-patch inputs:
---------------------

...do exist, require no special handling.

TCS per-vertex outputs:
-----------------------

...are arrays whose size is known at link time, but not necessarily
compile time.

"The length of gl_out is equal to the output patch size specified in the
 tessellation control shader output layout declaration."

"A tessellation control shader may also declare user-defined per-vertex
 output variables. User-defined per-vertex output variables are declared
 with the qualifier out and have a value for each vertex in the output
 patch. Such variables must be declared as arrays or inside output blocks
 declared as arrays. Declaring an array size is optional. If no size is
 specified, it will be taken from the output patch size declared in the
 shader."

Apparently, the index must also be gl_InvocationID when writing:

"While per-vertex output variables are declared as arrays indexed by
 vertex number, each tessellation control shader invocation may write only
 to those outputs corresponding to its output patch vertex. Tessellation
 control shaders must use the input variable gl_InvocationID as the
 vertex number index when writing to per-vertex output variables."

So we clearly don't want to do lowering on writes.  But for reads, it
seems like we could do lowering when the array size is known (such as
post-linking).  I'm not sure whether or not it's beneficial...

It might be nice to add a comment explaining why it makes no sense to
lower variable indexing on TCS output writes (with the above spec
citation).

TES outputs:
------------

...require no special handling.


> ---
>  src/glsl/ir_optimization.h                       |  5 +--
>  src/glsl/lower_variable_index_to_cond_assign.cpp | 43 +++++++++++++++++-------
>  src/glsl/test_optpass.cpp                        |  3 +-
>  src/mesa/drivers/dri/i965/brw_shader.cpp         |  8 +++--
>  src/mesa/program/ir_to_mesa.cpp                  |  2 +-
>  src/mesa/state_tracker/st_glsl_to_tgsi.cpp       |  2 +-
>  6 files changed, 42 insertions(+), 21 deletions(-)
> 
> diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h
> index 688a5e1..a174c96 100644
> --- a/src/glsl/ir_optimization.h
> +++ b/src/glsl/ir_optimization.h
> @@ -114,8 +114,9 @@ bool lower_discard(exec_list *instructions);
>  void lower_discard_flow(exec_list *instructions);
>  bool lower_instructions(exec_list *instructions, unsigned what_to_lower);
>  bool lower_noise(exec_list *instructions);
> -bool lower_variable_index_to_cond_assign(exec_list *instructions,
> -    bool lower_input, bool lower_output, bool lower_temp, bool lower_uniform);
> +bool lower_variable_index_to_cond_assign(gl_shader_stage stage,
> +    exec_list *instructions, bool lower_input, bool lower_output,
> +    bool lower_temp, bool lower_uniform);
>  bool lower_quadop_vector(exec_list *instructions, bool dont_lower_swz);
>  bool lower_const_arrays_to_uniforms(exec_list *instructions);
>  bool lower_clip_distance(gl_shader *shader);
> diff --git a/src/glsl/lower_variable_index_to_cond_assign.cpp b/src/glsl/lower_variable_index_to_cond_assign.cpp
> index d878cb0..b6421f5 100644
> --- a/src/glsl/lower_variable_index_to_cond_assign.cpp
> +++ b/src/glsl/lower_variable_index_to_cond_assign.cpp
> @@ -335,12 +335,14 @@ struct switch_generator
>  
>  class variable_index_to_cond_assign_visitor : public ir_rvalue_visitor {
>  public:
> -   variable_index_to_cond_assign_visitor(bool lower_input,
> -					 bool lower_output,
> -					 bool lower_temp,
> -					 bool lower_uniform)
> +   variable_index_to_cond_assign_visitor(gl_shader_stage stage,
> +                                         bool lower_input,
> +                                         bool lower_output,
> +                                         bool lower_temp,
> +                                         bool lower_uniform)
>     {
>        this->progress = false;
> +      this->stage = stage;
>        this->lower_inputs = lower_input;
>        this->lower_outputs = lower_output;
>        this->lower_temps = lower_temp;
> @@ -348,6 +350,8 @@ public:
>     }
>  
>     bool progress;
> +
> +   gl_shader_stage stage;
>     bool lower_inputs;
>     bool lower_outputs;
>     bool lower_temps;
> @@ -369,17 +373,28 @@ public:
>        case ir_var_auto:
>        case ir_var_temporary:
>  	 return this->lower_temps;
> +
>        case ir_var_uniform:
>  	 return this->lower_uniforms;
> +
>        case ir_var_function_in:
>        case ir_var_const_in:
>           return this->lower_temps;
> +
>        case ir_var_shader_in:
> +         if ((stage == MESA_SHADER_TESS_CTRL ||
> +              stage == MESA_SHADER_TESS_EVAL) && !var->data.patch)
> +            return false;
>           return this->lower_inputs;
> +
>        case ir_var_function_out:
> +         if (stage == MESA_SHADER_TESS_CTRL && !var->data.patch)
> +            return false;
>           return this->lower_temps;
> +
>        case ir_var_shader_out:
>           return this->lower_outputs;
> +
>        case ir_var_function_inout:
>  	 return this->lower_temps;
>        }
> @@ -522,16 +537,18 @@ public:
>  } /* anonymous namespace */
>  
>  bool
> -lower_variable_index_to_cond_assign(exec_list *instructions,
> -				    bool lower_input,
> -				    bool lower_output,
> -				    bool lower_temp,
> -				    bool lower_uniform)
> +lower_variable_index_to_cond_assign(gl_shader_stage stage,
> +                                    exec_list *instructions,
> +                                    bool lower_input,
> +                                    bool lower_output,
> +                                    bool lower_temp,
> +                                    bool lower_uniform)
>  {
> -   variable_index_to_cond_assign_visitor v(lower_input,
> -					   lower_output,
> -					   lower_temp,
> -					   lower_uniform);
> +   variable_index_to_cond_assign_visitor v(stage,
> +                                           lower_input,
> +                                           lower_output,
> +                                           lower_temp,
> +                                           lower_uniform);
>  
>     /* Continue lowering until no progress is made.  If there are multiple
>      * levels of indirection (e.g., non-constant indexing of array elements and
> diff --git a/src/glsl/test_optpass.cpp b/src/glsl/test_optpass.cpp
> index ac3e3f4..fed1fab 100644
> --- a/src/glsl/test_optpass.cpp
> +++ b/src/glsl/test_optpass.cpp
> @@ -124,7 +124,8 @@ do_optimization(struct exec_list *ir, const char *optimization,
>     } else if (sscanf(optimization, "lower_variable_index_to_cond_assign "
>                       "( %d , %d , %d , %d ) ", &int_0, &int_1, &int_2,
>                       &int_3) == 4) {
> -      return lower_variable_index_to_cond_assign(ir, int_0 != 0, int_1 != 0,
> +      return lower_variable_index_to_cond_assign(MESA_SHADER_VERTEX, ir,
> +                                                 int_0 != 0, int_1 != 0,
>                                                   int_2 != 0, int_3 != 0);
>     } else if (sscanf(optimization, "lower_quadop_vector ( %d ) ",
>                       &int_0) == 1) {
> diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
> index 545ec26..8b5bb72 100644
> --- a/src/mesa/drivers/dri/i965/brw_shader.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
> @@ -139,7 +139,8 @@ brw_lower_packing_builtins(struct brw_context *brw,
>  }
>  
>  static void
> -process_glsl_ir(struct brw_context *brw,
> +process_glsl_ir(gl_shader_stage stage,
> +                struct brw_context *brw,
>                  struct gl_shader_program *shader_prog,
>                  struct gl_shader *shader)
>  {
> @@ -185,7 +186,8 @@ process_glsl_ir(struct brw_context *brw,
>     lower_quadop_vector(shader->ir, false);
>  
>     bool lowered_variable_indexing =
> -      lower_variable_index_to_cond_assign(shader->ir,
> +      lower_variable_index_to_cond_assign((gl_shader_stage)stage,
> +                                          shader->ir,
>                                            options->EmitNoIndirectInput,
>                                            options->EmitNoIndirectOutput,
>                                            options->EmitNoIndirectTemp,
> @@ -262,7 +264,7 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg)
>  
>        _mesa_copy_linked_program_data((gl_shader_stage) stage, shProg, prog);
>  
> -      process_glsl_ir(brw, shProg, shader);
> +      process_glsl_ir((gl_shader_stage) stage, brw, shProg, shader);
>  
>        /* Make a pass over the IR to add state references for any built-in
>         * uniforms that are used.  This has to be done now (during linking).
> diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
> index 18e3bc5..8c7cd7d 100644
> --- a/src/mesa/program/ir_to_mesa.cpp
> +++ b/src/mesa/program/ir_to_mesa.cpp
> @@ -2910,7 +2910,7 @@ _mesa_ir_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
>  	 if (options->EmitNoIndirectInput || options->EmitNoIndirectOutput
>  	     || options->EmitNoIndirectTemp || options->EmitNoIndirectUniform)
>  	   progress =
> -	     lower_variable_index_to_cond_assign(ir,
> +	     lower_variable_index_to_cond_assign(prog->_LinkedShaders[i]->Stage, ir,
>  						 options->EmitNoIndirectInput,
>  						 options->EmitNoIndirectOutput,
>  						 options->EmitNoIndirectTemp,
> diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> index 03834b6..e440aef 100644
> --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> @@ -5798,7 +5798,7 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
>         */
>        if (options->EmitNoIndirectInput || options->EmitNoIndirectOutput ||
>            options->EmitNoIndirectTemp || options->EmitNoIndirectUniform) {
> -         lower_variable_index_to_cond_assign(ir,
> +         lower_variable_index_to_cond_assign(prog->_LinkedShaders[i]->Stage, ir,
>                                               options->EmitNoIndirectInput,
>                                               options->EmitNoIndirectOutput,
>                                               options->EmitNoIndirectTemp,
>
On Tue, Jun 23, 2015 at 2:04 AM, Kenneth Graunke <kenneth@whitecape.org> wrote:
> On Wednesday, June 17, 2015 01:01:24 AM Marek Olšák wrote:
>> From: Marek Olšák <marek.olsak@amd.com>
>>
>> There is no way to lower them, because the array sizes are unknown
>> at compile time.
>>
>> Based on a patch from: Fabian Bieler <fabianbieler@fastmail.fm>
>
> I'm a bit confused by the justification given for this patch.
>
> TCS/TES per-vertex inputs:
> --------------------------
>
> ...are always fixed-size arrays of length gl_MaxPatchVertices, because:
>
> "The length of gl_in is equal to the implementation-dependent maximum
>  patch size (gl_MaxPatchVertices)."
>
> "Similarly to the built-in inputs, each user-defined input variable has
>  a value for each vertex and thus needs to be declared as arrays or
>  inside input blocks declared as arrays.  Declaring an array size is
>  optional.  If no size is specified, it will be taken from the
>  implementation-dependent maximum patch size (gl_MaxPatchVertices).
>  If a size is specified, it must match the maximum patch size;
>  otherwise, a link-error will occur."
>
> This same text exists for both TCS inputs and TES inputs.  Since we
> always know the array size, I don't see why we can't do lowering in
> this case.
>
> I'm pretty new to tessellation shaders, so am I missing something?
>
> TCS per-patch inputs:
> ---------------------
>
> ...don't exist AFAICT.
>
> TES per-patch inputs:
> ---------------------
>
> ...do exist, require no special handling.
>
> TCS per-vertex outputs:
> -----------------------
>
> ...are arrays whose size is known at link time, but not necessarily
> compile time.
>
> "The length of gl_out is equal to the output patch size specified in the
>  tessellation control shader output layout declaration."
>
> "A tessellation control shader may also declare user-defined per-vertex
>  output variables. User-defined per-vertex output variables are declared
>  with the qualifier out and have a value for each vertex in the output
>  patch. Such variables must be declared as arrays or inside output blocks
>  declared as arrays. Declaring an array size is optional. If no size is
>  specified, it will be taken from the output patch size declared in the
>  shader."
>
> Apparently, the index must also be gl_InvocationID when writing:
>
> "While per-vertex output variables are declared as arrays indexed by
>  vertex number, each tessellation control shader invocation may write only
>  to those outputs corresponding to its output patch vertex. Tessellation
>  control shaders must use the input variable gl_InvocationID as the
>  vertex number index when writing to per-vertex output variables."
>
> So we clearly don't want to do lowering on writes.  But for reads, it
> seems like we could do lowering when the array size is known (such as
> post-linking).  I'm not sure whether or not it's beneficial...
>
> It might be nice to add a comment explaining why it makes no sense to
> lower variable indexing on TCS output writes (with the above spec
> citation).

gl_MaxPatchVertices (typically 32) is the implementation-dependent
maximum limit. The real size is unknown at compile time. It's usually
3 or 4 in most apps though.

For TCS inputs, the size is specified by glPatchParameteri(GL_PATCH_VERTICES).
For TES inputs, the size is specified by the "vertices" output layout
qualifier in TCS.

The gl_PatchVerticesIn built-in uniform contains the real size.

I'll add a comment that explains it.

Marek