[Mesa-dev] glsl: split ssbo array copies into element copies

Submitted by Iago Toral Quiroga on Nov. 20, 2015, 2:48 p.m.

Details

Message ID 1448030907-14319-1-git-send-email-itoral@igalia.com
State New
Headers show
Series "glsl: split ssbo array copies into element copies" ( rev: 1 ) in Mesa

Not browsing as part of any series.

Commit Message

Iago Toral Quiroga Nov. 20, 2015, 2:48 p.m.
Improves register pressure, since otherwise we end up emitting
loads for all the elements in the RHS and them emitting
stores for all elements in the LHS.

Fixes the following piglit test:
tests/spec/arb_shader_storage_buffer_object/execution/large-field-copy.shader_test
---

Jordan, this fixes the link failure for the the test you provided. Needs more
testing and I have to check if I need to do something for structs that contain
arrays too, etc I'll do that on Monday unless someone else comes with a better
idea to fix this.

 src/glsl/lower_ubo_reference.cpp | 61 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 61 insertions(+)

Patch hide | download patch | download mbox

diff --git a/src/glsl/lower_ubo_reference.cpp b/src/glsl/lower_ubo_reference.cpp
index b74aa3d..7d48960 100644
--- a/src/glsl/lower_ubo_reference.cpp
+++ b/src/glsl/lower_ubo_reference.cpp
@@ -154,6 +154,7 @@  public:
    ir_call *ssbo_load(const struct glsl_type *type,
                       ir_rvalue *offset);
 
+   bool check_for_ssbo_array_copy(ir_assignment *ir);
    void check_for_ssbo_store(ir_assignment *ir);
    void write_to_memory(ir_dereference *deref,
                         ir_variable *var,
@@ -1133,9 +1134,69 @@  lower_ubo_reference_visitor::check_for_ssbo_store(ir_assignment *ir)
 }
 
 
+bool
+lower_ubo_reference_visitor::check_for_ssbo_array_copy(ir_assignment *ir)
+{
+   if (!ir || !ir->lhs || !ir->rhs)
+      return false;
+
+   ir_dereference *rhs_deref = ir->rhs->as_dereference();
+   if (!rhs_deref)
+      return false;
+
+   ir_dereference *lhs_deref = ir->lhs->as_dereference();
+   if (!lhs_deref)
+      return false;
+
+   /* LHS and RHS must be SSBO variables */
+   ir_variable *lhs_var = ir->lhs->variable_referenced();
+   if (!lhs_var || !lhs_var->is_in_shader_storage_block())
+      return false;
+
+   ir_variable *rhs_var = ir->rhs->variable_referenced();
+   if (!rhs_var || !rhs_var->is_in_shader_storage_block())
+      return false;
+
+   /* LHS and RHS must be variable dereferences.
+    * FIXME: arrays of arrays?
+    */
+   if (!ir->lhs->as_dereference_variable() ||
+       !ir->rhs->as_dereference_variable())
+      return false;
+
+   /* LHS and RHS must be arrays */
+   if (!rhs_var->type->is_array() || !lhs_var->type->is_array())
+      return false;
+
+   assert(lhs_deref->type->length == rhs_deref->type->length);
+
+   for (unsigned i = 0; i < lhs_deref->type->length; i++) {
+      ir_dereference *lhs_i =
+         new(mem_ctx) ir_dereference_array(lhs_deref->clone(mem_ctx, NULL),
+                                           new(mem_ctx) ir_constant(i));
+
+      ir_dereference *rhs_i =
+         new(mem_ctx) ir_dereference_array(rhs_deref->clone(mem_ctx, NULL),
+                                           new(mem_ctx) ir_constant(i));
+      ir->insert_after(assign(lhs_i, rhs_i));
+   }
+
+   ir->remove();
+   return true;
+}
+
 ir_visitor_status
 lower_ubo_reference_visitor::visit_enter(ir_assignment *ir)
 {
+   /* Array copies could involve large amounts of SSBO load/store
+    * operations. To improve register pressure we want to special-case
+    * this and split the array copy into many individual element copies.
+    * This way we avoid emitting all the loads for the RHS first and
+    * all the writes for the LHS second.
+    */
+   if (check_for_ssbo_array_copy(ir))
+      return visit_continue_with_parent;
+
    check_ssbo_unsized_array_length_assignment(ir);
    check_for_ssbo_store(ir);
    return rvalue_visit(ir);

Comments

On 2015-11-20 06:48:27, Iago Toral Quiroga wrote:
> Improves register pressure, since otherwise we end up emitting
> loads for all the elements in the RHS and them emitting
> stores for all elements in the LHS.
> 
> Fixes the following piglit test:
> tests/spec/arb_shader_storage_buffer_object/execution/large-field-copy.shader_test
> ---
> 
> Jordan, this fixes the link failure for the the test you provided. Needs more
> testing and I have to check if I need to do something for structs that contain
> arrays too, etc I'll do that on Monday unless someone else comes with a better
> idea to fix this.
> 
>  src/glsl/lower_ubo_reference.cpp | 61 ++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 61 insertions(+)
> 
> diff --git a/src/glsl/lower_ubo_reference.cpp b/src/glsl/lower_ubo_reference.cpp
> index b74aa3d..7d48960 100644
> --- a/src/glsl/lower_ubo_reference.cpp
> +++ b/src/glsl/lower_ubo_reference.cpp
> @@ -154,6 +154,7 @@ public:
>     ir_call *ssbo_load(const struct glsl_type *type,
>                        ir_rvalue *offset);
>  
> +   bool check_for_ssbo_array_copy(ir_assignment *ir);
>     void check_for_ssbo_store(ir_assignment *ir);
>     void write_to_memory(ir_dereference *deref,
>                          ir_variable *var,
> @@ -1133,9 +1134,69 @@ lower_ubo_reference_visitor::check_for_ssbo_store(ir_assignment *ir)
>  }
>  
>  
> +bool
> +lower_ubo_reference_visitor::check_for_ssbo_array_copy(ir_assignment *ir)
> +{
> +   if (!ir || !ir->lhs || !ir->rhs)
> +      return false;
> +
> +   ir_dereference *rhs_deref = ir->rhs->as_dereference();
> +   if (!rhs_deref)
> +      return false;
> +
> +   ir_dereference *lhs_deref = ir->lhs->as_dereference();
> +   if (!lhs_deref)
> +      return false;
> +
> +   /* LHS and RHS must be SSBO variables */

Must they? In fact, the issue that prompted this was a copy from an
SSBO to a CS shared variable in the ES3.1 CTS. Maybe just the source
or dest being an SSBO is good enough?

Also, I just re-sent a piglit patch. In this case I just focused on
testing that the shader linked. I tried the 'array' version with this
patch, and it hit an assertion in nir_validate.

The copy 'struct' version still fails to register allocate.

-Jordan

> +   ir_variable *lhs_var = ir->lhs->variable_referenced();
> +   if (!lhs_var || !lhs_var->is_in_shader_storage_block())
> +      return false;
> +
> +   ir_variable *rhs_var = ir->rhs->variable_referenced();
> +   if (!rhs_var || !rhs_var->is_in_shader_storage_block())
> +      return false;
> +
> +   /* LHS and RHS must be variable dereferences.
> +    * FIXME: arrays of arrays?
> +    */
> +   if (!ir->lhs->as_dereference_variable() ||
> +       !ir->rhs->as_dereference_variable())
> +      return false;
> +
> +   /* LHS and RHS must be arrays */
> +   if (!rhs_var->type->is_array() || !lhs_var->type->is_array())
> +      return false;
> +
> +   assert(lhs_deref->type->length == rhs_deref->type->length);
> +
> +   for (unsigned i = 0; i < lhs_deref->type->length; i++) {
> +      ir_dereference *lhs_i =
> +         new(mem_ctx) ir_dereference_array(lhs_deref->clone(mem_ctx, NULL),
> +                                           new(mem_ctx) ir_constant(i));
> +
> +      ir_dereference *rhs_i =
> +         new(mem_ctx) ir_dereference_array(rhs_deref->clone(mem_ctx, NULL),
> +                                           new(mem_ctx) ir_constant(i));
> +      ir->insert_after(assign(lhs_i, rhs_i));
> +   }
> +
> +   ir->remove();
> +   return true;
> +}
> +
>  ir_visitor_status
>  lower_ubo_reference_visitor::visit_enter(ir_assignment *ir)
>  {
> +   /* Array copies could involve large amounts of SSBO load/store
> +    * operations. To improve register pressure we want to special-case
> +    * this and split the array copy into many individual element copies.
> +    * This way we avoid emitting all the loads for the RHS first and
> +    * all the writes for the LHS second.
> +    */
> +   if (check_for_ssbo_array_copy(ir))
> +      return visit_continue_with_parent;
> +
>     check_ssbo_unsized_array_length_assignment(ir);
>     check_for_ssbo_store(ir);
>     return rvalue_visit(ir);
> -- 
> 1.9.1
>
On Fri, 2015-11-20 at 11:09 -0800, Jordan Justen wrote:
> On 2015-11-20 06:48:27, Iago Toral Quiroga wrote:
> > Improves register pressure, since otherwise we end up emitting
> > loads for all the elements in the RHS and them emitting
> > stores for all elements in the LHS.
> > 
> > Fixes the following piglit test:
> > tests/spec/arb_shader_storage_buffer_object/execution/large-field-copy.shader_test
> > ---
> > 
> > Jordan, this fixes the link failure for the the test you provided. Needs more
> > testing and I have to check if I need to do something for structs that contain
> > arrays too, etc I'll do that on Monday unless someone else comes with a better
> > idea to fix this.
> > 
> >  src/glsl/lower_ubo_reference.cpp | 61 ++++++++++++++++++++++++++++++++++++++++
> >  1 file changed, 61 insertions(+)
> > 
> > diff --git a/src/glsl/lower_ubo_reference.cpp b/src/glsl/lower_ubo_reference.cpp
> > index b74aa3d..7d48960 100644
> > --- a/src/glsl/lower_ubo_reference.cpp
> > +++ b/src/glsl/lower_ubo_reference.cpp
> > @@ -154,6 +154,7 @@ public:
> >     ir_call *ssbo_load(const struct glsl_type *type,
> >                        ir_rvalue *offset);
> >  
> > +   bool check_for_ssbo_array_copy(ir_assignment *ir);
> >     void check_for_ssbo_store(ir_assignment *ir);
> >     void write_to_memory(ir_dereference *deref,
> >                          ir_variable *var,
> > @@ -1133,9 +1134,69 @@ lower_ubo_reference_visitor::check_for_ssbo_store(ir_assignment *ir)
> >  }
> >  
> >  
> > +bool
> > +lower_ubo_reference_visitor::check_for_ssbo_array_copy(ir_assignment *ir)
> > +{
> > +   if (!ir || !ir->lhs || !ir->rhs)
> > +      return false;
> > +
> > +   ir_dereference *rhs_deref = ir->rhs->as_dereference();
> > +   if (!rhs_deref)
> > +      return false;
> > +
> > +   ir_dereference *lhs_deref = ir->lhs->as_dereference();
> > +   if (!lhs_deref)
> > +      return false;
> > +
> > +   /* LHS and RHS must be SSBO variables */
> 
> Must they? In fact, the issue that prompted this was a copy from an
> SSBO to a CS shared variable in the ES3.1 CTS. Maybe just the source
> or dest being an SSBO is good enough?

Actually, I think we need to do this when the variable in the RHS is
backed by a buffer (if it is only the lhs, then we are fine because the
thing that creates the problem here is the lifespan of the loads until
we hit the writes), but not just with ssbos, but with any type of
variable that can be lowered to loads, so ubos, ssbos and shared
variables.

> Also, I just re-sent a piglit patch. In this case I just focused on
> testing that the shader linked. I tried the 'array' version with this
> patch, and it hit an assertion in nir_validate.

Yeah, I fixed this locally. This is because I was not reporting progress
properly, so the new nodes with the individual copies were not always
lowered.

> The copy 'struct' version still fails to register allocate.

Yes, I'll fix that too.

> -Jordan
> 
> > +   ir_variable *lhs_var = ir->lhs->variable_referenced();
> > +   if (!lhs_var || !lhs_var->is_in_shader_storage_block())
> > +      return false;
> > +
> > +   ir_variable *rhs_var = ir->rhs->variable_referenced();
> > +   if (!rhs_var || !rhs_var->is_in_shader_storage_block())
> > +      return false;
> > +
> > +   /* LHS and RHS must be variable dereferences.
> > +    * FIXME: arrays of arrays?
> > +    */
> > +   if (!ir->lhs->as_dereference_variable() ||
> > +       !ir->rhs->as_dereference_variable())
> > +      return false;
> > +
> > +   /* LHS and RHS must be arrays */
> > +   if (!rhs_var->type->is_array() || !lhs_var->type->is_array())
> > +      return false;
> > +
> > +   assert(lhs_deref->type->length == rhs_deref->type->length);
> > +
> > +   for (unsigned i = 0; i < lhs_deref->type->length; i++) {
> > +      ir_dereference *lhs_i =
> > +         new(mem_ctx) ir_dereference_array(lhs_deref->clone(mem_ctx, NULL),
> > +                                           new(mem_ctx) ir_constant(i));
> > +
> > +      ir_dereference *rhs_i =
> > +         new(mem_ctx) ir_dereference_array(rhs_deref->clone(mem_ctx, NULL),
> > +                                           new(mem_ctx) ir_constant(i));
> > +      ir->insert_after(assign(lhs_i, rhs_i));
> > +   }
> > +
> > +   ir->remove();
> > +   return true;
> > +}
> > +
> >  ir_visitor_status
> >  lower_ubo_reference_visitor::visit_enter(ir_assignment *ir)
> >  {
> > +   /* Array copies could involve large amounts of SSBO load/store
> > +    * operations. To improve register pressure we want to special-case
> > +    * this and split the array copy into many individual element copies.
> > +    * This way we avoid emitting all the loads for the RHS first and
> > +    * all the writes for the LHS second.
> > +    */
> > +   if (check_for_ssbo_array_copy(ir))
> > +      return visit_continue_with_parent;
> > +
> >     check_ssbo_unsized_array_length_assignment(ir);
> >     check_for_ssbo_store(ir);
> >     return rvalue_visit(ir);
> > -- 
> > 1.9.1
> > 
>