[Mesa-dev,10/11] glsl_to_tgsi: use TGSI array declarations for GS, FS arrays of inputs

Submitted by Marek Olšák on May 24, 2015, 11:19 a.m.

Details

Message ID 1432466370-3869-11-git-send-email-maraeo@gmail.com
State New
Headers show

Not browsing as part of any series.

Commit Message

Marek Olšák May 24, 2015, 11:19 a.m.
From: Marek Olšák <marek.olsak@amd.com>

---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 215 ++++++++++++++++++++++++++---
 src/mesa/state_tracker/st_glsl_to_tgsi.h   |   1 +
 src/mesa/state_tracker/st_program.c        |   9 ++
 3 files changed, 202 insertions(+), 23 deletions(-)

Patch hide | download patch | download mbox

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index d0880cd..c84e023 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -84,6 +84,7 @@  public:
       this->reladdr2 = NULL;
       this->has_index2 = false;
       this->double_reg2 = false;
+      this->array_id = 0;
    }
 
    st_src_reg(gl_register_file file, int index, int type)
@@ -98,6 +99,7 @@  public:
       this->reladdr2 = NULL;
       this->has_index2 = false;
       this->double_reg2 = false;
+      this->array_id = 0;
    }
 
    st_src_reg(gl_register_file file, int index, int type, int index2D)
@@ -112,6 +114,7 @@  public:
       this->reladdr2 = NULL;
       this->has_index2 = false;
       this->double_reg2 = false;
+      this->array_id = 0;
    }
 
    st_src_reg()
@@ -126,6 +129,7 @@  public:
       this->reladdr2 = NULL;
       this->has_index2 = false;
       this->double_reg2 = false;
+      this->array_id = 0;
    }
 
    explicit st_src_reg(st_dst_reg reg);
@@ -145,6 +149,7 @@  public:
     * currently used for input mapping only.
     */
    bool double_reg2;
+   unsigned array_id;
 };
 
 class st_dst_reg {
@@ -202,6 +207,7 @@  st_src_reg::st_src_reg(st_dst_reg reg)
    this->reladdr2 = NULL;
    this->has_index2 = false;
    this->double_reg2 = false;
+   this->array_id = 0;
 }
 
 st_dst_reg::st_dst_reg(st_src_reg reg)
@@ -212,6 +218,7 @@  st_dst_reg::st_dst_reg(st_src_reg reg)
    this->writemask = WRITEMASK_XYZW;
    this->cond_mask = COND_TR;
    this->reladdr = reg.reladdr;
+   assert(reg.array_id == 0);
 }
 
 class glsl_to_tgsi_instruction : public exec_node {
@@ -239,8 +246,9 @@  public:
 
 class variable_storage : public exec_node {
 public:
-   variable_storage(ir_variable *var, gl_register_file file, int index)
-      : file(file), index(index), var(var)
+   variable_storage(ir_variable *var, gl_register_file file, int index,
+                    unsigned array_id = 0)
+      : file(file), index(index), var(var), array_id(array_id)
    {
       /* empty */
    }
@@ -248,6 +256,7 @@  public:
    gl_register_file file;
    int index;
    ir_variable *var; /* variable that maps to this, if any */
+   unsigned array_id;
 };
 
 class immediate_storage : public exec_node {
@@ -300,6 +309,12 @@  public:
 static st_src_reg undef_src = st_src_reg(PROGRAM_UNDEFINED, 0, GLSL_TYPE_ERROR);
 static st_dst_reg undef_dst = st_dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP, GLSL_TYPE_ERROR);
 
+struct array_decl {
+   unsigned mesa_index;
+   unsigned array_id;
+   unsigned array_size;
+};
+
 struct glsl_to_tgsi_visitor : public ir_visitor {
 public:
    glsl_to_tgsi_visitor();
@@ -318,6 +333,9 @@  public:
    unsigned array_sizes[PIPE_MAX_SHADER_ARRAYS];
    unsigned next_array;
 
+   struct array_decl input_arrays[PIPE_MAX_SHADER_ARRAYS];
+   unsigned num_input_arrays;
+
    int num_address_regs;
    int samplers_used;
    bool indirect_addr_consts;
@@ -2189,11 +2207,38 @@  glsl_to_tgsi_visitor::visit(ir_swizzle *ir)
    this->result = src;
 }
 
+/* Test if the variable is an array. Note that geometry and
+ * tessellation shader inputs are outputs are always arrays (except
+ * for patch inputs), so only the array element type is considered.
+ */
+static bool
+is_inout_array(unsigned stage, ir_variable *var, bool *is_2d)
+{
+   const glsl_type *type = var->type;
+
+   if ((stage == MESA_SHADER_VERTEX && var->data.mode == ir_var_shader_in) ||
+       (stage == MESA_SHADER_FRAGMENT && var->data.mode == ir_var_shader_out))
+      return false;
+
+   *is_2d = false;
+
+   if (stage == MESA_SHADER_GEOMETRY && var->data.mode == ir_var_shader_in) {
+      if (!var->type->is_array())
+         return false; /* a system value probably */
+
+      type = var->type->fields.array;
+      *is_2d = true;
+   }
+
+   return type->is_array() || type->is_matrix();
+}
+
 void
 glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir)
 {
    variable_storage *entry = find_variable_storage(ir->var);
    ir_variable *var = ir->var;
+   bool is_2d;
 
    if (!entry) {
       switch (var->data.mode) {
@@ -2209,9 +2254,29 @@  glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir)
           * user-defined varyings.
           */
          assert(var->data.location != -1);
-         entry = new(mem_ctx) variable_storage(var,
-                                               PROGRAM_INPUT,
-                                               var->data.location);
+
+         if (is_inout_array(shader->Stage, var, &is_2d)) {
+            struct array_decl *decl = &input_arrays[num_input_arrays];
+
+            decl->mesa_index = var->data.location;
+            decl->array_id = num_input_arrays + 1;
+            if (is_2d)
+               decl->array_size = type_size(var->type->fields.array);
+            else
+               decl->array_size = type_size(var->type);
+            num_input_arrays++;
+
+            entry = new(mem_ctx) variable_storage(var,
+                                                  PROGRAM_INPUT,
+                                                  var->data.location,
+                                                  decl->array_id);
+         }
+         else {
+            entry = new(mem_ctx) variable_storage(var,
+                                                  PROGRAM_INPUT,
+                                                  var->data.location);
+         }
+         this->variables.push_tail(entry);
          break;
       case ir_var_shader_out:
          assert(var->data.location != -1);
@@ -2242,10 +2307,43 @@  glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir)
    }
 
    this->result = st_src_reg(entry->file, entry->index, var->type);
+   this->result.array_id = entry->array_id;
    if (!native_integers)
       this->result.type = GLSL_TYPE_FLOAT;
 }
 
+static void
+shrink_array_declarations(struct array_decl *arrays, unsigned count,
+                          GLbitfield64 usage_mask)
+{
+   unsigned i, j;
+
+   /* Fix array declarations by removing unused array elements at both ends
+    * of the arrays. For example, mat4[3] where only mat[1] is used.
+    */
+   for (i = 0; i < count; i++) {
+      struct array_decl *decl = &arrays[i];
+
+      /* Shrink the beginning. */
+      for (j = 0; j < decl->array_size; j++) {
+         if (usage_mask & BITFIELD64_BIT(decl->mesa_index+j))
+            break;
+
+         decl->mesa_index++;
+         decl->array_size--;
+         j--;
+      }
+
+      /* Shrink the end. */
+      for (j = decl->array_size-1; j >= 0; j--) {
+         if (usage_mask & BITFIELD64_BIT(decl->mesa_index+j))
+            break;
+
+         decl->array_size--;
+      }
+   }
+}
+
 void
 glsl_to_tgsi_visitor::visit(ir_dereference_array *ir)
 {
@@ -3291,6 +3389,7 @@  glsl_to_tgsi_visitor::glsl_to_tgsi_visitor()
    result.file = PROGRAM_UNDEFINED;
    next_temp = 1;
    next_array = 0;
+   num_input_arrays = 0;
    next_signature_id = 1;
    num_immediates = 0;
    current_function = NULL;
@@ -3680,6 +3779,7 @@  glsl_to_tgsi_visitor::copy_propagate(void)
             inst->src[r].index2D = first->src[0].index2D;
             inst->src[r].has_index2 = first->src[0].has_index2;
             inst->src[r].double_reg2 = first->src[0].double_reg2;
+            inst->src[r].array_id = first->src[0].array_id;
 
             int swizzle = 0;
             for (int i = 0; i < 4; i++) {
@@ -4320,6 +4420,7 @@  struct st_translate {
    struct ureg_src systemValues[SYSTEM_VALUE_MAX];
    struct tgsi_texture_offset tex_offsets[MAX_GLSL_TEXTURE_OFFSET];
    unsigned array_sizes[PIPE_MAX_SHADER_ARRAYS];
+   struct array_decl input_arrays[PIPE_MAX_SHADER_ARRAYS];
 
    const GLuint *inputMapping;
    const GLuint *outputMapping;
@@ -4544,8 +4645,20 @@  src_register(struct st_translate *t, const st_src_reg *reg)
        * map back to the original index and add the offset after
        * mapping. */
       index -= double_reg2;
-      assert(t->inputMapping[index] < ARRAY_SIZE(t->inputs));
-      return t->inputs[t->inputMapping[index] + double_reg2];
+      if (!reg->array_id) {
+         assert(t->inputMapping[index] < ARRAY_SIZE(t->inputs));
+         assert(t->inputs[t->inputMapping[index]].File != TGSI_FILE_NULL);
+         return t->inputs[t->inputMapping[index]];
+      }
+      else {
+         struct array_decl *decl = &t->input_arrays[reg->array_id-1];
+         unsigned mesa_index = decl->mesa_index;
+         int slot = t->inputMapping[mesa_index];
+
+         assert(slot != -1 && t->inputs[slot].File == TGSI_FILE_INPUT);
+         assert(t->inputs[slot].ArrayID == reg->array_id);
+         return ureg_src_array_offset(t->inputs[slot], index - mesa_index);
+      }
 
    case PROGRAM_OUTPUT:
       assert(t->outputMapping[reg->index] < ARRAY_SIZE(t->outputs));
@@ -5006,6 +5119,25 @@  emit_edgeflags(struct st_translate *t)
    ureg_MOV(ureg, edge_dst, edge_src);
 }
 
+static bool
+find_array(unsigned attr, struct array_decl *arrays, unsigned count,
+           unsigned *array_id, unsigned *array_size)
+{
+   unsigned i;
+
+   for (i = 0; i < count; i++) {
+      struct array_decl *decl = &arrays[i];
+
+      if (attr == decl->mesa_index) {
+         *array_id = decl->array_id;
+         *array_size = decl->array_size;
+         assert(*array_size);
+         return true;
+      }
+   }
+   return false;
+}
+
 /**
  * Translate intermediate IR (glsl_to_tgsi_instruction) to TGSI format.
  * \param program  the program to translate
@@ -5035,6 +5167,7 @@  st_translate_program(
    const struct gl_program *proginfo,
    GLuint numInputs,
    const GLuint inputMapping[],
+   const GLuint inputSlotToAttr[],
    const ubyte inputSemanticName[],
    const ubyte inputSemanticIndex[],
    const GLuint interpMode[],
@@ -5088,15 +5221,57 @@  st_translate_program(
    /*
     * Declare input attributes.
     */
-   if (procType == TGSI_PROCESSOR_FRAGMENT) {
+   switch (procType) {
+   case TGSI_PROCESSOR_FRAGMENT:
       for (i = 0; i < numInputs; i++) {
-         t->inputs[i] = ureg_DECL_fs_input_cyl_centroid(ureg,
-                                                        inputSemanticName[i],
-                                                        inputSemanticIndex[i],
-                                                        interpMode[i], 0,
-                                                        interpLocation[i], 0, 1);
+         unsigned array_id = 0;
+         unsigned array_size;
+
+         if (find_array(inputSlotToAttr[i], program->input_arrays,
+                        program->num_input_arrays, &array_id, &array_size)) {
+            /* We've found an array. Declare it so. */
+            t->inputs[i] = ureg_DECL_fs_input_cyl_centroid(ureg,
+                              inputSemanticName[i], inputSemanticIndex[i],
+                              interpMode[i], 0, interpLocation[i],
+                              array_id, array_size);
+            i += array_size - 1;
+         }
+         else {
+            t->inputs[i] = ureg_DECL_fs_input_cyl_centroid(ureg,
+                              inputSemanticName[i], inputSemanticIndex[i],
+                              interpMode[i], 0, interpLocation[i], 0, 1);
+         }
       }
+      break;
+   case TGSI_PROCESSOR_GEOMETRY:
+      for (i = 0; i < numInputs; i++) {
+         unsigned array_id = 0;
+         unsigned array_size;
+
+         if (find_array(inputSlotToAttr[i], program->input_arrays,
+                        program->num_input_arrays, &array_id, &array_size)) {
+            /* We've found an array. Declare it so. */
+            t->inputs[i] = ureg_DECL_input(ureg, inputSemanticName[i],
+                                           inputSemanticIndex[i],
+                                           array_id, array_size);
+            i += array_size - 1;
+         }
+         else {
+            t->inputs[i] = ureg_DECL_input(ureg, inputSemanticName[i],
+                                           inputSemanticIndex[i], 0, 1);
+         }
+      }
+      break;
+   case TGSI_PROCESSOR_VERTEX:
+      for (i = 0; i < numInputs; i++) {
+         t->inputs[i] = ureg_DECL_vs_input(ureg, i);
+      }
+      break;
+   default:
+      assert(0);
+   }
 
+   if (procType == TGSI_PROCESSOR_FRAGMENT) {
       if (proginfo->InputsRead & VARYING_BIT_POS) {
           /* Must do this after setting up t->inputs. */
           emit_wpos(st_context(ctx), t, proginfo, ureg,
@@ -5145,12 +5320,6 @@  st_translate_program(
       }
    }
    else if (procType == TGSI_PROCESSOR_GEOMETRY) {
-      for (i = 0; i < numInputs; i++) {
-         t->inputs[i] = ureg_DECL_input(ureg,
-                                        inputSemanticName[i],
-                                        inputSemanticIndex[i], 0, 1);
-      }
-
       for (i = 0; i < numOutputs; i++) {
          t->outputs[i] = ureg_DECL_output(ureg,
                                           outputSemanticName[i],
@@ -5160,10 +5329,6 @@  st_translate_program(
    else {
       assert(procType == TGSI_PROCESSOR_VERTEX);
 
-      for (i = 0; i < numInputs; i++) {
-         t->inputs[i] = ureg_DECL_vs_input(ureg, i);
-      }
-
       for (i = 0; i < numOutputs; i++) {
          t->outputs[i] = ureg_DECL_output(ureg,
                                           outputSemanticName[i],
@@ -5225,6 +5390,8 @@  st_translate_program(
    /* Copy over array sizes
     */
    memcpy(t->array_sizes, program->array_sizes, sizeof(unsigned) * program->next_array);
+   memcpy(t->input_arrays, program->input_arrays,
+          sizeof(program->input_arrays[0]) * program->num_input_arrays);
 
    /* Emit constants and uniforms.  TGSI uses a single index space for these,
     * so we put all the translated regs in t->constants.
@@ -5473,6 +5640,8 @@  get_mesa_program(struct gl_context *ctx,
    prog->NumInstructions = 0;
 
    do_set_program_inouts(shader->ir, prog, shader->Stage);
+   shrink_array_declarations(v->input_arrays, v->num_input_arrays,
+                             prog->InputsRead);
    count_resources(v, prog);
 
    /* This must be done before the uniform storage is associated. */
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.h b/src/mesa/state_tracker/st_glsl_to_tgsi.h
index 2cb80bc..d753635 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.h
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.h
@@ -43,6 +43,7 @@  enum pipe_error st_translate_program(
    const struct gl_program *proginfo,
    GLuint numInputs,
    const GLuint inputMapping[],
+   const GLuint inputSlotToAttr[],
    const ubyte inputSemanticName[],
    const ubyte inputSemanticIndex[],
    const GLuint interpMode[],
diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c
index a9110d3..098a931 100644
--- a/src/mesa/state_tracker/st_program.c
+++ b/src/mesa/state_tracker/st_program.c
@@ -351,6 +351,7 @@  st_translate_vertex_program(struct st_context *st,
                                    /* inputs */
                                    vpv->num_inputs,
                                    stvp->input_to_index,
+                                   NULL, /* inputSlotToAttr */
                                    NULL, /* input semantic name */
                                    NULL, /* input semantic index */
                                    NULL, /* interp mode */
@@ -482,6 +483,7 @@  st_translate_fragment_program(struct st_context *st,
 
    GLuint outputMapping[FRAG_RESULT_MAX];
    GLuint inputMapping[VARYING_SLOT_MAX];
+   GLuint inputSlotToAttr[VARYING_SLOT_MAX];
    GLuint interpMode[PIPE_MAX_SHADER_INPUTS];  /* XXX size? */
    GLuint interpLocation[PIPE_MAX_SHADER_INPUTS];
    GLuint attr;
@@ -502,6 +504,7 @@  st_translate_fragment_program(struct st_context *st,
       return NULL;
 
    assert(!(key->bitmap && key->drawpixels));
+   memset(inputSlotToAttr, ~0, sizeof(inputSlotToAttr));
 
    if (key->bitmap) {
       /* glBitmap drawing */
@@ -543,6 +546,7 @@  st_translate_fragment_program(struct st_context *st,
          const GLuint slot = fs_num_inputs++;
 
          inputMapping[attr] = slot;
+         inputSlotToAttr[slot] = attr;
          if (stfp->Base.IsCentroid & BITFIELD64_BIT(attr))
             interpLocation[slot] = TGSI_INTERPOLATE_LOC_CENTROID;
          else if (stfp->Base.IsSample & BITFIELD64_BIT(attr))
@@ -778,6 +782,7 @@  st_translate_fragment_program(struct st_context *st,
                            /* inputs */
                            fs_num_inputs,
                            inputMapping,
+                           inputSlotToAttr,
                            input_semantic_name,
                            input_semantic_index,
                            interpMode,
@@ -867,6 +872,7 @@  st_translate_geometry_program(struct st_context *st,
                               struct st_geometry_program *stgp,
                               const struct st_gp_variant_key *key)
 {
+   GLuint inputSlotToAttr[VARYING_SLOT_MAX];
    GLuint inputMapping[VARYING_SLOT_MAX];
    GLuint outputMapping[VARYING_SLOT_MAX];
    struct pipe_context *pipe = st->pipe;
@@ -896,6 +902,7 @@  st_translate_geometry_program(struct st_context *st,
       return NULL;
    }
 
+   memset(inputSlotToAttr, 0, sizeof(inputSlotToAttr));
    memset(inputMapping, 0, sizeof(inputMapping));
    memset(outputMapping, 0, sizeof(outputMapping));
 
@@ -907,6 +914,7 @@  st_translate_geometry_program(struct st_context *st,
          const GLuint slot = gs_num_inputs++;
 
          inputMapping[attr] = slot;
+         inputSlotToAttr[slot] = attr;
 
          switch (attr) {
          case VARYING_SLOT_PRIMITIVE_ID:
@@ -1080,6 +1088,7 @@  st_translate_geometry_program(struct st_context *st,
                         /* inputs */
                         gs_num_inputs,
                         inputMapping,
+                        inputSlotToAttr,
                         input_semantic_name,
                         input_semantic_index,
                         NULL,