[Mesa-dev] i965/fs: Track output regs on a split virtual GRF basis.

Submitted by Eric Anholt on April 8, 2014, 10:09 p.m.

Details

Message ID 1396994995-18525-1-git-send-email-eric@anholt.net
State New
Headers show

Not browsing as part of any series.

Commit Message

Eric Anholt April 8, 2014, 10:09 p.m.
v2: Fix fragment program fragment.color output, fix smaller-than-vec4 dual
    src output codegen, use offset() a bit more.
---
 src/mesa/drivers/dri/i965/brw_fs.cpp         |  2 +-
 src/mesa/drivers/dri/i965/brw_fs.h           |  5 +--
 src/mesa/drivers/dri/i965/brw_fs_fp.cpp      | 18 ++++----
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 64 +++++++++++++++-------------
 4 files changed, 46 insertions(+), 43 deletions(-)

Patch hide | download patch | download mbox

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 85a5463..972d4a8 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -1732,7 +1732,7 @@  fs_visitor::compact_virtual_grfs()
       { &pixel_y, 1 },
       { &pixel_w, 1 },
       { &wpos_w, 1 },
-      { &dual_src_output, 1 },
+      { dual_src_output, ARRAY_SIZE(dual_src_output) },
       { outputs, ARRAY_SIZE(outputs) },
       { delta_x, ARRAY_SIZE(delta_x) },
       { delta_y, ARRAY_SIZE(delta_y) },
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 3d21ee5..d6dfde4 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -526,9 +526,8 @@  public:
    struct hash_table *variable_ht;
    fs_reg frag_depth;
    fs_reg sample_mask;
-   fs_reg outputs[BRW_MAX_DRAW_BUFFERS];
-   unsigned output_components[BRW_MAX_DRAW_BUFFERS];
-   fs_reg dual_src_output;
+   fs_reg outputs[BRW_MAX_DRAW_BUFFERS * 4];
+   fs_reg dual_src_output[4];
    bool do_dual_src;
    int first_non_payload_grf;
    /** Either BRW_MAX_GRF or GEN7_MRF_HACK_START */
diff --git a/src/mesa/drivers/dri/i965/brw_fs_fp.cpp b/src/mesa/drivers/dri/i965/brw_fs_fp.cpp
index 49eaf05..c6f063e 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_fp.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_fp.cpp
@@ -646,25 +646,25 @@  fs_visitor::get_fp_dst_reg(const prog_dst_register *dst)
          return frag_depth;
       } else if (dst->Index == FRAG_RESULT_COLOR) {
          if (outputs[0].file == BAD_FILE) {
-            outputs[0] = fs_reg(this, glsl_type::vec4_type);
-            output_components[0] = 4;
+            fs_reg reg = fs_reg(this, glsl_type::vec4_type);
 
             /* Tell emit_fb_writes() to smear fragment.color across all the
              * color attachments.
              */
-            for (int i = 1; i < c->key.nr_color_regions; i++) {
-               outputs[i] = outputs[0];
-               output_components[i] = output_components[0];
+            for (int i = 0; i < c->key.nr_color_regions; i++) {
+               for (int j = 0; j < 4; j++)
+                  outputs[i * 4 + j] = offset(reg, j);
             }
          }
          return outputs[0];
       } else {
          int output_index = dst->Index - FRAG_RESULT_DATA0;
-         if (outputs[output_index].file == BAD_FILE) {
-            outputs[output_index] = fs_reg(this, glsl_type::vec4_type);
+         if (outputs[output_index * 4].file == BAD_FILE) {
+            fs_reg reg = fs_reg(this, glsl_type::vec4_type);
+            for (int i = 0; i < 4; i++)
+               outputs[output_index * 4 + i] = offset(reg, i);
          }
-         output_components[output_index] = 4;
-         return outputs[output_index];
+         return outputs[output_index * 4];
       }
 
    case PROGRAM_UNDEFINED:
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 63a0ae5..a017d55 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -70,17 +70,25 @@  fs_visitor::visit(ir_variable *ir)
    } else if (ir->data.mode == ir_var_shader_out) {
       reg = new(this->mem_ctx) fs_reg(this, ir->type);
 
+      int vector_elements =
+         ir->type->is_array() ? ir->type->fields.array->vector_elements
+         : ir->type->vector_elements;
+
       if (ir->data.index > 0) {
-	 assert(ir->data.location == FRAG_RESULT_DATA0);
-	 assert(ir->data.index == 1);
-	 this->dual_src_output = *reg;
+         assert(ir->data.location == FRAG_RESULT_DATA0);
+         assert(ir->data.index == 1);
+         for (unsigned i = 0; i < vector_elements; i++)
+            this->dual_src_output[i + ir->data.location_frac] = offset(*reg, i);
          this->do_dual_src = true;
       } else if (ir->data.location == FRAG_RESULT_COLOR) {
+         fs_reg chan = *reg;
 	 /* Writing gl_FragColor outputs to all color regions. */
-	 for (unsigned int i = 0; i < MAX2(c->key.nr_color_regions, 1); i++) {
-	    this->outputs[i] = *reg;
-	    this->output_components[i] = 4;
-	 }
+         for (int j = 0; j < vector_elements; j++) {
+            for (unsigned i = 0; i < MAX2(c->key.nr_color_regions, 1); i++) {
+               this->outputs[i * 4 + j + ir->data.location_frac] = chan;
+            }
+            chan.reg_offset++;
+         }
       } else if (ir->data.location == FRAG_RESULT_DEPTH) {
 	 this->frag_depth = *reg;
       } else if (ir->data.location == FRAG_RESULT_SAMPLE_MASK) {
@@ -90,16 +98,16 @@  fs_visitor::visit(ir_variable *ir)
 	 assert(ir->data.location >= FRAG_RESULT_DATA0 &&
 		ir->data.location < FRAG_RESULT_DATA0 + BRW_MAX_DRAW_BUFFERS);
 
-	 int vector_elements =
-	    ir->type->is_array() ? ir->type->fields.array->vector_elements
-				 : ir->type->vector_elements;
-
 	 /* General color output. */
 	 for (unsigned int i = 0; i < MAX2(1, ir->type->length); i++) {
 	    int output = ir->data.location - FRAG_RESULT_DATA0 + i;
-	    this->outputs[output] = *reg;
-	    this->outputs[output].reg_offset += vector_elements * i;
-	    this->output_components[output] = vector_elements;
+            fs_reg out = *reg;
+            out.reg_offset += vector_elements * i;
+
+            for (int j = 0; j < vector_elements; j++) {
+               this->outputs[4 * output + j + ir->data.location_frac] = out;
+               out.reg_offset++;
+            }
 	 }
       }
    } else if (ir->data.mode == ir_var_uniform) {
@@ -2600,15 +2608,13 @@  fs_visitor::emit_color_write(int target, int index, int first_color_mrf)
 {
    int reg_width = dispatch_width / 8;
    fs_inst *inst;
-   fs_reg color = outputs[target];
+   fs_reg color = outputs[target * 4 + index];
    fs_reg mrf;
 
    /* If there's no color data to be written, skip it. */
    if (color.file == BAD_FILE)
       return;
 
-   color.reg_offset += index;
-
    if (dispatch_width == 8 || brw->gen >= 6) {
       /* SIMD8 write looks like:
        * m + 0: r0
@@ -2709,8 +2715,7 @@  fs_visitor::emit_alpha_test()
                      BRW_CONDITIONAL_NEQ));
    } else {
       /* RT0 alpha */
-      fs_reg color = outputs[0];
-      color.reg_offset += 3;
+      fs_reg color = outputs[3];
 
       /* f0.1 &= func(color, ref) */
       cmp = emit(CMP(reg_null_f, color, fs_reg(c->key.alpha_test_ref),
@@ -2815,23 +2820,23 @@  fs_visitor::emit_fb_writes()
    }
 
    if (do_dual_src) {
-      fs_reg src0 = this->outputs[0];
-      fs_reg src1 = this->dual_src_output;
-
       this->current_annotation = ralloc_asprintf(this->mem_ctx,
 						 "FB write src0");
       for (int i = 0; i < 4; i++) {
-	 fs_inst *inst = emit(MOV(fs_reg(MRF, color_mrf + i, src0.type), src0));
-	 src0.reg_offset++;
-	 inst->saturate = c->key.clamp_fragment_color;
+         fs_reg src0 = this->outputs[0 * 4 + i];
+         if (src0.file != BAD_FILE) {
+            fs_inst *inst = emit(MOV(fs_reg(MRF, color_mrf + i, src0.type),
+                                     src0));
+            inst->saturate = c->key.clamp_fragment_color;
+         }
       }
 
       this->current_annotation = ralloc_asprintf(this->mem_ctx,
 						 "FB write src1");
       for (int i = 0; i < 4; i++) {
+         fs_reg src1 = this->dual_src_output[i];
 	 fs_inst *inst = emit(MOV(fs_reg(MRF, color_mrf + 4 + i, src1.type),
                                   src1));
-	 src1.reg_offset++;
 	 inst->saturate = c->key.clamp_fragment_color;
       }
 
@@ -2864,8 +2869,7 @@  fs_visitor::emit_fb_writes()
       int write_color_mrf = color_mrf;
       if (src0_alpha_to_render_target && target != 0) {
          fs_inst *inst;
-         fs_reg color = outputs[0];
-         color.reg_offset += 3;
+         fs_reg color = outputs[3];
 
          inst = emit(MOV(fs_reg(MRF, write_color_mrf, color.type),
                          color));
@@ -2873,7 +2877,7 @@  fs_visitor::emit_fb_writes()
          write_color_mrf = color_mrf + reg_width;
       }
 
-      for (unsigned i = 0; i < this->output_components[target]; i++)
+      for (unsigned i = 0; i < 4; i++)
          emit_color_write(target, i, write_color_mrf);
 
       bool eot = false;
@@ -2966,7 +2970,7 @@  fs_visitor::fs_visitor(struct brw_context *brw,
                                        hash_table_pointer_compare);
 
    memset(this->outputs, 0, sizeof(this->outputs));
-   memset(this->output_components, 0, sizeof(this->output_components));
+   memset(this->dual_src_output, 0, sizeof(this->dual_src_output));
    this->first_non_payload_grf = 0;
    this->max_grf = brw->gen >= 7 ? GEN7_MRF_HACK_START : BRW_MAX_GRF;