[Mesa-dev,14/95] i965/vec4: set correct register regions for 32-bit and 64-bit

Submitted by Iago Toral Quiroga on July 19, 2016, 10:40 a.m.

Details

Message ID 1468924892-6910-15-git-send-email-itoral@igalia.com
State New
Headers show
Series "i965 Haswell ARB_gpu_shader_fp64 / OpenGL 4.0" ( rev: 2 1 ) in Mesa

Not browsing as part of any series.

Commit Message

Iago Toral Quiroga July 19, 2016, 10:40 a.m.
For 32-bit instructions we want to use <4,4,1> regions for VGRF
sources, so we should set a width of 4 (we were previously setting 8).

For 64-bit instructions we want to use a width of 2 because the
hardware uses 32-bit swizzles, meaning that we can only address 2
consecutive 64-bit components at a time. Also, Curro suggested that
the hardware probably fixes the width to 2 for 64-bit instructions
anyway, so just go with that and use <2,2,1>.

Signed-off-by: Connor Abbott <connor.w.abbott@intel.com>
---
 src/mesa/drivers/dri/i965/brw_vec4.cpp           | 22 ++++++++++++++++------
 src/mesa/drivers/dri/i965/brw_vec4_generator.cpp |  2 +-
 2 files changed, 17 insertions(+), 7 deletions(-)

Patch hide | download patch | download mbox

diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index d4b1e9c..cf32671 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -1839,19 +1839,26 @@  vec4_visitor::convert_to_hw_regs()
          struct src_reg &src = inst->src[i];
          struct brw_reg reg;
          switch (src.file) {
-         case VGRF:
-            reg = brw_vec8_grf(src.nr + src.reg_offset, 0);
+         case VGRF: {
+            unsigned type_size = type_sz(src.type);
+            unsigned width = REG_SIZE / 2 / MAX2(4, type_size);
+            reg = brw_vecn_grf(width, src.nr + src.reg_offset, 0);
             reg.type = src.type;
             reg.swizzle = src.swizzle;
             reg.abs = src.abs;
             reg.negate = src.negate;
+            if (type_size == 8) {
+               reg.vstride = BRW_VERTICAL_STRIDE_2;
+            }
             break;
+         }
 
-         case UNIFORM:
+         case UNIFORM: {
+            unsigned width = REG_SIZE / 2 / MAX2(4, type_sz(src.type));
             reg = stride(brw_vec4_grf(prog_data->base.dispatch_grf_start_reg +
                                       (src.nr + src.reg_offset) / 2,
                                       ((src.nr + src.reg_offset) % 2) * 4),
-                         0, 4, 1);
+                         0, width, 1);
             reg.type = src.type;
             reg.swizzle = src.swizzle;
             reg.abs = src.abs;
@@ -1860,6 +1867,7 @@  vec4_visitor::convert_to_hw_regs()
             /* This should have been moved to pull constants. */
             assert(!src.reladdr);
             break;
+         }
 
          case ARF:
          case FIXED_GRF:
@@ -1895,11 +1903,13 @@  vec4_visitor::convert_to_hw_regs()
       struct brw_reg reg;
 
       switch (inst->dst.file) {
-      case VGRF:
-         reg = brw_vec8_grf(dst.nr + dst.reg_offset, 0);
+      case VGRF: {
+         unsigned width = REG_SIZE / MAX2(4, type_sz(dst.type));
+         reg = brw_vecn_grf(width, dst.nr + dst.reg_offset, 0);
          reg.type = dst.type;
          reg.writemask = dst.writemask;
          break;
+      }
 
       case MRF:
          assert(((dst.nr + dst.reg_offset) & ~BRW_MRF_COMPR4) < BRW_MAX_MRF(devinfo->gen));
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
index d47b489..d0720a1 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
@@ -1501,7 +1501,7 @@  generate_code(struct brw_codegen *p,
       unsigned pre_emit_nr_insn = p->nr_insn;
       bool fix_exec_size = false;
 
-      if (dst.width == BRW_WIDTH_4) {
+      if (dst.width == BRW_WIDTH_4 && type_sz(dst.type) != 8) {
          /* This happens in attribute fixups for "dual instanced" geometry
           * shaders, since they use attributes that are vec4's.  Since the exec
           * width is only 4, it's essential that the caller set