[Mesa-dev,63/95] i965/vec4: Fix UBO loads for 64-bit data

Submitted by Iago Toral Quiroga on July 19, 2016, 10:41 a.m.

Details

Message ID 1468924892-6910-64-git-send-email-itoral@igalia.com
State New
Headers show
Series "i965 Haswell ARB_gpu_shader_fp64 / OpenGL 4.0" ( rev: 2 1 ) in Mesa

Not browsing as part of any series.

Commit Message

Iago Toral Quiroga July 19, 2016, 10:41 a.m.
We need to emit to 32-bit load messages to load a full dvec4. If only
1 or 2 double components are needed dead-code-elimination will remove
the second one.

We also need to shuffle the result of the 32-bit messages to form
valid 64-bit SIMD4x2 data.
---
 src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 45 ++++++++++++++++++++----------
 1 file changed, 31 insertions(+), 14 deletions(-)

Patch hide | download patch | download mbox

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
index 346e822..172bf48 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
@@ -827,31 +827,48 @@  vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
                                nir->info.num_ubos - 1);
       }
 
-      src_reg offset;
+      src_reg offset_reg(this, glsl_type::uint_type);
       nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]);
       if (const_offset) {
-         offset = brw_imm_ud(const_offset->u32[0] & ~15);
+         emit(MOV(dst_reg(offset_reg), brw_imm_ud(const_offset->u32[0] & ~15)));
       } else {
-         offset = get_nir_src(instr->src[1], nir_type_uint32, 1);
+         offset_reg = get_nir_src(instr->src[1], nir_type_uint32, 1);
       }
 
-      src_reg packed_consts = src_reg(this, glsl_type::vec4_type);
-      packed_consts.type = dest.type;
+      src_reg packed_consts;
+      if (nir_dest_bit_size(instr->dest) == 32) {
+         packed_consts = src_reg(this, glsl_type::vec4_type);
+         emit_pull_constant_load_reg(dst_reg(packed_consts),
+                                     surf_index,
+                                     offset_reg,
+                                     NULL, NULL /* before_block/inst */);
+      } else {
+         src_reg temp = src_reg(this, glsl_type::dvec4_type);
+         src_reg temp_float = retype(temp, BRW_REGISTER_TYPE_F);
+
+         emit_pull_constant_load_reg(dst_reg(temp_float),
+                                     surf_index, offset_reg, NULL, NULL);
 
-      emit_pull_constant_load_reg(dst_reg(packed_consts),
-                                  surf_index,
-                                  offset,
-                                  NULL, NULL /* before_block/inst */);
+         emit(ADD(dst_reg(offset_reg), offset_reg, brw_imm_ud(16u)));
+         emit_pull_constant_load_reg(dst_reg(offset(temp_float, 1)),
+                                     surf_index, offset_reg, NULL, NULL);
+
+         packed_consts = src_reg(this, glsl_type::dvec4_type);
+         shuffle_64bit_data(dst_reg(packed_consts), temp, false);
+      }
 
       packed_consts.swizzle = brw_swizzle_for_size(instr->num_components);
       if (const_offset) {
-         packed_consts.swizzle += BRW_SWIZZLE4(const_offset->u32[0] % 16 / 4,
-                                               const_offset->u32[0] % 16 / 4,
-                                               const_offset->u32[0] % 16 / 4,
-                                               const_offset->u32[0] % 16 / 4);
+         unsigned type_size = type_sz(dest.type);
+         packed_consts.swizzle +=
+            BRW_SWIZZLE4(const_offset->u32[0] % 16 / type_size,
+                         const_offset->u32[0] % 16 / type_size,
+                         const_offset->u32[0] % 16 / type_size,
+                         const_offset->u32[0] % 16 / type_size);
       }
 
-      emit(MOV(dest, packed_consts));
+      emit(MOV(dest, retype(packed_consts, dest.type)));
+
       break;
    }