[Mesa-dev,64/95] i965/vec4: Fix SSBO loads for 64-bit data

Submitted by Iago Toral Quiroga on July 19, 2016, 10:41 a.m.

Details

Message ID 1468924892-6910-65-git-send-email-itoral@igalia.com
State New
Headers show
Series "i965 Haswell ARB_gpu_shader_fp64 / OpenGL 4.0" ( rev: 2 1 ) in Mesa

Not browsing as part of any series.

Commit Message

Iago Toral Quiroga July 19, 2016, 10:41 a.m.
Same requirements as for UBO loads.
---
 src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 32 ++++++++++++++++++++++++------
 1 file changed, 26 insertions(+), 6 deletions(-)

Patch hide | download patch | download mbox

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
index 172bf48..5bc1fd5 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
@@ -638,10 +638,10 @@  vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
                                nir->info.num_ssbos - 1);
       }
 
-      src_reg offset_reg;
+      src_reg offset_reg = src_reg(this, glsl_type::uint_type);
       nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]);
       if (const_offset) {
-         offset_reg = brw_imm_ud(const_offset->u32[0]);
+         emit(MOV(dst_reg(offset_reg), brw_imm_ud(const_offset->u32[0])));
       } else {
          offset_reg = get_nir_src(instr->src[1], 1);
       }
@@ -650,14 +650,34 @@  vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
       const vec4_builder bld = vec4_builder(this).at_end()
          .annotate(current_annotation, base_ir);
 
-      src_reg read_result = emit_untyped_read(bld, surf_index, offset_reg,
-                                              1 /* dims */, 4 /* size*/,
-                                              BRW_PREDICATE_NONE);
+      src_reg read_result;
       dst_reg dest = get_nir_dest(instr->dest);
+      if (type_sz(dest.type) < 8) {
+         read_result = emit_untyped_read(bld, surf_index, offset_reg,
+                                         1 /* dims */, 4 /* size*/,
+                                         BRW_PREDICATE_NONE);
+      } else {
+         src_reg shuffled = src_reg(this, glsl_type::dvec4_type);
+
+         src_reg temp;
+         temp = emit_untyped_read(bld, surf_index, offset_reg,
+                                  1 /* dims */, 4 /* size*/,
+                                  BRW_PREDICATE_NONE);
+         emit(MOV(dst_reg(retype(shuffled, temp.type)), temp));
+
+         emit(ADD(dst_reg(offset_reg), offset_reg, brw_imm_ud(16)));
+         temp = emit_untyped_read(bld, surf_index, offset_reg,
+                                  1 /* dims */, 4 /* size*/,
+                                  BRW_PREDICATE_NONE);
+         emit(MOV(dst_reg(retype(offset(shuffled, 1), temp.type)), temp));
+
+         read_result = src_reg(this, glsl_type::dvec4_type);
+         shuffle_64bit_data(dst_reg(read_result), shuffled, false);
+      }
+
       read_result.type = dest.type;
       read_result.swizzle = brw_swizzle_for_size(instr->num_components);
       emit(MOV(dest, read_result));
-
       break;
    }