[Mesa-dev,18/95] i965/vec4: add VEC4_OPCODE_PICK_{LOW, HIGH}_32BIT opcodes

Submitted by Iago Toral Quiroga on July 19, 2016, 10:40 a.m.

Details

Message ID 1468924892-6910-19-git-send-email-itoral@igalia.com
State New
Headers show
Series "i965 Haswell ARB_gpu_shader_fp64 / OpenGL 4.0" ( rev: 2 1 ) in Mesa

Not browsing as part of any series.

Commit Message

Iago Toral Quiroga July 19, 2016, 10:40 a.m.
These opcodes will pick the low/high 32-bit in each 64-bit data element
using Align1 mode. We will use this, for example, to things like
unpackDouble2x32.

We can't do this in Align16 because we would need data to cross the
vec4 boundary.
---
 src/mesa/drivers/dri/i965/brw_defines.h          |  2 ++
 src/mesa/drivers/dri/i965/brw_shader.cpp         |  4 ++++
 src/mesa/drivers/dri/i965/brw_vec4.cpp           |  4 ++++
 src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 26 ++++++++++++++++++++++++
 4 files changed, 36 insertions(+)

Patch hide | download patch | download mbox

diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index 10db5f0..f2da29d 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -1096,6 +1096,8 @@  enum opcode {
    VEC4_OPCODE_UNPACK_UNIFORM,
    VEC4_OPCODE_DOUBLE_TO_FLOAT,
    VEC4_OPCODE_FLOAT_TO_DOUBLE,
+   VEC4_OPCODE_PICK_LOW_32BIT,
+   VEC4_OPCODE_PICK_HIGH_32BIT,
 
    FS_OPCODE_DDX_COARSE,
    FS_OPCODE_DDX_FINE,
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index 5fb1d68..d662e15 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -317,6 +317,10 @@  brw_instruction_name(const struct brw_device_info *devinfo, enum opcode op)
       return "double_to_float";
    case VEC4_OPCODE_FLOAT_TO_DOUBLE:
       return "float_to_double";
+   case VEC4_OPCODE_PICK_LOW_32BIT:
+      return "pick_low_32bit";
+   case VEC4_OPCODE_PICK_HIGH_32BIT:
+      return "pick_high_32bit";
 
    case FS_OPCODE_DDX_COARSE:
       return "ddx_coarse";
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index cf32671..87a93c9 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -245,6 +245,8 @@  vec4_instruction::can_do_writemask(const struct brw_device_info *devinfo)
    case SHADER_OPCODE_GEN4_SCRATCH_READ:
    case VEC4_OPCODE_DOUBLE_TO_FLOAT:
    case VEC4_OPCODE_FLOAT_TO_DOUBLE:
+   case VEC4_OPCODE_PICK_LOW_32BIT:
+   case VEC4_OPCODE_PICK_HIGH_32BIT:
    case VS_OPCODE_PULL_CONSTANT_LOAD:
    case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7:
    case VS_OPCODE_SET_SIMD4X2_HEADER_GEN9:
@@ -498,6 +500,8 @@  vec4_visitor::opt_reduce_swizzle()
 
       case VEC4_OPCODE_FLOAT_TO_DOUBLE:
       case VEC4_OPCODE_DOUBLE_TO_FLOAT:
+      case VEC4_OPCODE_PICK_LOW_32BIT:
+      case VEC4_OPCODE_PICK_HIGH_32BIT:
          swizzle = brw_swizzle_for_size(4);
          break;
 
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
index d0720a1..9421ee5 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
@@ -1954,6 +1954,32 @@  generate_code(struct brw_codegen *p,
          break;
       }
 
+      case VEC4_OPCODE_PICK_LOW_32BIT:
+      case VEC4_OPCODE_PICK_HIGH_32BIT: {
+         /* Stores the low/high 32-bit of each 64-bit element in src[0] into
+          * dst using ALIGN1 mode and a <8,4,2>:UD region on the source.
+          */
+         assert(type_sz(src[0].type) == 8);
+         assert(type_sz(dst.type) == 4);
+
+         brw_set_default_access_mode(p, BRW_ALIGN_1);
+
+         dst = retype(dst, BRW_REGISTER_TYPE_UD);
+         dst.hstride = BRW_HORIZONTAL_STRIDE_1;
+         dst.width = BRW_WIDTH_8;
+
+         src[0] = retype(src[0], BRW_REGISTER_TYPE_UD);
+         if (inst->opcode == VEC4_OPCODE_PICK_HIGH_32BIT)
+            src[0] = get_element_ud(src[0], 1);
+         src[0].vstride = BRW_VERTICAL_STRIDE_8;
+         src[0].width = BRW_WIDTH_4;
+         src[0].hstride = BRW_HORIZONTAL_STRIDE_2;
+         brw_MOV(p, dst, src[0]);
+
+         brw_set_default_access_mode(p, BRW_ALIGN_16);
+         break;
+      }
+
       case VEC4_OPCODE_PACK_BYTES: {
          /* Is effectively:
           *