[V3] Use a separate pattern for simd shuffle instead of binary pattern

Submitted by Guo Yejun on July 17, 2015, 7:49 a.m.

Details

Message ID 1437119384-17671-1-git-send-email-yejun.guo@intel.com
State New
Headers show

Not browsing as part of any series.

Commit Message

Guo Yejun July 17, 2015, 7:49 a.m.
The binary pattern assumes that both source operands have the same type,
which is not the case for simd shuffle, so add a separate pattern for it.

v2: use a different way to obtain the immediate data
    do not use GenRegister::udxgrf; use sel.selReg instead
    add SimdShuffleInstruction::wellFormed
v3: refine SimdShuffleInstruction::wellFormed
    set dag.child[0]->isRoot = 1

Signed-off-by: Guo Yejun <yejun.guo@intel.com>
---
 backend/src/backend/gen8_context.cpp               | 38 ++++++------
 backend/src/backend/gen8_context.hpp               |  1 +
 backend/src/backend/gen_context.cpp                | 68 ++++++++++++----------
 backend/src/backend/gen_context.hpp                |  1 +
 .../src/backend/gen_insn_gen7_schedule_info.hxx    |  1 +
 backend/src/backend/gen_insn_selection.cpp         | 63 ++++++++++++++++----
 backend/src/backend/gen_insn_selection.hxx         |  2 +-
 backend/src/ir/instruction.cpp                     | 38 +++++++++++-
 backend/src/ir/instruction.hpp                     |  8 +++
 backend/src/ir/instruction.hxx                     |  2 +-
 10 files changed, 158 insertions(+), 64 deletions(-)

Patch hide | download patch | download mbox

diff --git a/backend/src/backend/gen8_context.cpp b/backend/src/backend/gen8_context.cpp
index 66bb54a..b497ee5 100644
--- a/backend/src/backend/gen8_context.cpp
+++ b/backend/src/backend/gen8_context.cpp
@@ -254,6 +254,27 @@  namespace gbe
     }
   }
 
+  void Gen8Context::emitSimdShuffleInstruction(const SelectionInstruction &insn) {
+    const GenRegister dst = ra->genReg(insn.dst(0));
+    const GenRegister src0 = ra->genReg(insn.src(0));
+    const GenRegister src1 = ra->genReg(insn.src(1));
+    assert(insn.opcode == SEL_OP_SIMD_SHUFFLE);
+
+    uint32_t simd = p->curr.execWidth;
+    if (src1.file == GEN_IMMEDIATE_VALUE) {
+      uint32_t offset = src1.value.ud % simd;
+      GenRegister reg = GenRegister::suboffset(src0, offset);
+      p->MOV(dst, GenRegister::retype(GenRegister::ud1grf(reg.nr, reg.subnr / typeSize(reg.type)), reg.type));
+    } else {
+      uint32_t base = src0.nr * 32 + src0.subnr * 4;
+      GenRegister baseReg = GenRegister::immuw(base);
+      const GenRegister a0 = GenRegister::addr8(0);
+      p->ADD(a0, GenRegister::unpacked_uw(src1.nr, src1.subnr / typeSize(GEN_TYPE_UW)), baseReg);
+      GenRegister indirect = GenRegister::to_indirect1xN(src0, 0, 0);
+      p->MOV(dst, indirect);
+    }
+  }
+
   void Gen8Context::emitBinaryInstruction(const SelectionInstruction &insn) {
     const GenRegister dst = ra->genReg(insn.dst(0));
     const GenRegister src0 = ra->genReg(insn.src(0));
@@ -273,23 +294,6 @@  namespace gbe
         p->ADD(dst, dst, src1);
         break;
       }
-      case SEL_OP_SIMD_SHUFFLE:
-      {
-        uint32_t simd = p->curr.execWidth;
-        if (src1.file == GEN_IMMEDIATE_VALUE) {
-          uint32_t offset = src1.value.ud % simd;
-          GenRegister reg = GenRegister::suboffset(src0, offset);
-          p->MOV(dst, GenRegister::retype(GenRegister::ud1grf(reg.nr, reg.subnr / typeSize(reg.type)), reg.type));
-        } else {
-          uint32_t base = src0.nr * 32 + src0.subnr * 4;
-          GenRegister baseReg = GenRegister::immuw(base);
-          const GenRegister a0 = GenRegister::addr8(0);
-          p->ADD(a0, GenRegister::unpacked_uw(src1.nr, src1.subnr / typeSize(GEN_TYPE_UW)), baseReg);
-          GenRegister indirect = GenRegister::to_indirect1xN(src0, 0, 0);
-          p->MOV(dst, indirect);
-        }
-        break;
-      }
       default:
         GenContext::emitBinaryInstruction(insn);
     }
diff --git a/backend/src/backend/gen8_context.hpp b/backend/src/backend/gen8_context.hpp
index 4f164ce..84508e9 100644
--- a/backend/src/backend/gen8_context.hpp
+++ b/backend/src/backend/gen8_context.hpp
@@ -52,6 +52,7 @@  namespace gbe
 
     virtual void emitUnaryInstruction(const SelectionInstruction &insn);
     virtual void emitUnaryWithTempInstruction(const SelectionInstruction &insn);
+    virtual void emitSimdShuffleInstruction(const SelectionInstruction &insn);
     virtual void emitBinaryInstruction(const SelectionInstruction &insn);
     virtual void emitBinaryWithTempInstruction(const SelectionInstruction &insn);
     virtual void emitI64MULHIInstruction(const SelectionInstruction &insn);
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index db27377..e16b0a9 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -545,6 +545,42 @@  namespace gbe
     }
   }
 
+  void GenContext::emitSimdShuffleInstruction(const SelectionInstruction &insn) {
+    const GenRegister dst = ra->genReg(insn.dst(0));
+    const GenRegister src0 = ra->genReg(insn.src(0));
+    const GenRegister src1 = ra->genReg(insn.src(1));
+    assert(insn.opcode == SEL_OP_SIMD_SHUFFLE);
+
+    uint32_t simd = p->curr.execWidth;
+    if (src1.file == GEN_IMMEDIATE_VALUE) {
+      uint32_t offset = src1.value.ud % simd;
+      GenRegister reg = GenRegister::suboffset(src0, offset);
+      p->MOV(dst, GenRegister::retype(GenRegister::ud1grf(reg.nr, reg.subnr / typeSize(reg.type)), reg.type));
+    } else {
+      uint32_t base = src0.nr * 32 + src0.subnr * 4;
+      GenRegister baseReg = GenRegister::immuw(base);
+      const GenRegister a0 = GenRegister::addr8(0);
+
+      p->push();
+        if (simd == 8) {
+          p->ADD(a0, GenRegister::unpacked_uw(src1.nr, src1.subnr / typeSize(GEN_TYPE_UW)), baseReg);
+          GenRegister indirect = GenRegister::to_indirect1xN(src0, 0, 0);
+          p->MOV(dst, indirect);
+        } else if (simd == 16) {
+          p->curr.execWidth = 8;
+          p->ADD(a0, GenRegister::unpacked_uw(src1.nr, src1.subnr / typeSize(GEN_TYPE_UW)), baseReg);
+          GenRegister indirect = GenRegister::to_indirect1xN(src0, 0, 0);
+          p->MOV(dst, indirect);
+
+          p->curr.quarterControl = 1;
+          p->ADD(a0, GenRegister::unpacked_uw(src1.nr+1, src1.subnr / typeSize(GEN_TYPE_UW)), baseReg);
+          p->MOV(GenRegister::offset(dst, 1, 0), indirect);
+        } else
+          NOT_IMPLEMENTED;
+      p->pop();
+    }
+  }
+
   void GenContext::emitBinaryInstruction(const SelectionInstruction &insn) {
     const GenRegister dst = ra->genReg(insn.dst(0));
     const GenRegister src0 = ra->genReg(insn.src(0));
@@ -595,38 +631,6 @@  namespace gbe
           p->MOV(xdst.bottom_half(), xsrc1.bottom_half());
         }
         break;
-      case SEL_OP_SIMD_SHUFFLE:
-        {
-          uint32_t simd = p->curr.execWidth;
-          if (src1.file == GEN_IMMEDIATE_VALUE) {
-            uint32_t offset = src1.value.ud % simd;
-            GenRegister reg = GenRegister::suboffset(src0, offset);
-            p->MOV(dst, GenRegister::retype(GenRegister::ud1grf(reg.nr, reg.subnr / typeSize(reg.type)), reg.type));
-          } else {
-            uint32_t base = src0.nr * 32 + src0.subnr * 4;
-            GenRegister baseReg = GenRegister::immuw(base);
-            const GenRegister a0 = GenRegister::addr8(0);
-
-            p->push();
-              if (simd == 8) {
-                p->ADD(a0, GenRegister::unpacked_uw(src1.nr, src1.subnr / typeSize(GEN_TYPE_UW)), baseReg);
-                GenRegister indirect = GenRegister::to_indirect1xN(src0, 0, 0);
-                p->MOV(dst, indirect);
-              } else if (simd == 16) {
-                p->curr.execWidth = 8;
-                p->ADD(a0, GenRegister::unpacked_uw(src1.nr, src1.subnr / typeSize(GEN_TYPE_UW)), baseReg);
-                GenRegister indirect = GenRegister::to_indirect1xN(src0, 0, 0);
-                p->MOV(dst, indirect);
-
-                p->curr.quarterControl = 1;
-                p->ADD(a0, GenRegister::unpacked_uw(src1.nr+1, src1.subnr / typeSize(GEN_TYPE_UW)), baseReg);
-                p->MOV(GenRegister::offset(dst, 1, 0), indirect);
-              } else
-                NOT_IMPLEMENTED;
-            p->pop();
-          }
-        }
-        break;
       default: NOT_IMPLEMENTED;
     }
   }
diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp
index d387387..69fe513 100644
--- a/backend/src/backend/gen_context.hpp
+++ b/backend/src/backend/gen_context.hpp
@@ -127,6 +127,7 @@  namespace gbe
     virtual void emitUnaryInstruction(const SelectionInstruction &insn);
     virtual void emitUnaryWithTempInstruction(const SelectionInstruction &insn);
     virtual void emitBinaryInstruction(const SelectionInstruction &insn);
+    virtual void emitSimdShuffleInstruction(const SelectionInstruction &insn);
     virtual void emitBinaryWithTempInstruction(const SelectionInstruction &insn);
     void emitTernaryInstruction(const SelectionInstruction &insn);
     virtual void emitI64MULHIInstruction(const SelectionInstruction &insn);
diff --git a/backend/src/backend/gen_insn_gen7_schedule_info.hxx b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
index d054820..d073770 100644
--- a/backend/src/backend/gen_insn_gen7_schedule_info.hxx
+++ b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
@@ -3,6 +3,7 @@  DECL_GEN7_SCHEDULE(Label,           0,         0,        0)
 DECL_GEN7_SCHEDULE(Unary,           20,        4,        2)
 DECL_GEN7_SCHEDULE(UnaryWithTemp,   20,        40,      20)
 DECL_GEN7_SCHEDULE(Binary,          20,        4,        2)
+DECL_GEN7_SCHEDULE(SimdShuffle,     20,        4,        2)
 DECL_GEN7_SCHEDULE(BinaryWithTemp,  20,        40,      20)
 DECL_GEN7_SCHEDULE(Ternary,         20,        4,        2)
 DECL_GEN7_SCHEDULE(I64Shift,        20,        40,      20)
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index af5ab9c..b0ba9e3 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -543,7 +543,6 @@  namespace gbe
     ALU1(RNDD)
     ALU1(RNDU)
     ALU2(MACH)
-    ALU2(SIMD_SHUFFLE)
     ALU1(LZD)
     ALU3(MAD)
     ALU2WithTemp(MUL_HI)
@@ -565,6 +564,8 @@  namespace gbe
 #undef ALU2WithTemp
 #undef ALU3
 #undef I64Shift
+    /*! simd shuffle */
+    void SIMD_SHUFFLE(Reg dst, Reg src0, Reg src1);
     /*! Convert 64-bit integer to 32-bit float */
     void CONVI64_TO_F(Reg dst, Reg src, GenRegister tmp[6]);
     /*! Convert 64-bit integer to 32-bit float */
@@ -1652,6 +1653,14 @@  namespace gbe
     insn->src(2) = src2;
   }
 
+  void Selection::Opaque::SIMD_SHUFFLE(Reg dst, Reg src0, Reg src1)
+  {
+    SelectionInstruction *insn = this->appendInsn(SEL_OP_SIMD_SHUFFLE, 1, 2);
+    insn->dst(0) = dst;
+    insn->src(0) = src0;
+    insn->src(1) = src1;
+  }
+
   void Selection::Opaque::I64CMP(uint32_t conditional, Reg src0, Reg src1, GenRegister tmp[3]) {
     SelectionInstruction *insn = this->appendInsn(SEL_OP_I64CMP, 3, 2);
     insn->src(0) = src0;
@@ -2815,17 +2824,6 @@  namespace gbe
         case OP_UPSAMPLE_LONG:
           sel.UPSAMPLE_LONG(dst, src0, src1);
           break;
-        case OP_SIMD_SHUFFLE:
-          {
-            if (src1.file == GEN_IMMEDIATE_VALUE)
-              sel.SIMD_SHUFFLE(dst, src0, src1);
-            else {
-              GenRegister shiftL = GenRegister::udxgrf(sel.curr.execWidth, sel.reg(FAMILY_DWORD));
-              sel.SHL(shiftL, src1, GenRegister::immud(0x2));
-              sel.SIMD_SHUFFLE(dst, src0, shiftL);
-            }
-          }
-          break;
         default: NOT_IMPLEMENTED;
       }
       sel.pop();
@@ -4973,6 +4971,46 @@  namespace gbe
     }
   };
 
+  class SimdShuffleInstructionPattern : public SelectionPattern
+  {
+  public:
+    SimdShuffleInstructionPattern(void) : SelectionPattern(1,1) {
+      this->opcodes.push_back(ir::OP_SIMD_SHUFFLE);
+    }
+    INLINE bool emit(Selection::Opaque &sel, SelectionDAG &dag) const {
+      using namespace ir;
+      const ir::SimdShuffleInstruction &insn = cast<SimdShuffleInstruction>(dag.insn);
+      assert(insn.getOpcode() == OP_SIMD_SHUFFLE);
+      const Type type = insn.getType();
+      GenRegister dst  = sel.selReg(insn.getDst(0), type);
+      GenRegister src0  = sel.selReg(insn.getSrc(0), type);
+      GenRegister src1;
+
+      SelectionDAG *dag0 = dag.child[0];
+      SelectionDAG *dag1 = dag.child[1];
+      if (dag1 != NULL && dag1->insn.getOpcode() == OP_LOADI && canGetRegisterFromImmediate(dag1->insn)) {
+        const auto &childInsn = cast<LoadImmInstruction>(dag1->insn);
+        src1 = getRegisterFromImmediate(childInsn.getImmediate(), TYPE_U32);
+        if (dag0) dag0->isRoot = 1;
+      } else {
+        markAllChildren(dag);
+        src1 = sel.selReg(insn.getSrc(1), TYPE_U32);
+      }
+
+      sel.push();
+      if (src1.file == GEN_IMMEDIATE_VALUE)
+        sel.SIMD_SHUFFLE(dst, src0, src1);
+      else {
+        GenRegister shiftL = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32);
+        sel.SHL(shiftL, src1, GenRegister::immud(0x2));
+        sel.SIMD_SHUFFLE(dst, src0, shiftL);
+      }
+      sel.pop();
+      return true;
+    }
+
+  };
+
   /*! Get a region of a register */
   class RegionInstructionPattern : public SelectionPattern
   {
@@ -5247,6 +5285,7 @@  namespace gbe
     this->insert<GetImageInfoInstructionPattern>();
     this->insert<ReadARFInstructionPattern>();
     this->insert<RegionInstructionPattern>();
+    this->insert<SimdShuffleInstructionPattern>();
     this->insert<IndirectMovInstructionPattern>();
     this->insert<NullaryInstructionPattern>();
 
diff --git a/backend/src/backend/gen_insn_selection.hxx b/backend/src/backend/gen_insn_selection.hxx
index 79f2ce1..adbb137 100644
--- a/backend/src/backend/gen_insn_selection.hxx
+++ b/backend/src/backend/gen_insn_selection.hxx
@@ -26,7 +26,7 @@  DECL_SELECTION_IR(SHL, BinaryInstruction)
 DECL_SELECTION_IR(RSR, BinaryInstruction)
 DECL_SELECTION_IR(RSL, BinaryInstruction)
 DECL_SELECTION_IR(ASR, BinaryInstruction)
-DECL_SELECTION_IR(SIMD_SHUFFLE, BinaryInstruction)
+DECL_SELECTION_IR(SIMD_SHUFFLE, SimdShuffleInstruction)
 DECL_SELECTION_IR(I64SHR, I64ShiftInstruction)
 DECL_SELECTION_IR(I64SHL, I64ShiftInstruction)
 DECL_SELECTION_IR(I64ASR, I64ShiftInstruction)
diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp
index 12d70a6..f93c528 100644
--- a/backend/src/ir/instruction.cpp
+++ b/backend/src/ir/instruction.cpp
@@ -741,6 +741,22 @@  namespace ir {
       Register src[0];
     };
 
+    class ALIGNED_INSTRUCTION SimdShuffleInstruction : public NaryInstruction<2>
+    {
+    public:
+      SimdShuffleInstruction(Type type,
+                        Register dst,
+                        Register src0,
+                        Register src1) {
+        this->opcode = OP_SIMD_SHUFFLE;
+        this->type = type;
+        this->dst[0] = dst;
+        this->src[0] = src0;
+        this->src[1] = src1;
+      }
+      INLINE bool wellFormed(const Function &fn, std::string &why) const;
+    };
+
     class ALIGNED_INSTRUCTION RegionInstruction :
       public BasePolicy,
       public NSrcPolicy<RegionInstruction, 1>,
@@ -1154,6 +1170,19 @@  namespace ir {
       return true;
     }
 
+    INLINE bool SimdShuffleInstruction::wellFormed(const Function &fn, std::string &whyNot) const
+    {
+      if (UNLIKELY( this->type != TYPE_U32 && this->type != TYPE_S32 && this->type != TYPE_FLOAT)) {
+        whyNot = "Only support S32/U32/FLOAT type";
+        return false;
+      }
+
+      if (UNLIKELY(checkRegisterData(FAMILY_DWORD, src[1], fn, whyNot) == false))
+        return false;
+
+      return true;
+    }
+
     INLINE bool RegionInstruction::wellFormed(const Function &fn, std::string &whyNot) const
     {
       if (UNLIKELY(checkRegisterData(FAMILY_DWORD, src[0], fn, whyNot) == false))
@@ -1461,6 +1490,10 @@  START_INTROSPECTION(RegionInstruction)
 #include "ir/instruction.hxx"
 END_INTROSPECTION(RegionInstruction)
 
+START_INTROSPECTION(SimdShuffleInstruction)
+#include "ir/instruction.hxx"
+END_INTROSPECTION(SimdShuffleInstruction)
+
 START_INTROSPECTION(IndirectMovInstruction)
 #include "ir/instruction.hxx"
 END_INTROSPECTION(IndirectMovInstruction)
@@ -1652,6 +1685,7 @@  DECL_MEM_FN(BranchInstruction, LabelIndex, getLabelIndex(void), getLabelIndex())
 DECL_MEM_FN(SyncInstruction, uint32_t, getParameters(void), getParameters())
 DECL_MEM_FN(ReadARFInstruction, Type, getType(void), getType())
 DECL_MEM_FN(ReadARFInstruction, ARFRegister, getARFRegister(void), getARFRegister())
+DECL_MEM_FN(SimdShuffleInstruction, Type, getType(void), getType())
 DECL_MEM_FN(RegionInstruction, uint32_t, getOffset(void), getOffset())
 DECL_MEM_FN(IndirectMovInstruction, uint32_t, getOffset(void), getOffset())
 DECL_MEM_FN(IndirectMovInstruction, Type, getType(void), getType())
@@ -1751,7 +1785,6 @@  DECL_MEM_FN(GetImageInfoInstruction, uint8_t, getImageIndex(void), getImageIndex
   DECL_EMIT_FUNCTION(RHADD)
   DECL_EMIT_FUNCTION(I64HADD)
   DECL_EMIT_FUNCTION(I64RHADD)
-  DECL_EMIT_FUNCTION(SIMD_SHUFFLE)
 
 #undef DECL_EMIT_FUNCTION
 
@@ -1881,6 +1914,9 @@  DECL_MEM_FN(GetImageInfoInstruction, uint8_t, getImageIndex(void), getImageIndex
   Instruction REGION(Register dst, Register src, uint32_t offset) {
     return internal::RegionInstruction(dst, src, offset).convert();
   }
+  Instruction SIMD_SHUFFLE(Type type, Register dst, Register src0, Register src1) {
+    return internal::SimdShuffleInstruction(type, dst, src0, src1).convert();
+  }
 
   Instruction INDIRECT_MOV(Type type, Register dst, Register src0, Register src1, uint32_t offset) {
     return internal::IndirectMovInstruction(type, dst, src0, src1, offset).convert();
diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp
index ec4d00d..cf8d839 100644
--- a/backend/src/ir/instruction.hpp
+++ b/backend/src/ir/instruction.hpp
@@ -522,6 +522,14 @@  namespace ir {
     static bool isClassOf(const Instruction &insn);
   };
 
+  /*! simd shuffle */
+  class SimdShuffleInstruction : public Instruction {
+  public:
+    Type getType(void) const;
+    /*! Return true if the given instruction is an instance of this class */
+    static bool isClassOf(const Instruction &insn);
+  };
+
   /*! return a region of a register, make sure the offset does not exceed the register size */
   class RegionInstruction : public Instruction {
   public:
diff --git a/backend/src/ir/instruction.hxx b/backend/src/ir/instruction.hxx
index 1001837..81548c9 100644
--- a/backend/src/ir/instruction.hxx
+++ b/backend/src/ir/instruction.hxx
@@ -59,7 +59,7 @@  DECL_INSN(BSB, BinaryInstruction)
 DECL_INSN(OR, BinaryInstruction)
 DECL_INSN(XOR, BinaryInstruction)
 DECL_INSN(AND, BinaryInstruction)
-DECL_INSN(SIMD_SHUFFLE, BinaryInstruction)
+DECL_INSN(SIMD_SHUFFLE, SimdShuffleInstruction)
 DECL_INSN(SEL, SelectInstruction)
 DECL_INSN(EQ, CompareInstruction)
 DECL_INSN(NE, CompareInstruction)

Comments

LGTM

> -----Original Message-----

> From: Beignet [mailto:beignet-bounces@lists.freedesktop.org] On Behalf Of

> Guo Yejun

> Sent: Friday, July 17, 2015 3:50 PM

> To: beignet@lists.freedesktop.org

> Cc: Guo, Yejun

> Subject: [Beignet] [PATCH V3] Use a separate pattern for simd shuffle instead of

> binary pattern

> 

> the binary pattern assumes the two src operands have the same type, while simd

> shuffle is not the case, so add a separate pattern for it.

> 

> v2: use a different way to obtain imm data

>     do not use GenRegister::udxgrf, use sel.selReg instead

>     add SimdShuffleInstruction::wellFormed

> v3: refine SimdShuffleInstruction::wellFormed

>     set dag.child[0]->root = 1

> 

> Signed-off-by: Guo Yejun <yejun.guo@intel.com>

> ---

>  backend/src/backend/gen8_context.cpp               | 38 ++++++------

>  backend/src/backend/gen8_context.hpp               |  1 +

>  backend/src/backend/gen_context.cpp                | 68 ++++++++++++----------

>  backend/src/backend/gen_context.hpp                |  1 +

>  .../src/backend/gen_insn_gen7_schedule_info.hxx    |  1 +

>  backend/src/backend/gen_insn_selection.cpp         | 63 ++++++++++++++++----

>  backend/src/backend/gen_insn_selection.hxx         |  2 +-

>  backend/src/ir/instruction.cpp                     | 38 +++++++++++-

>  backend/src/ir/instruction.hpp                     |  8 +++

>  backend/src/ir/instruction.hxx                     |  2 +-

>  10 files changed, 158 insertions(+), 64 deletions(-)

> 

> diff --git a/backend/src/backend/gen8_context.cpp

> b/backend/src/backend/gen8_context.cpp

> index 66bb54a..b497ee5 100644

> --- a/backend/src/backend/gen8_context.cpp

> +++ b/backend/src/backend/gen8_context.cpp

> @@ -254,6 +254,27 @@ namespace gbe

>      }

>    }

> 

> +  void Gen8Context::emitSimdShuffleInstruction(const SelectionInstruction

> &insn) {

> +    const GenRegister dst = ra->genReg(insn.dst(0));

> +    const GenRegister src0 = ra->genReg(insn.src(0));

> +    const GenRegister src1 = ra->genReg(insn.src(1));

> +    assert(insn.opcode == SEL_OP_SIMD_SHUFFLE);

> +

> +    uint32_t simd = p->curr.execWidth;

> +    if (src1.file == GEN_IMMEDIATE_VALUE) {

> +      uint32_t offset = src1.value.ud % simd;

> +      GenRegister reg = GenRegister::suboffset(src0, offset);

> +      p->MOV(dst, GenRegister::retype(GenRegister::ud1grf(reg.nr, reg.subnr /

> typeSize(reg.type)), reg.type));

> +    } else {

> +      uint32_t base = src0.nr * 32 + src0.subnr * 4;

> +      GenRegister baseReg = GenRegister::immuw(base);

> +      const GenRegister a0 = GenRegister::addr8(0);

> +      p->ADD(a0, GenRegister::unpacked_uw(src1.nr, src1.subnr /

> typeSize(GEN_TYPE_UW)), baseReg);

> +      GenRegister indirect = GenRegister::to_indirect1xN(src0, 0, 0);

> +      p->MOV(dst, indirect);

> +    }

> +  }

> +

>    void Gen8Context::emitBinaryInstruction(const SelectionInstruction &insn) {

>      const GenRegister dst = ra->genReg(insn.dst(0));

>      const GenRegister src0 = ra->genReg(insn.src(0)); @@ -273,23 +294,6 @@

> namespace gbe

>          p->ADD(dst, dst, src1);

>          break;

>        }

> -      case SEL_OP_SIMD_SHUFFLE:

> -      {

> -        uint32_t simd = p->curr.execWidth;

> -        if (src1.file == GEN_IMMEDIATE_VALUE) {

> -          uint32_t offset = src1.value.ud % simd;

> -          GenRegister reg = GenRegister::suboffset(src0, offset);

> -          p->MOV(dst, GenRegister::retype(GenRegister::ud1grf(reg.nr, reg.subnr /

> typeSize(reg.type)), reg.type));

> -        } else {

> -          uint32_t base = src0.nr * 32 + src0.subnr * 4;

> -          GenRegister baseReg = GenRegister::immuw(base);

> -          const GenRegister a0 = GenRegister::addr8(0);

> -          p->ADD(a0, GenRegister::unpacked_uw(src1.nr, src1.subnr /

> typeSize(GEN_TYPE_UW)), baseReg);

> -          GenRegister indirect = GenRegister::to_indirect1xN(src0, 0, 0);

> -          p->MOV(dst, indirect);

> -        }

> -        break;

> -      }

>        default:

>          GenContext::emitBinaryInstruction(insn);

>      }

> diff --git a/backend/src/backend/gen8_context.hpp

> b/backend/src/backend/gen8_context.hpp

> index 4f164ce..84508e9 100644

> --- a/backend/src/backend/gen8_context.hpp

> +++ b/backend/src/backend/gen8_context.hpp

> @@ -52,6 +52,7 @@ namespace gbe

> 

>      virtual void emitUnaryInstruction(const SelectionInstruction &insn);

>      virtual void emitUnaryWithTempInstruction(const SelectionInstruction &insn);

> +    virtual void emitSimdShuffleInstruction(const SelectionInstruction

> + &insn);

>      virtual void emitBinaryInstruction(const SelectionInstruction &insn);

>      virtual void emitBinaryWithTempInstruction(const SelectionInstruction &insn);

>      virtual void emitI64MULHIInstruction(const SelectionInstruction &insn); diff --

> git a/backend/src/backend/gen_context.cpp

> b/backend/src/backend/gen_context.cpp

> index db27377..e16b0a9 100644

> --- a/backend/src/backend/gen_context.cpp

> +++ b/backend/src/backend/gen_context.cpp

> @@ -545,6 +545,42 @@ namespace gbe

>      }

>    }

> 

> +  void GenContext::emitSimdShuffleInstruction(const SelectionInstruction &insn)

> {

> +    const GenRegister dst = ra->genReg(insn.dst(0));

> +    const GenRegister src0 = ra->genReg(insn.src(0));

> +    const GenRegister src1 = ra->genReg(insn.src(1));

> +    assert(insn.opcode == SEL_OP_SIMD_SHUFFLE);

> +

> +    uint32_t simd = p->curr.execWidth;

> +    if (src1.file == GEN_IMMEDIATE_VALUE) {

> +      uint32_t offset = src1.value.ud % simd;

> +      GenRegister reg = GenRegister::suboffset(src0, offset);

> +      p->MOV(dst, GenRegister::retype(GenRegister::ud1grf(reg.nr, reg.subnr /

> typeSize(reg.type)), reg.type));

> +    } else {

> +      uint32_t base = src0.nr * 32 + src0.subnr * 4;

> +      GenRegister baseReg = GenRegister::immuw(base);

> +      const GenRegister a0 = GenRegister::addr8(0);

> +

> +      p->push();

> +        if (simd == 8) {

> +          p->ADD(a0, GenRegister::unpacked_uw(src1.nr, src1.subnr /

> typeSize(GEN_TYPE_UW)), baseReg);

> +          GenRegister indirect = GenRegister::to_indirect1xN(src0, 0, 0);

> +          p->MOV(dst, indirect);

> +        } else if (simd == 16) {

> +          p->curr.execWidth = 8;

> +          p->ADD(a0, GenRegister::unpacked_uw(src1.nr, src1.subnr /

> typeSize(GEN_TYPE_UW)), baseReg);

> +          GenRegister indirect = GenRegister::to_indirect1xN(src0, 0, 0);

> +          p->MOV(dst, indirect);

> +

> +          p->curr.quarterControl = 1;

> +          p->ADD(a0, GenRegister::unpacked_uw(src1.nr+1, src1.subnr /

> typeSize(GEN_TYPE_UW)), baseReg);

> +          p->MOV(GenRegister::offset(dst, 1, 0), indirect);

> +        } else

> +          NOT_IMPLEMENTED;

> +      p->pop();

> +    }

> +  }

> +

>    void GenContext::emitBinaryInstruction(const SelectionInstruction &insn) {

>      const GenRegister dst = ra->genReg(insn.dst(0));

>      const GenRegister src0 = ra->genReg(insn.src(0)); @@ -595,38 +631,6 @@

> namespace gbe

>            p->MOV(xdst.bottom_half(), xsrc1.bottom_half());

>          }

>          break;

> -      case SEL_OP_SIMD_SHUFFLE:

> -        {

> -          uint32_t simd = p->curr.execWidth;

> -          if (src1.file == GEN_IMMEDIATE_VALUE) {

> -            uint32_t offset = src1.value.ud % simd;

> -            GenRegister reg = GenRegister::suboffset(src0, offset);

> -            p->MOV(dst, GenRegister::retype(GenRegister::ud1grf(reg.nr, reg.subnr /

> typeSize(reg.type)), reg.type));

> -          } else {

> -            uint32_t base = src0.nr * 32 + src0.subnr * 4;

> -            GenRegister baseReg = GenRegister::immuw(base);

> -            const GenRegister a0 = GenRegister::addr8(0);

> -

> -            p->push();

> -              if (simd == 8) {

> -                p->ADD(a0, GenRegister::unpacked_uw(src1.nr, src1.subnr /

> typeSize(GEN_TYPE_UW)), baseReg);

> -                GenRegister indirect = GenRegister::to_indirect1xN(src0, 0, 0);

> -                p->MOV(dst, indirect);

> -              } else if (simd == 16) {

> -                p->curr.execWidth = 8;

> -                p->ADD(a0, GenRegister::unpacked_uw(src1.nr, src1.subnr /

> typeSize(GEN_TYPE_UW)), baseReg);

> -                GenRegister indirect = GenRegister::to_indirect1xN(src0, 0, 0);

> -                p->MOV(dst, indirect);

> -

> -                p->curr.quarterControl = 1;

> -                p->ADD(a0, GenRegister::unpacked_uw(src1.nr+1, src1.subnr /

> typeSize(GEN_TYPE_UW)), baseReg);

> -                p->MOV(GenRegister::offset(dst, 1, 0), indirect);

> -              } else

> -                NOT_IMPLEMENTED;

> -            p->pop();

> -          }

> -        }

> -        break;

>        default: NOT_IMPLEMENTED;

>      }

>    }

> diff --git a/backend/src/backend/gen_context.hpp

> b/backend/src/backend/gen_context.hpp

> index d387387..69fe513 100644

> --- a/backend/src/backend/gen_context.hpp

> +++ b/backend/src/backend/gen_context.hpp

> @@ -127,6 +127,7 @@ namespace gbe

>      virtual void emitUnaryInstruction(const SelectionInstruction &insn);

>      virtual void emitUnaryWithTempInstruction(const SelectionInstruction &insn);

>      virtual void emitBinaryInstruction(const SelectionInstruction &insn);

> +    virtual void emitSimdShuffleInstruction(const SelectionInstruction

> + &insn);

>      virtual void emitBinaryWithTempInstruction(const SelectionInstruction &insn);

>      void emitTernaryInstruction(const SelectionInstruction &insn);

>      virtual void emitI64MULHIInstruction(const SelectionInstruction &insn); diff --

> git a/backend/src/backend/gen_insn_gen7_schedule_info.hxx

> b/backend/src/backend/gen_insn_gen7_schedule_info.hxx

> index d054820..d073770 100644

> --- a/backend/src/backend/gen_insn_gen7_schedule_info.hxx

> +++ b/backend/src/backend/gen_insn_gen7_schedule_info.hxx

> @@ -3,6 +3,7 @@ DECL_GEN7_SCHEDULE(Label,           0,         0,        0)

>  DECL_GEN7_SCHEDULE(Unary,           20,        4,        2)

>  DECL_GEN7_SCHEDULE(UnaryWithTemp,   20,        40,      20)

>  DECL_GEN7_SCHEDULE(Binary,          20,        4,        2)

> +DECL_GEN7_SCHEDULE(SimdShuffle,     20,        4,        2)

>  DECL_GEN7_SCHEDULE(BinaryWithTemp,  20,        40,      20)

>  DECL_GEN7_SCHEDULE(Ternary,         20,        4,        2)

>  DECL_GEN7_SCHEDULE(I64Shift,        20,        40,      20)

> diff --git a/backend/src/backend/gen_insn_selection.cpp

> b/backend/src/backend/gen_insn_selection.cpp

> index af5ab9c..b0ba9e3 100644

> --- a/backend/src/backend/gen_insn_selection.cpp

> +++ b/backend/src/backend/gen_insn_selection.cpp

> @@ -543,7 +543,6 @@ namespace gbe

>      ALU1(RNDD)

>      ALU1(RNDU)

>      ALU2(MACH)

> -    ALU2(SIMD_SHUFFLE)

>      ALU1(LZD)

>      ALU3(MAD)

>      ALU2WithTemp(MUL_HI)

> @@ -565,6 +564,8 @@ namespace gbe

>  #undef ALU2WithTemp

>  #undef ALU3

>  #undef I64Shift

> +    /*! simd shuffle */

> +    void SIMD_SHUFFLE(Reg dst, Reg src0, Reg src1);

>      /*! Convert 64-bit integer to 32-bit float */

>      void CONVI64_TO_F(Reg dst, Reg src, GenRegister tmp[6]);

>      /*! Convert 64-bit integer to 32-bit float */ @@ -1652,6 +1653,14 @@

> namespace gbe

>      insn->src(2) = src2;

>    }

> 

> +  void Selection::Opaque::SIMD_SHUFFLE(Reg dst, Reg src0, Reg src1)  {

> +    SelectionInstruction *insn = this->appendInsn(SEL_OP_SIMD_SHUFFLE, 1, 2);

> +    insn->dst(0) = dst;

> +    insn->src(0) = src0;

> +    insn->src(1) = src1;

> +  }

> +

>    void Selection::Opaque::I64CMP(uint32_t conditional, Reg src0, Reg src1,

> GenRegister tmp[3]) {

>      SelectionInstruction *insn = this->appendInsn(SEL_OP_I64CMP, 3, 2);

>      insn->src(0) = src0;

> @@ -2815,17 +2824,6 @@ namespace gbe

>          case OP_UPSAMPLE_LONG:

>            sel.UPSAMPLE_LONG(dst, src0, src1);

>            break;

> -        case OP_SIMD_SHUFFLE:

> -          {

> -            if (src1.file == GEN_IMMEDIATE_VALUE)

> -              sel.SIMD_SHUFFLE(dst, src0, src1);

> -            else {

> -              GenRegister shiftL = GenRegister::udxgrf(sel.curr.execWidth,

> sel.reg(FAMILY_DWORD));

> -              sel.SHL(shiftL, src1, GenRegister::immud(0x2));

> -              sel.SIMD_SHUFFLE(dst, src0, shiftL);

> -            }

> -          }

> -          break;

>          default: NOT_IMPLEMENTED;

>        }

>        sel.pop();

> @@ -4973,6 +4971,46 @@ namespace gbe

>      }

>    };

> 

> +  class SimdShuffleInstructionPattern : public SelectionPattern  {

> +  public:

> +    SimdShuffleInstructionPattern(void) : SelectionPattern(1,1) {

> +      this->opcodes.push_back(ir::OP_SIMD_SHUFFLE);

> +    }

> +    INLINE bool emit(Selection::Opaque &sel, SelectionDAG &dag) const {

> +      using namespace ir;

> +      const ir::SimdShuffleInstruction &insn =

> cast<SimdShuffleInstruction>(dag.insn);

> +      assert(insn.getOpcode() == OP_SIMD_SHUFFLE);

> +      const Type type = insn.getType();

> +      GenRegister dst  = sel.selReg(insn.getDst(0), type);

> +      GenRegister src0  = sel.selReg(insn.getSrc(0), type);

> +      GenRegister src1;

> +

> +      SelectionDAG *dag0 = dag.child[0];

> +      SelectionDAG *dag1 = dag.child[1];

> +      if (dag1 != NULL && dag1->insn.getOpcode() == OP_LOADI &&

> canGetRegisterFromImmediate(dag1->insn)) {

> +        const auto &childInsn = cast<LoadImmInstruction>(dag1->insn);

> +        src1 = getRegisterFromImmediate(childInsn.getImmediate(), TYPE_U32);

> +        if (dag0) dag0->isRoot = 1;

> +      } else {

> +        markAllChildren(dag);

> +        src1 = sel.selReg(insn.getSrc(1), TYPE_U32);

> +      }

> +

> +      sel.push();

> +      if (src1.file == GEN_IMMEDIATE_VALUE)

> +        sel.SIMD_SHUFFLE(dst, src0, src1);

> +      else {

> +        GenRegister shiftL = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32);

> +        sel.SHL(shiftL, src1, GenRegister::immud(0x2));

> +        sel.SIMD_SHUFFLE(dst, src0, shiftL);

> +      }

> +      sel.pop();

> +      return true;

> +    }

> +

> +  };

> +

>    /*! Get a region of a register */

>    class RegionInstructionPattern : public SelectionPattern

>    {

> @@ -5247,6 +5285,7 @@ namespace gbe

>      this->insert<GetImageInfoInstructionPattern>();

>      this->insert<ReadARFInstructionPattern>();

>      this->insert<RegionInstructionPattern>();

> +    this->insert<SimdShuffleInstructionPattern>();

>      this->insert<IndirectMovInstructionPattern>();

>      this->insert<NullaryInstructionPattern>();

> 

> diff --git a/backend/src/backend/gen_insn_selection.hxx

> b/backend/src/backend/gen_insn_selection.hxx

> index 79f2ce1..adbb137 100644

> --- a/backend/src/backend/gen_insn_selection.hxx

> +++ b/backend/src/backend/gen_insn_selection.hxx

> @@ -26,7 +26,7 @@ DECL_SELECTION_IR(SHL, BinaryInstruction)

>  DECL_SELECTION_IR(RSR, BinaryInstruction)

>  DECL_SELECTION_IR(RSL, BinaryInstruction)

>  DECL_SELECTION_IR(ASR, BinaryInstruction)

> -DECL_SELECTION_IR(SIMD_SHUFFLE, BinaryInstruction)

> +DECL_SELECTION_IR(SIMD_SHUFFLE, SimdShuffleInstruction)

>  DECL_SELECTION_IR(I64SHR, I64ShiftInstruction)

>  DECL_SELECTION_IR(I64SHL, I64ShiftInstruction)

>  DECL_SELECTION_IR(I64ASR, I64ShiftInstruction)

> diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp

> index 12d70a6..f93c528 100644

> --- a/backend/src/ir/instruction.cpp

> +++ b/backend/src/ir/instruction.cpp

> @@ -741,6 +741,22 @@ namespace ir {

>        Register src[0];

>      };

> 

> +    class ALIGNED_INSTRUCTION SimdShuffleInstruction : public

> NaryInstruction<2>

> +    {

> +    public:

> +      SimdShuffleInstruction(Type type,

> +                        Register dst,

> +                        Register src0,

> +                        Register src1) {

> +        this->opcode = OP_SIMD_SHUFFLE;

> +        this->type = type;

> +        this->dst[0] = dst;

> +        this->src[0] = src0;

> +        this->src[1] = src1;

> +      }

> +      INLINE bool wellFormed(const Function &fn, std::string &why) const;

> +    };

> +

>      class ALIGNED_INSTRUCTION RegionInstruction :

>        public BasePolicy,

>        public NSrcPolicy<RegionInstruction, 1>, @@ -1154,6 +1170,19 @@

> namespace ir {

>        return true;

>      }

> 

> +    INLINE bool SimdShuffleInstruction::wellFormed(const Function &fn,

> std::string &whyNot) const

> +    {

> +      if (UNLIKELY( this->type != TYPE_U32 && this->type != TYPE_S32 && this->type != TYPE_FLOAT)) {

> +        whyNot = "Only support S32/U32/FLOAT type";

> +        return false;

> +      }

> +

> +      if (UNLIKELY(checkRegisterData(FAMILY_DWORD, src[1], fn, whyNot) ==

> false))

> +        return false;

> +

> +      return true;

> +    }

> +

>      INLINE bool RegionInstruction::wellFormed(const Function &fn, std::string

> &whyNot) const

>      {

>        if (UNLIKELY(checkRegisterData(FAMILY_DWORD, src[0], fn, whyNot) == false))

> @@ -1461,6 +1490,10 @@ START_INTROSPECTION(RegionInstruction)

>  #include "ir/instruction.hxx"

>  END_INTROSPECTION(RegionInstruction)

> 

> +START_INTROSPECTION(SimdShuffleInstruction)

> +#include "ir/instruction.hxx"

> +END_INTROSPECTION(SimdShuffleInstruction)

> +

>  START_INTROSPECTION(IndirectMovInstruction)

>  #include "ir/instruction.hxx"

>  END_INTROSPECTION(IndirectMovInstruction)

> @@ -1652,6 +1685,7 @@ DECL_MEM_FN(BranchInstruction, LabelIndex, getLabelIndex(void), getLabelIndex())

>  DECL_MEM_FN(SyncInstruction, uint32_t, getParameters(void), getParameters())

>  DECL_MEM_FN(ReadARFInstruction, Type, getType(void), getType())

>  DECL_MEM_FN(ReadARFInstruction, ARFRegister, getARFRegister(void), getARFRegister())

> +DECL_MEM_FN(SimdShuffleInstruction, Type, getType(void), getType())

>  DECL_MEM_FN(RegionInstruction, uint32_t, getOffset(void), getOffset())

>  DECL_MEM_FN(IndirectMovInstruction, uint32_t, getOffset(void), getOffset())

>  DECL_MEM_FN(IndirectMovInstruction, Type, getType(void), getType())

> @@ -1751,7 +1785,6 @@ DECL_MEM_FN(GetImageInfoInstruction, uint8_t, getImageIndex(void), getImageIndex

>    DECL_EMIT_FUNCTION(RHADD)

>    DECL_EMIT_FUNCTION(I64HADD)

>    DECL_EMIT_FUNCTION(I64RHADD)

> -  DECL_EMIT_FUNCTION(SIMD_SHUFFLE)

> 

>  #undef DECL_EMIT_FUNCTION

> 

> @@ -1881,6 +1914,9 @@ DECL_MEM_FN(GetImageInfoInstruction, uint8_t,

> getImageIndex(void), getImageIndex

>    Instruction REGION(Register dst, Register src, uint32_t offset) {

>      return internal::RegionInstruction(dst, src, offset).convert();

>    }

> +  Instruction SIMD_SHUFFLE(Type type, Register dst, Register src0, Register src1) {

> +    return internal::SimdShuffleInstruction(type, dst, src0, src1).convert();

> +  }

> 

>    Instruction INDIRECT_MOV(Type type, Register dst, Register src0, Register

> src1, uint32_t offset) {

>      return internal::IndirectMovInstruction(type, dst, src0, src1, offset).convert();

> diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp

> index ec4d00d..cf8d839 100644

> --- a/backend/src/ir/instruction.hpp

> +++ b/backend/src/ir/instruction.hpp

> @@ -522,6 +522,14 @@ namespace ir {

>      static bool isClassOf(const Instruction &insn);

>    };

> 

> +  /*! simd shuffle */

> +  class SimdShuffleInstruction : public Instruction {

> +  public:

> +    Type getType(void) const;

> +    /*! Return true if the given instruction is an instance of this class */

> +    static bool isClassOf(const Instruction &insn);

> +  };

> +

>    /*! return a region of a register, make sure the offset does not exceed the

> register size */

>    class RegionInstruction : public Instruction {

>    public:

> diff --git a/backend/src/ir/instruction.hxx b/backend/src/ir/instruction.hxx

> index 1001837..81548c9 100644

> --- a/backend/src/ir/instruction.hxx

> +++ b/backend/src/ir/instruction.hxx

> @@ -59,7 +59,7 @@ DECL_INSN(BSB, BinaryInstruction)

>  DECL_INSN(OR, BinaryInstruction)

>  DECL_INSN(XOR, BinaryInstruction)

>  DECL_INSN(AND, BinaryInstruction)

> -DECL_INSN(SIMD_SHUFFLE, BinaryInstruction)

> +DECL_INSN(SIMD_SHUFFLE, SimdShuffleInstruction)

>  DECL_INSN(SEL, SelectInstruction)

>  DECL_INSN(EQ, CompareInstruction)

>  DECL_INSN(NE, CompareInstruction)

> --

> 1.9.1

> 

> _______________________________________________

> Beignet mailing list

> Beignet@lists.freedesktop.org

> http://lists.freedesktop.org/mailman/listinfo/beignet