[11/14] Backend: Add sub groups short shuffle builtin functions

Submitted by Pan Xiuli on Oct. 12, 2016, 8:56 a.m.

Details

Message ID 1476262604-27504-11-git-send-email-xiuli.pan@intel.com
State New
Headers show
Series "Series without cover letter" ( rev: 1 ) in Beignet

Browsing this patch as part of:
"Series without cover letter" rev 1 in Beignet
<< prev patch [11/14] next patch >>

Commit Message

Pan Xiuli Oct. 12, 2016, 8:56 a.m.
From: Pan Xiuli <xiuli.pan@intel.com>

Add short and ushort type support to the sub group shuffle (SIMD shuffle) builtin functions.

Signed-off-by: Pan Xiuli <xiuli.pan@intel.com>
---
 backend/src/ir/instruction.cpp           |  5 +++--
 backend/src/libocl/tmpl/ocl_simd.tmpl.cl |  6 ++++++
 backend/src/libocl/tmpl/ocl_simd.tmpl.h  | 11 +++++++++++
 3 files changed, 20 insertions(+), 2 deletions(-)

Patch hide | download patch | download mbox

diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp
index ed64580..08a94cd 100644
--- a/backend/src/ir/instruction.cpp
+++ b/backend/src/ir/instruction.cpp
@@ -1493,8 +1493,9 @@  namespace ir {
 
     INLINE bool SimdShuffleInstruction::wellFormed(const Function &fn, std::string &whyNot) const
     {
-      if (UNLIKELY( this->type != TYPE_U32 && this->type != TYPE_S32 && this->type != TYPE_FLOAT)) {
-        whyNot = "Only support S32/U32/FLOAT type";
+      if (UNLIKELY( this->type != TYPE_U32 && this->type != TYPE_S32 && this->type != TYPE_FLOAT &&
+                    this->type != TYPE_U16 && this->type != TYPE_S16)) {
+        whyNot = "Only support S16/U16/S32/U32/FLOAT type";
         return false;
       }
 
diff --git a/backend/src/libocl/tmpl/ocl_simd.tmpl.cl b/backend/src/libocl/tmpl/ocl_simd.tmpl.cl
index 90c7cc2..9023107 100644
--- a/backend/src/libocl/tmpl/ocl_simd.tmpl.cl
+++ b/backend/src/libocl/tmpl/ocl_simd.tmpl.cl
@@ -283,6 +283,8 @@  OVERLOADABLE TYPE intel_sub_group_shuffle_down(TYPE x, TYPE y, uint c) { \
 SHUFFLE_DOWN(float)
 SHUFFLE_DOWN(int)
 SHUFFLE_DOWN(uint)
+SHUFFLE_DOWN(short)
+SHUFFLE_DOWN(ushort)
 #undef SHUFFLE_DOWN
 
 #define SHUFFLE_UP(TYPE) \
@@ -296,6 +298,8 @@  OVERLOADABLE TYPE intel_sub_group_shuffle_up(TYPE x, TYPE y, uint c) { \
 SHUFFLE_UP(float)
 SHUFFLE_UP(int)
 SHUFFLE_UP(uint)
+SHUFFLE_UP(short)
+SHUFFLE_UP(ushort)
 #undef SHUFFLE_UP
 #define SHUFFLE_XOR(TYPE) \
 OVERLOADABLE TYPE intel_sub_group_shuffle_xor(TYPE x, uint c) { \
@@ -304,4 +308,6 @@  OVERLOADABLE TYPE intel_sub_group_shuffle_xor(TYPE x, uint c) { \
 SHUFFLE_XOR(float)
 SHUFFLE_XOR(int)
 SHUFFLE_XOR(uint)
+SHUFFLE_XOR(short)
+SHUFFLE_XOR(ushort)
 #undef SHUFFLE_XOR
diff --git a/backend/src/libocl/tmpl/ocl_simd.tmpl.h b/backend/src/libocl/tmpl/ocl_simd.tmpl.h
index d0f06d1..158c8e1 100644
--- a/backend/src/libocl/tmpl/ocl_simd.tmpl.h
+++ b/backend/src/libocl/tmpl/ocl_simd.tmpl.h
@@ -169,15 +169,26 @@  OVERLOADABLE half intel_sub_group_shuffle(half x, uint c);
 OVERLOADABLE float intel_sub_group_shuffle(float x, uint c);
 OVERLOADABLE int intel_sub_group_shuffle(int x, uint c);
 OVERLOADABLE uint intel_sub_group_shuffle(uint x, uint c);
+OVERLOADABLE short intel_sub_group_shuffle(short x, uint c);
+OVERLOADABLE ushort intel_sub_group_shuffle(ushort x, uint c);
+
 OVERLOADABLE float intel_sub_group_shuffle_down(float x, float y, uint c);
 OVERLOADABLE int intel_sub_group_shuffle_down(int x, int y, uint c);
 OVERLOADABLE uint intel_sub_group_shuffle_down(uint x, uint y, uint c);
+OVERLOADABLE short intel_sub_group_shuffle_down(short x, short y, uint c);
+OVERLOADABLE ushort intel_sub_group_shuffle_down(ushort x, ushort y, uint c);
+
 OVERLOADABLE float intel_sub_group_shuffle_up(float x, float y, uint c);
 OVERLOADABLE int intel_sub_group_shuffle_up(int x, int y, uint c);
 OVERLOADABLE uint intel_sub_group_shuffle_up(uint x, uint y, uint c);
+OVERLOADABLE short intel_sub_group_shuffle_up(short x, short y, uint c);
+OVERLOADABLE ushort intel_sub_group_shuffle_up(ushort x, ushort y, uint c);
+
 OVERLOADABLE float intel_sub_group_shuffle_xor(float x, uint c);
 OVERLOADABLE int intel_sub_group_shuffle_xor(int x, uint c);
 OVERLOADABLE uint intel_sub_group_shuffle_xor(uint x, uint c);
+OVERLOADABLE short intel_sub_group_shuffle_xor(short x, uint c);
+OVERLOADABLE ushort intel_sub_group_shuffle_xor(ushort x, uint c);
 
 /* blocak read/write */
 OVERLOADABLE uint intel_sub_group_block_read(const global uint* p);