[09/14] Backend: Add short sub group builtin functions

Submitted by Pan Xiuli on Oct. 12, 2016, 8:56 a.m.

Details

Message ID 1476262604-27504-9-git-send-email-xiuli.pan@intel.com
State New
Headers show
Series "Series without cover letter" ( rev: 1 ) in Beignet

Browsing this patch as part of:
"Series without cover letter" rev 1 in Beignet
<< prev patch [9/14] next patch >>

Commit Message

Pan Xiuli Oct. 12, 2016, 8:56 a.m.
From: Pan Xiuli <xiuli.pan@intel.com>

Add intel sub group short type builtins.

Signed-off-by: Pan Xiuli <xiuli.pan@intel.com>
---
 backend/src/backend/gen_context.cpp      | 12 +++++++++
 backend/src/libocl/tmpl/ocl_simd.tmpl.cl | 44 ++++++++++++++++++++++++++++++++
 backend/src/libocl/tmpl/ocl_simd.tmpl.h  | 36 ++++++++++++++++++++++++++
 3 files changed, 92 insertions(+)

Patch hide | download patch | download mbox

diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index e907931..a1ae5ea 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -2878,6 +2878,10 @@  namespace gbe
         p->MOV(dataReg, GenRegister::immint64(0x0));
       else if (dataReg.type == GEN_TYPE_UL)
         p->MOV(dataReg, GenRegister::immuint64(0x0));
+      else if (dataReg.type == GEN_TYPE_W)
+        p->MOV(dataReg, GenRegister::immw(0x0));
+      else if (dataReg.type == GEN_TYPE_UW)
+        p->MOV(dataReg, GenRegister::immuw(0x0));
       else
         GBE_ASSERT(0); /* unsupported data-type */
     }
@@ -2896,6 +2900,10 @@  namespace gbe
         p->MOV(dataReg, GenRegister::immint64(0x7FFFFFFFFFFFFFFFL));
       else if (dataReg.type == GEN_TYPE_UL)
         p->MOV(dataReg, GenRegister::immuint64(0xFFFFFFFFFFFFFFFFL));
+      else if (dataReg.type == GEN_TYPE_W)
+        p->MOV(dataReg, GenRegister::immw(0x7FFF));
+      else if (dataReg.type == GEN_TYPE_UW)
+        p->MOV(dataReg, GenRegister::immuw(0xFFFF));
       else
         GBE_ASSERT(0); /* unsupported data-type */
     }
@@ -2914,6 +2922,10 @@  namespace gbe
         p->MOV(dataReg, GenRegister::immint64(0x8000000000000000L));
       else if (dataReg.type == GEN_TYPE_UL)
         p->MOV(dataReg, GenRegister::immuint64(0x0));
+      else if (dataReg.type == GEN_TYPE_W)
+        p->MOV(dataReg, GenRegister::immw(0x8000));
+      else if (dataReg.type == GEN_TYPE_UW)
+        p->MOV(dataReg, GenRegister::immuw(0x0));
       else
         GBE_ASSERT(0); /* unsupported data-type */
     }
diff --git a/backend/src/libocl/tmpl/ocl_simd.tmpl.cl b/backend/src/libocl/tmpl/ocl_simd.tmpl.cl
index d1bcfa3..90c7cc2 100644
--- a/backend/src/libocl/tmpl/ocl_simd.tmpl.cl
+++ b/backend/src/libocl/tmpl/ocl_simd.tmpl.cl
@@ -76,6 +76,8 @@  RANGE_OP(reduce, add, ulong, false)
 RANGE_OP(reduce, add, half, true)
 RANGE_OP(reduce, add, float, true)
 RANGE_OP(reduce, add, double, true)
+RANGE_OP(reduce, add, short, true)
+RANGE_OP(reduce, add, ushort, false)
 /* reduce min */
 RANGE_OP(reduce, min, int, true)
 RANGE_OP(reduce, min, uint, false)
@@ -84,6 +86,8 @@  RANGE_OP(reduce, min, ulong, false)
 RANGE_OP(reduce, min, half, true)
 RANGE_OP(reduce, min, float, true)
 RANGE_OP(reduce, min, double, true)
+RANGE_OP(reduce, min, short, true)
+RANGE_OP(reduce, min, ushort, false)
 /* reduce max */
 RANGE_OP(reduce, max, int, true)
 RANGE_OP(reduce, max, uint, false)
@@ -92,6 +96,8 @@  RANGE_OP(reduce, max, ulong, false)
 RANGE_OP(reduce, max, half, true)
 RANGE_OP(reduce, max, float, true)
 RANGE_OP(reduce, max, double, true)
+RANGE_OP(reduce, max, short, true)
+RANGE_OP(reduce, max, ushort, false)
 
 /* scan_inclusive add */
 RANGE_OP(scan_inclusive, add, int, true)
@@ -101,6 +107,8 @@  RANGE_OP(scan_inclusive, add, ulong, false)
 RANGE_OP(scan_inclusive, add, half, true)
 RANGE_OP(scan_inclusive, add, float, true)
 RANGE_OP(scan_inclusive, add, double, true)
+RANGE_OP(scan_inclusive, add, short, true)
+RANGE_OP(scan_inclusive, add, ushort, false)
 /* scan_inclusive min */
 RANGE_OP(scan_inclusive, min, int, true)
 RANGE_OP(scan_inclusive, min, uint, false)
@@ -109,6 +117,8 @@  RANGE_OP(scan_inclusive, min, ulong, false)
 RANGE_OP(scan_inclusive, min, half, true)
 RANGE_OP(scan_inclusive, min, float, true)
 RANGE_OP(scan_inclusive, min, double, true)
+RANGE_OP(scan_inclusive, min, short, true)
+RANGE_OP(scan_inclusive, min, ushort, false)
 /* scan_inclusive max */
 RANGE_OP(scan_inclusive, max, int, true)
 RANGE_OP(scan_inclusive, max, uint, false)
@@ -117,6 +127,8 @@  RANGE_OP(scan_inclusive, max, ulong, false)
 RANGE_OP(scan_inclusive, max, half, true)
 RANGE_OP(scan_inclusive, max, float, true)
 RANGE_OP(scan_inclusive, max, double, true)
+RANGE_OP(scan_inclusive, max, short, true)
+RANGE_OP(scan_inclusive, max, ushort, false)
 
 /* scan_exclusive add */
 RANGE_OP(scan_exclusive, add, int, true)
@@ -126,6 +138,8 @@  RANGE_OP(scan_exclusive, add, ulong, false)
 RANGE_OP(scan_exclusive, add, half, true)
 RANGE_OP(scan_exclusive, add, float, true)
 RANGE_OP(scan_exclusive, add, double, true)
+RANGE_OP(scan_exclusive, add, short, true)
+RANGE_OP(scan_exclusive, add, ushort, false)
 /* scan_exclusive min */
 RANGE_OP(scan_exclusive, min, int, true)
 RANGE_OP(scan_exclusive, min, uint, false)
@@ -134,6 +148,8 @@  RANGE_OP(scan_exclusive, min, ulong, false)
 RANGE_OP(scan_exclusive, min, half, true)
 RANGE_OP(scan_exclusive, min, float, true)
 RANGE_OP(scan_exclusive, min, double, true)
+RANGE_OP(scan_exclusive, min, short, true)
+RANGE_OP(scan_exclusive, min, ushort, false)
 /* scan_exclusive max */
 RANGE_OP(scan_exclusive, max, int, true)
 RANGE_OP(scan_exclusive, max, uint, false)
@@ -142,8 +158,36 @@  RANGE_OP(scan_exclusive, max, ulong, false)
 RANGE_OP(scan_exclusive, max, half, true)
 RANGE_OP(scan_exclusive, max, float, true)
 RANGE_OP(scan_exclusive, max, double, true)
+RANGE_OP(scan_exclusive, max, short, true)
+RANGE_OP(scan_exclusive, max, ushort, false)
 
 #undef RANGE_OP
+
+#define INTEL_RANGE_OP(RANGE, OP, GEN_TYPE, SIGN) \
+    OVERLOADABLE GEN_TYPE intel_sub_group_##RANGE##_##OP(GEN_TYPE x) { \
+      return __gen_ocl_sub_group_##RANGE##_##OP(SIGN, x);  \
+    }
+
+INTEL_RANGE_OP(reduce, add, short, true)
+INTEL_RANGE_OP(reduce, add, ushort, false)
+INTEL_RANGE_OP(reduce, min, short, true)
+INTEL_RANGE_OP(reduce, min, ushort, false)
+INTEL_RANGE_OP(reduce, max, short, true)
+INTEL_RANGE_OP(reduce, max, ushort, false)
+INTEL_RANGE_OP(scan_inclusive, add, short, true)
+INTEL_RANGE_OP(scan_inclusive, add, ushort, false)
+INTEL_RANGE_OP(scan_inclusive, min, short, true)
+INTEL_RANGE_OP(scan_inclusive, min, ushort, false)
+INTEL_RANGE_OP(scan_inclusive, max, short, true)
+INTEL_RANGE_OP(scan_inclusive, max, ushort, false)
+INTEL_RANGE_OP(scan_exclusive, add, short, true)
+INTEL_RANGE_OP(scan_exclusive, add, ushort, false)
+INTEL_RANGE_OP(scan_exclusive, min, short, true)
+INTEL_RANGE_OP(scan_exclusive, min, ushort, false)
+INTEL_RANGE_OP(scan_exclusive, max, short, true)
+INTEL_RANGE_OP(scan_exclusive, max, ushort, false)
+
+#undef INTEL_RANGE_OP
 PURE CONST uint __gen_ocl_sub_group_block_read_mem(const global uint* p);
 PURE CONST uint2 __gen_ocl_sub_group_block_read_mem2(const global uint* p);
 PURE CONST uint4 __gen_ocl_sub_group_block_read_mem4(const global uint* p);
diff --git a/backend/src/libocl/tmpl/ocl_simd.tmpl.h b/backend/src/libocl/tmpl/ocl_simd.tmpl.h
index c609c2e..d0f06d1 100644
--- a/backend/src/libocl/tmpl/ocl_simd.tmpl.h
+++ b/backend/src/libocl/tmpl/ocl_simd.tmpl.h
@@ -55,6 +55,10 @@  OVERLOADABLE ulong sub_group_reduce_add(ulong x);
 OVERLOADABLE half sub_group_reduce_add(half x);
 OVERLOADABLE float sub_group_reduce_add(float x);
 OVERLOADABLE double sub_group_reduce_add(double x);
+OVERLOADABLE short sub_group_reduce_add(short x);
+OVERLOADABLE ushort sub_group_reduce_add(ushort x);
+OVERLOADABLE short intel_sub_group_reduce_add(short x);
+OVERLOADABLE ushort intel_sub_group_reduce_add(ushort x);
 
 /* reduce min */
 OVERLOADABLE int sub_group_reduce_min(int x);
@@ -64,6 +68,10 @@  OVERLOADABLE ulong sub_group_reduce_min(ulong x);
 OVERLOADABLE half sub_group_reduce_min(half x);
 OVERLOADABLE float sub_group_reduce_min(float x);
 OVERLOADABLE double sub_group_reduce_min(double x);
+OVERLOADABLE short sub_group_reduce_min(short x);
+OVERLOADABLE ushort sub_group_reduce_min(ushort x);
+OVERLOADABLE short intel_sub_group_reduce_min(short x);
+OVERLOADABLE ushort intel_sub_group_reduce_min(ushort x);
 
 /* reduce max */
 OVERLOADABLE int sub_group_reduce_max(int x);
@@ -73,6 +81,10 @@  OVERLOADABLE ulong sub_group_reduce_max(ulong x);
 OVERLOADABLE half sub_group_reduce_max(half x);
 OVERLOADABLE float sub_group_reduce_max(float x);
 OVERLOADABLE double sub_group_reduce_max(double x);
+OVERLOADABLE short sub_group_reduce_max(short x);
+OVERLOADABLE ushort sub_group_reduce_max(ushort x);
+OVERLOADABLE short intel_sub_group_reduce_max(short x);
+OVERLOADABLE ushort intel_sub_group_reduce_max(ushort x);
 
 /* scan_inclusive add */
 OVERLOADABLE int sub_group_scan_inclusive_add(int x);
@@ -82,6 +94,10 @@  OVERLOADABLE ulong sub_group_scan_inclusive_add(ulong x);
 OVERLOADABLE half sub_group_scan_inclusive_add(half x);
 OVERLOADABLE float sub_group_scan_inclusive_add(float x);
 OVERLOADABLE double sub_group_scan_inclusive_add(double x);
+OVERLOADABLE short sub_group_scan_inclusive_add(short x);
+OVERLOADABLE ushort sub_group_scan_inclusive_add(ushort x);
+OVERLOADABLE short intel_sub_group_scan_inclusive_add(short x);
+OVERLOADABLE ushort intel_sub_group_scan_inclusive_add(ushort x);
 
 /* scan_inclusive min */
 OVERLOADABLE int sub_group_scan_inclusive_min(int x);
@@ -91,6 +107,10 @@  OVERLOADABLE ulong sub_group_scan_inclusive_min(ulong x);
 OVERLOADABLE half sub_group_scan_inclusive_min(half x);
 OVERLOADABLE float sub_group_scan_inclusive_min(float x);
 OVERLOADABLE double sub_group_scan_inclusive_min(double x);
+OVERLOADABLE short sub_group_scan_inclusive_min(short x);
+OVERLOADABLE ushort sub_group_scan_inclusive_min(ushort x);
+OVERLOADABLE short intel_sub_group_scan_inclusive_min(short x);
+OVERLOADABLE ushort intel_sub_group_scan_inclusive_min(ushort x);
 
 /* scan_inclusive max */
 OVERLOADABLE int sub_group_scan_inclusive_max(int x);
@@ -100,6 +120,10 @@  OVERLOADABLE ulong sub_group_scan_inclusive_max(ulong x);
 OVERLOADABLE half sub_group_scan_inclusive_max(half x);
 OVERLOADABLE float sub_group_scan_inclusive_max(float x);
 OVERLOADABLE double sub_group_scan_inclusive_max(double x);
+OVERLOADABLE short sub_group_scan_inclusive_max(short x);
+OVERLOADABLE ushort sub_group_scan_inclusive_max(ushort x);
+OVERLOADABLE short intel_sub_group_scan_inclusive_max(short x);
+OVERLOADABLE ushort intel_sub_group_scan_inclusive_max(ushort x);
 
 /* scan_exclusive add */
 OVERLOADABLE int sub_group_scan_exclusive_add(int x);
@@ -109,6 +133,10 @@  OVERLOADABLE ulong sub_group_scan_exclusive_add(ulong x);
 OVERLOADABLE half sub_group_scan_exclusive_add(half x);
 OVERLOADABLE float sub_group_scan_exclusive_add(float x);
 OVERLOADABLE double sub_group_scan_exclusive_add(double x);
+OVERLOADABLE short sub_group_scan_exclusive_add(short x);
+OVERLOADABLE ushort sub_group_scan_exclusive_add(ushort x);
+OVERLOADABLE short intel_sub_group_scan_exclusive_add(short x);
+OVERLOADABLE ushort intel_sub_group_scan_exclusive_add(ushort x);
 
 /* scan_exclusive min */
 OVERLOADABLE int sub_group_scan_exclusive_min(int x);
@@ -118,6 +146,10 @@  OVERLOADABLE ulong sub_group_scan_exclusive_min(ulong x);
 OVERLOADABLE half sub_group_scan_exclusive_min(half x);
 OVERLOADABLE float sub_group_scan_exclusive_min(float x);
 OVERLOADABLE double sub_group_scan_exclusive_min(double x);
+OVERLOADABLE short sub_group_scan_exclusive_min(short x);
+OVERLOADABLE ushort sub_group_scan_exclusive_min(ushort x);
+OVERLOADABLE short intel_sub_group_scan_exclusive_min(short x);
+OVERLOADABLE ushort intel_sub_group_scan_exclusive_min(ushort x);
 
 /* scan_exclusive max */
 OVERLOADABLE int sub_group_scan_exclusive_max(int x);
@@ -127,6 +159,10 @@  OVERLOADABLE ulong sub_group_scan_exclusive_max(ulong x);
 OVERLOADABLE half sub_group_scan_exclusive_max(half x);
 OVERLOADABLE float sub_group_scan_exclusive_max(float x);
 OVERLOADABLE double sub_group_scan_exclusive_max(double x);
+OVERLOADABLE short sub_group_scan_exclusive_max(short x);
+OVERLOADABLE ushort sub_group_scan_exclusive_max(ushort x);
+OVERLOADABLE short intel_sub_group_scan_exclusive_max(short x);
+OVERLOADABLE ushort intel_sub_group_scan_exclusive_max(ushort x);
 
 /* shuffle */
 OVERLOADABLE half intel_sub_group_shuffle(half x, uint c);