[14/14] Utest: Add subgroup block read/write ushort test case

Submitted by Pan Xiuli on Oct. 12, 2016, 8:56 a.m.

Details

Message ID 1476262604-27504-14-git-send-email-xiuli.pan@intel.com
State New
Headers show
Series "Series without cover letter" ( rev: 1 ) in Beignet

Browsing this patch as part of:
"Series without cover letter" rev 1 in Beignet
<< prev patch [14/14] next patch >>

Commit Message

Pan Xiuli Oct. 12, 2016, 8:56 a.m.
From: Pan Xiuli <xiuli.pan@intel.com>

Add ushort block read/write for buffer and image.
Refine uint block read/write with suffix _ui.

Signed-off-by: Pan Xiuli <xiuli.pan@intel.com>
---
 kernels/compiler_subgroup_buffer_block_read.cl  | 47 ++++++++++--
 kernels/compiler_subgroup_buffer_block_write.cl | 44 +++++++++--
 kernels/compiler_subgroup_image_block_read.cl   | 49 +++++++++++--
 kernels/compiler_subgroup_image_block_write.cl  | 46 +++++++++---
 utests/compiler_subgroup_buffer_block_read.cpp  | 73 +++++++++++++++---
 utests/compiler_subgroup_buffer_block_write.cpp | 74 ++++++++++++++++---
 utests/compiler_subgroup_image_block_read.cpp   | 98 +++++++++++++++++++------
 utests/compiler_subgroup_image_block_write.cpp  | 73 +++++++++++++++---
 8 files changed, 412 insertions(+), 92 deletions(-)

Patch hide | download patch | download mbox

diff --git a/kernels/compiler_subgroup_buffer_block_read.cl b/kernels/compiler_subgroup_buffer_block_read.cl
index 9edaa2e..4cbf894 100644
--- a/kernels/compiler_subgroup_buffer_block_read.cl
+++ b/kernels/compiler_subgroup_buffer_block_read.cl
@@ -1,31 +1,62 @@ 
-__kernel void compiler_subgroup_buffer_block_read1(global uint *src, global uint *dst)
+__kernel void compiler_subgroup_buffer_block_read_ui1(global uint *src, global uint *dst)
 {
   int id = get_global_id(0);
   global uint * p = src + get_sub_group_id() * get_max_sub_group_size();
-  uint tmp = intel_sub_group_block_read(p);
+  uint tmp = intel_sub_group_block_read_ui(p);
   dst[id] = tmp;
 }
 
-__kernel void compiler_subgroup_buffer_block_read2(global uint *src, global uint2 *dst)
+__kernel void compiler_subgroup_buffer_block_read_ui2(global uint *src, global uint2 *dst)
 {
   int id = get_global_id(0);
   global uint * p = src + get_sub_group_id() * get_max_sub_group_size()*2;
-  uint2 tmp = intel_sub_group_block_read2(p);
+  uint2 tmp = intel_sub_group_block_read_ui2(p);
   dst[id] = tmp;
 }
 
-__kernel void compiler_subgroup_buffer_block_read4(global uint *src, global uint4 *dst)
+__kernel void compiler_subgroup_buffer_block_read_ui4(global uint *src, global uint4 *dst)
 {
   int id = get_global_id(0);
   global uint * p = src + get_sub_group_id() * get_max_sub_group_size()*4;
-  uint4 tmp = intel_sub_group_block_read4(p);
+  uint4 tmp = intel_sub_group_block_read_ui4(p);
   dst[id] = tmp;
 }
 
-__kernel void compiler_subgroup_buffer_block_read8(global uint *src, global uint8 *dst)
+__kernel void compiler_subgroup_buffer_block_read_ui8(global uint *src, global uint8 *dst)
 {
   int id = get_global_id(0);
   global uint * p = src + get_sub_group_id() * get_max_sub_group_size()*8;
-  uint8 tmp = intel_sub_group_block_read8(p);
+  uint8 tmp = intel_sub_group_block_read_ui8(p);
   dst[id] = tmp;
 }
+#ifdef SHORT
+__kernel void compiler_subgroup_buffer_block_read_us1(global ushort *src, global ushort *dst)
+{
+  int id = get_global_id(0);
+  global ushort * p = src + get_sub_group_id() * get_max_sub_group_size();
+  ushort tmp = intel_sub_group_block_read_us(p);
+  dst[id] = tmp;
+}
+__kernel void compiler_subgroup_buffer_block_read_us2(global ushort *src, global ushort2 *dst)
+{
+  int id = get_global_id(0);
+  global ushort * p = src + get_sub_group_id() * get_max_sub_group_size()*2;
+  ushort2 tmp = intel_sub_group_block_read_us2(p);
+  dst[id] = tmp;
+}
+__kernel void compiler_subgroup_buffer_block_read_us4(global ushort *src, global ushort4 *dst)
+{
+  int id = get_global_id(0);
+  global ushort * p = src + get_sub_group_id() * get_max_sub_group_size()*4;
+  ushort4 tmp = intel_sub_group_block_read_us4(p);
+  dst[id] = tmp;
+}
+
+__kernel void compiler_subgroup_buffer_block_read_us8(global ushort *src, global ushort8 *dst)
+{
+  int id = get_global_id(0);
+  global ushort * p = src + get_sub_group_id() * get_max_sub_group_size()*8;
+  ushort8 tmp = intel_sub_group_block_read_us8(p);
+  dst[id] = tmp;
+}
+#endif
diff --git a/kernels/compiler_subgroup_buffer_block_write.cl b/kernels/compiler_subgroup_buffer_block_write.cl
index f735855..f452dcc 100644
--- a/kernels/compiler_subgroup_buffer_block_write.cl
+++ b/kernels/compiler_subgroup_buffer_block_write.cl
@@ -1,27 +1,55 @@ 
-__kernel void compiler_subgroup_buffer_block_write1(global uint *src, global uint *dst)
+__kernel void compiler_subgroup_buffer_block_write_ui1(global uint *src, global uint *dst)
 {
   int id = get_global_id(0);
   global uint * p = dst + get_sub_group_id() * get_max_sub_group_size();
-  intel_sub_group_block_write(p,src[id]);
+  intel_sub_group_block_write_ui(p,src[id]);
 }
 
-__kernel void compiler_subgroup_buffer_block_write2(global uint2 *src, global uint *dst)
+__kernel void compiler_subgroup_buffer_block_write_ui2(global uint2 *src, global uint *dst)
 {
   int id = get_global_id(0);
   global uint * p = dst + get_sub_group_id() * get_max_sub_group_size()*2;
-  intel_sub_group_block_write2(p,src[id]);
+  intel_sub_group_block_write_ui2(p,src[id]);
 }
 
-__kernel void compiler_subgroup_buffer_block_write4(global uint4 *src, global uint *dst)
+__kernel void compiler_subgroup_buffer_block_write_ui4(global uint4 *src, global uint *dst)
 {
   int id = get_global_id(0);
   global uint * p = dst + get_sub_group_id() * get_max_sub_group_size()*4;
-  intel_sub_group_block_write4(p,src[id]);
+  intel_sub_group_block_write_ui4(p,src[id]);
 }
 
-__kernel void compiler_subgroup_buffer_block_write8(global uint8 *src, global uint *dst)
+__kernel void compiler_subgroup_buffer_block_write_ui8(global uint8 *src, global uint *dst)
 {
   int id = get_global_id(0);
   global uint * p = dst + get_sub_group_id() * get_max_sub_group_size()*8;
-  intel_sub_group_block_write8(p,src[id]);
+  intel_sub_group_block_write_ui8(p,src[id]);
 }
+#ifdef SHORT
+__kernel void compiler_subgroup_buffer_block_write_us1(global ushort *src, global ushort *dst)
+{
+  int id = get_global_id(0);
+  global ushort * p = dst + get_sub_group_id() * get_max_sub_group_size();
+  intel_sub_group_block_write_us(p,src[id]);
+}
+
+__kernel void compiler_subgroup_buffer_block_write_us2(global ushort2 *src, global ushort *dst)
+{
+  int id = get_global_id(0);
+  global ushort * p = dst + get_sub_group_id() * get_max_sub_group_size()*2;
+  intel_sub_group_block_write_us2(p,src[id]);
+}
+
+__kernel void compiler_subgroup_buffer_block_write_us4(global ushort4 *src, global ushort *dst)
+{
+  int id = get_global_id(0);
+  global ushort * p = dst + get_sub_group_id() * get_max_sub_group_size()*4;
+  intel_sub_group_block_write_us4(p,src[id]);
+}
+__kernel void compiler_subgroup_buffer_block_write_us8(global ushort8 *src, global ushort *dst)
+{
+  int id = get_global_id(0);
+  global ushort * p = dst + get_sub_group_id() * get_max_sub_group_size()*8;
+  intel_sub_group_block_write_us8(p,src[id]);
+}
+#endif
diff --git a/kernels/compiler_subgroup_image_block_read.cl b/kernels/compiler_subgroup_image_block_read.cl
index d5df6db..fa079b7 100644
--- a/kernels/compiler_subgroup_image_block_read.cl
+++ b/kernels/compiler_subgroup_image_block_read.cl
@@ -1,31 +1,64 @@ 
-__kernel void compiler_subgroup_image_block_read1(image2d_t src, global uint *dst)
+__kernel void compiler_subgroup_image_block_read_ui1(image2d_t src, global uint *dst)
 {
   int id = get_global_id(0);
   int2 coord = (int2)(get_simd_size()*get_sub_group_id()*sizeof(uint),0);
-  uint tmp = intel_sub_group_block_read(src,coord);
+  uint tmp = intel_sub_group_block_read_ui(src,coord);
   dst[id] = tmp;
 }
 
-__kernel void compiler_subgroup_image_block_read2(image2d_t src, global uint2 *dst)
+__kernel void compiler_subgroup_image_block_read_ui2(image2d_t src, global uint2 *dst)
 {
   int id = get_global_id(0);
   int2 coord = (int2)(get_simd_size()*get_sub_group_id()*sizeof(uint),0);
-  uint2 tmp = intel_sub_group_block_read2(src,coord);
+  uint2 tmp = intel_sub_group_block_read_ui2(src,coord);
   dst[id] = tmp;
 }
 
-__kernel void compiler_subgroup_image_block_read4(image2d_t src, global uint4 *dst)
+__kernel void compiler_subgroup_image_block_read_ui4(image2d_t src, global uint4 *dst)
 {
   int id = get_global_id(0);
   int2 coord = (int2)(get_simd_size()*get_sub_group_id()*sizeof(uint),0);
-  uint4 tmp = intel_sub_group_block_read4(src,coord);
+  uint4 tmp = intel_sub_group_block_read_ui4(src,coord);
   dst[id] = tmp;
 }
 
-__kernel void compiler_subgroup_image_block_read8(image2d_t src, global uint8 *dst)
+__kernel void compiler_subgroup_image_block_read_ui8(image2d_t src, global uint8 *dst)
 {
   int id = get_global_id(0);
   int2 coord = (int2)(get_simd_size()*get_sub_group_id()*sizeof(uint),0);
-  uint8 tmp = intel_sub_group_block_read8(src,coord);
+  uint8 tmp = intel_sub_group_block_read_ui8(src,coord);
   dst[id] = tmp;
 }
+#ifdef SHORT
+__kernel void compiler_subgroup_image_block_read_us1(image2d_t src, global ushort *dst)
+{
+  int id = get_global_id(0);
+  int2 coord = (int2)(get_simd_size()*get_sub_group_id()*sizeof(ushort),0);
+  ushort tmp = intel_sub_group_block_read_us(src,coord);
+  dst[id] = tmp;
+}
+
+__kernel void compiler_subgroup_image_block_read_us2(image2d_t src, global ushort2 *dst)
+{
+  int id = get_global_id(0);
+  int2 coord = (int2)(get_simd_size()*get_sub_group_id()*sizeof(ushort),0);
+  ushort2 tmp = intel_sub_group_block_read_us2(src,coord);
+  dst[id] = tmp;
+}
+
+__kernel void compiler_subgroup_image_block_read_us4(image2d_t src, global ushort4 *dst)
+{
+  int id = get_global_id(0);
+  int2 coord = (int2)(get_simd_size()*get_sub_group_id()*sizeof(ushort),0);
+  ushort4 tmp = intel_sub_group_block_read_us4(src,coord);
+  dst[id] = tmp;
+}
+
+__kernel void compiler_subgroup_image_block_read_us8(image2d_t src, global ushort8 *dst)
+{
+  int id = get_global_id(0);
+  int2 coord = (int2)(get_simd_size()*get_sub_group_id()*sizeof(ushort),0);
+  ushort8 tmp = intel_sub_group_block_read_us8(src,coord);
+  dst[id] = tmp;
+}
+#endif
diff --git a/kernels/compiler_subgroup_image_block_write.cl b/kernels/compiler_subgroup_image_block_write.cl
index d9b3717..7d97c59 100644
--- a/kernels/compiler_subgroup_image_block_write.cl
+++ b/kernels/compiler_subgroup_image_block_write.cl
@@ -1,27 +1,55 @@ 
-__kernel void compiler_subgroup_image_block_write1(image2d_t dst, global uint *src)
+__kernel void compiler_subgroup_image_block_write_ui1(image2d_t dst, global uint *src)
 {
   int id = get_global_id(0);
   int2 coord = (int2)(get_simd_size()*get_sub_group_id()*sizeof(uint),0);
-  intel_sub_group_block_write(dst,coord, src[id]);
+  intel_sub_group_block_write_ui(dst,coord, src[id]);
 }
-
-__kernel void compiler_subgroup_image_block_write2(image2d_t dst, global uint2 *src)
+__kernel void compiler_subgroup_image_block_write_ui2(image2d_t dst, global uint2 *src)
 {
   int id = get_global_id(0);
   int2 coord = (int2)(get_simd_size()*get_sub_group_id()*sizeof(uint),0);
-  intel_sub_group_block_write2(dst,coord, src[id]);
+  intel_sub_group_block_write_ui2(dst,coord, src[id]);
 }
 
-__kernel void compiler_subgroup_image_block_write4(image2d_t dst, global uint4 *src)
+__kernel void compiler_subgroup_image_block_write_ui4(image2d_t dst, global uint4 *src)
 {
   int id = get_global_id(0);
   int2 coord = (int2)(get_simd_size()*get_sub_group_id()*sizeof(uint),0);
-  intel_sub_group_block_write4(dst,coord, src[id]);
+  intel_sub_group_block_write_ui4(dst,coord, src[id]);
 }
 
-__kernel void compiler_subgroup_image_block_write8(image2d_t dst, global uint8 *src)
+__kernel void compiler_subgroup_image_block_write_ui8(image2d_t dst, global uint8 *src)
 {
   int id = get_global_id(0);
   int2 coord = (int2)(get_simd_size()*get_sub_group_id()*sizeof(uint),0);
-  intel_sub_group_block_write8(dst,coord, src[id]);
+  intel_sub_group_block_write_ui8(dst,coord, src[id]);
+}
+#ifdef SHORT
+__kernel void compiler_subgroup_image_block_write_us1(image2d_t dst, global ushort *src)
+{
+  int id = get_global_id(0);
+  int2 coord = (int2)(get_simd_size()*get_sub_group_id()*sizeof(ushort),0);
+  intel_sub_group_block_write_us(dst,coord, src[id]);
+}
+
+__kernel void compiler_subgroup_image_block_write_us2(image2d_t dst, global ushort2 *src)
+{
+  int id = get_global_id(0);
+  int2 coord = (int2)(get_simd_size()*get_sub_group_id()*sizeof(ushort),0);
+  intel_sub_group_block_write_us2(dst,coord, src[id]);
+}
+
+__kernel void compiler_subgroup_image_block_write_us4(image2d_t dst, global ushort4 *src)
+{
+  int id = get_global_id(0);
+  int2 coord = (int2)(get_simd_size()*get_sub_group_id()*sizeof(ushort),0);
+  intel_sub_group_block_write_us4(dst,coord, src[id]);
+}
+
+__kernel void compiler_subgroup_image_block_write_us8(image2d_t dst, global ushort8 *src)
+{
+  int id = get_global_id(0);
+  int2 coord = (int2)(get_simd_size()*get_sub_group_id()*sizeof(ushort),0);
+  intel_sub_group_block_write_us8(dst,coord, src[id]);
 }
+#endif
diff --git a/utests/compiler_subgroup_buffer_block_read.cpp b/utests/compiler_subgroup_buffer_block_read.cpp
index 9707f19..74bc899 100644
--- a/utests/compiler_subgroup_buffer_block_read.cpp
+++ b/utests/compiler_subgroup_buffer_block_read.cpp
@@ -64,6 +64,7 @@  static void generate_data(T* &input,
         input[(gid + lid)*VEC_SIZE + vsz] += ((rand() % 2 - 1) * base_val);
         /* add trailing random bits, tests GENERAL cases */
         input[(gid + lid)*VEC_SIZE + vsz] += (rand() % 112);
+        //input[(gid + lid)*VEC_SIZE + vsz] = (gid + lid)*VEC_SIZE + vsz;
 
 #if DEBUG_STDOUT
         /* output generated input */
@@ -156,47 +157,95 @@  static void subgroup_generic(T* input,
 /*
  * subgroup buffer block read
  */
-void compiler_subgroup_buffer_block_read1(void)
+void compiler_subgroup_buffer_block_read_ui1(void)
 {
   if(!cl_check_subgroups())
     return;
   cl_uint *input = NULL;
   cl_uint *expected = NULL;
   OCL_CREATE_KERNEL_FROM_FILE("compiler_subgroup_buffer_block_read",
-                              "compiler_subgroup_buffer_block_read1");
+                              "compiler_subgroup_buffer_block_read_ui1");
   subgroup_generic(input, expected, 1);
 }
-MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_buffer_block_read1);
-void compiler_subgroup_buffer_block_read2(void)
+MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_buffer_block_read_ui1);
+void compiler_subgroup_buffer_block_read_ui2(void)
 {
   if(!cl_check_subgroups())
     return;
   cl_uint *input = NULL;
   cl_uint *expected = NULL;
   OCL_CREATE_KERNEL_FROM_FILE("compiler_subgroup_buffer_block_read",
-                              "compiler_subgroup_buffer_block_read2");
+                              "compiler_subgroup_buffer_block_read_ui2");
   subgroup_generic(input, expected, 2);
 }
-MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_buffer_block_read2);
-void compiler_subgroup_buffer_block_read4(void)
+MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_buffer_block_read_ui2);
+void compiler_subgroup_buffer_block_read_ui4(void)
 {
   if(!cl_check_subgroups())
     return;
   cl_uint *input = NULL;
   cl_uint *expected = NULL;
   OCL_CREATE_KERNEL_FROM_FILE("compiler_subgroup_buffer_block_read",
-                              "compiler_subgroup_buffer_block_read4");
+                              "compiler_subgroup_buffer_block_read_ui4");
   subgroup_generic(input, expected, 4);
 }
-MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_buffer_block_read4);
-void compiler_subgroup_buffer_block_read8(void)
+MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_buffer_block_read_ui4);
+void compiler_subgroup_buffer_block_read_ui8(void)
 {
   if(!cl_check_subgroups())
     return;
   cl_uint *input = NULL;
   cl_uint *expected = NULL;
   OCL_CREATE_KERNEL_FROM_FILE("compiler_subgroup_buffer_block_read",
-                              "compiler_subgroup_buffer_block_read8");
+                              "compiler_subgroup_buffer_block_read_ui8");
   subgroup_generic(input, expected, 8);
 }
-MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_buffer_block_read8);
+MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_buffer_block_read_ui8);
+void compiler_subgroup_buffer_block_read_us1(void)
+{
+  if(!cl_check_subgroups_short())
+    return;
+  cl_ushort *input = NULL;
+  cl_ushort *expected = NULL;
+  OCL_CALL(cl_kernel_init, "compiler_subgroup_buffer_block_read.cl",
+                           "compiler_subgroup_buffer_block_read_us1",
+                           SOURCE, "-DSHORT");
+  subgroup_generic(input, expected, 1);
+}
+MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_buffer_block_read_us1);
+void compiler_subgroup_buffer_block_read_us2(void)
+{
+  if(!cl_check_subgroups_short())
+    return;
+  cl_ushort *input = NULL;
+  cl_ushort *expected = NULL;
+  OCL_CALL(cl_kernel_init, "compiler_subgroup_buffer_block_read.cl",
+                           "compiler_subgroup_buffer_block_read_us2",
+                           SOURCE, "-DSHORT");
+  subgroup_generic(input, expected, 2);
+}
+MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_buffer_block_read_us2);
+void compiler_subgroup_buffer_block_read_us4(void)
+{
+  if(!cl_check_subgroups_short())
+    return;
+  cl_ushort *input = NULL;
+  cl_ushort *expected = NULL;
+  OCL_CALL(cl_kernel_init, "compiler_subgroup_buffer_block_read.cl",
+                           "compiler_subgroup_buffer_block_read_us4",
+                           SOURCE, "-DSHORT");
+  subgroup_generic(input, expected, 4);
+}
+MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_buffer_block_read_us4);
+void compiler_subgroup_buffer_block_read_us8(void)
+{
+  if(!cl_check_subgroups_short())
+    return;
+  cl_ushort *input = NULL;
+  cl_ushort *expected = NULL;
+  OCL_CALL(cl_kernel_init, "compiler_subgroup_buffer_block_read.cl",
+                           "compiler_subgroup_buffer_block_read_us8",
+                           SOURCE, "-DSHORT");
+  subgroup_generic(input, expected, 8);
+}
+MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_buffer_block_read_us8);
diff --git a/utests/compiler_subgroup_buffer_block_write.cpp b/utests/compiler_subgroup_buffer_block_write.cpp
index 6b257c5..3b538da 100644
--- a/utests/compiler_subgroup_buffer_block_write.cpp
+++ b/utests/compiler_subgroup_buffer_block_write.cpp
@@ -64,6 +64,7 @@  static void generate_data(T* &input,
         input[(gid + lid)*VEC_SIZE + vsz] += ((rand() % 2 - 1) * base_val);
         /* add trailing random bits, tests GENERAL cases */
         input[(gid + lid)*VEC_SIZE + vsz] += (rand() % 112);
+        //input[(gid + lid)*VEC_SIZE + vsz] = (gid + lid)*VEC_SIZE + vsz;
 
 #if DEBUG_STDOUT
         /* output generated input */
@@ -156,47 +157,96 @@  static void subgroup_generic(T* input,
 /*
  * subgroup buffer block write
  */
-void compiler_subgroup_buffer_block_write1(void)
+void compiler_subgroup_buffer_block_write_ui1(void)
 {
   if(!cl_check_subgroups())
     return;
   cl_uint *input = NULL;
   cl_uint *expected = NULL;
   OCL_CREATE_KERNEL_FROM_FILE("compiler_subgroup_buffer_block_write",
-                              "compiler_subgroup_buffer_block_write1");
+                              "compiler_subgroup_buffer_block_write_ui1");
   subgroup_generic(input, expected, 1);
 }
-MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_buffer_block_write1);
-void compiler_subgroup_buffer_block_write2(void)
+MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_buffer_block_write_ui1);
+void compiler_subgroup_buffer_block_write_ui2(void)
 {
   if(!cl_check_subgroups())
     return;
   cl_uint *input = NULL;
   cl_uint *expected = NULL;
   OCL_CREATE_KERNEL_FROM_FILE("compiler_subgroup_buffer_block_write",
-                              "compiler_subgroup_buffer_block_write2");
+                              "compiler_subgroup_buffer_block_write_ui2");
   subgroup_generic(input, expected, 2);
 }
-MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_buffer_block_write2);
-void compiler_subgroup_buffer_block_write4(void)
+MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_buffer_block_write_ui2);
+void compiler_subgroup_buffer_block_write_ui4(void)
 {
   if(!cl_check_subgroups())
     return;
   cl_uint *input = NULL;
   cl_uint *expected = NULL;
   OCL_CREATE_KERNEL_FROM_FILE("compiler_subgroup_buffer_block_write",
-                              "compiler_subgroup_buffer_block_write4");
+                              "compiler_subgroup_buffer_block_write_ui4");
   subgroup_generic(input, expected, 4);
 }
-MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_buffer_block_write4);
-void compiler_subgroup_buffer_block_write8(void)
+MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_buffer_block_write_ui4);
+void compiler_subgroup_buffer_block_write_ui8(void)
 {
   if(!cl_check_subgroups())
     return;
   cl_uint *input = NULL;
   cl_uint *expected = NULL;
   OCL_CREATE_KERNEL_FROM_FILE("compiler_subgroup_buffer_block_write",
-                              "compiler_subgroup_buffer_block_write8");
+                              "compiler_subgroup_buffer_block_write_ui8");
   subgroup_generic(input, expected, 8);
 }
-MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_buffer_block_write8);
+MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_buffer_block_write_ui8);
+
+void compiler_subgroup_buffer_block_write_us1(void)
+{
+  if(!cl_check_subgroups_short())
+    return;
+  cl_ushort *input = NULL;
+  cl_ushort *expected = NULL;
+  OCL_CALL(cl_kernel_init, "compiler_subgroup_buffer_block_write.cl",
+                           "compiler_subgroup_buffer_block_write_us1",
+                           SOURCE, "-DSHORT");
+  subgroup_generic(input, expected, 1);
+}
+MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_buffer_block_write_us1);
+void compiler_subgroup_buffer_block_write_us2(void)
+{
+  if(!cl_check_subgroups_short())
+    return;
+  cl_ushort *input = NULL;
+  cl_ushort *expected = NULL;
+  OCL_CALL(cl_kernel_init, "compiler_subgroup_buffer_block_write.cl",
+                           "compiler_subgroup_buffer_block_write_us2",
+                           SOURCE, "-DSHORT");
+  subgroup_generic(input, expected, 2);
+}
+MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_buffer_block_write_us2);
+void compiler_subgroup_buffer_block_write_us4(void)
+{
+  if(!cl_check_subgroups_short())
+    return;
+  cl_ushort *input = NULL;
+  cl_ushort *expected = NULL;
+  OCL_CALL(cl_kernel_init, "compiler_subgroup_buffer_block_write.cl",
+                           "compiler_subgroup_buffer_block_write_us4",
+                           SOURCE, "-DSHORT");
+  subgroup_generic(input, expected, 4);
+}
+MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_buffer_block_write_us4);
+void compiler_subgroup_buffer_block_write_us8(void)
+{
+  if(!cl_check_subgroups_short())
+    return;
+  cl_ushort *input = NULL;
+  cl_ushort *expected = NULL;
+  OCL_CALL(cl_kernel_init, "compiler_subgroup_buffer_block_write.cl",
+                           "compiler_subgroup_buffer_block_write_us8",
+                           SOURCE, "-DSHORT");
+  subgroup_generic(input, expected, 8);
+}
+MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_buffer_block_write_us8);
diff --git a/utests/compiler_subgroup_image_block_read.cpp b/utests/compiler_subgroup_image_block_read.cpp
index 02c8f07..879d622 100644
--- a/utests/compiler_subgroup_image_block_read.cpp
+++ b/utests/compiler_subgroup_image_block_read.cpp
@@ -21,7 +21,7 @@  static void compute_expected(T* input,
 {
   for(uint32_t i = 0; i < WG_GLOBAL_SIZE; i++)
     for(uint32_t j = 0; j < VEC_SIZE; j++)
-      expected[i * VEC_SIZE + j] = input[WG_GLOBAL_SIZE * j + i];
+      expected[i * VEC_SIZE + j] = input[WG_GLOBAL_SIZE * 4 / sizeof(T) * j + i];
 }
 
 /*
@@ -33,7 +33,8 @@  static void generate_data(T* &input,
                           size_t VEC_SIZE)
 {
   /* allocate input and expected arrays */
-  input = new T[WG_GLOBAL_SIZE * VEC_SIZE];
+  int* input_ui = new int[WG_GLOBAL_SIZE * VEC_SIZE];
+  input = (T*)input_ui;
   expected = new T[WG_GLOBAL_SIZE * VEC_SIZE];
 
   /* base value for all data types */
@@ -45,19 +46,22 @@  static void generate_data(T* &input,
 #if DEBUG_STDOUT
     cout << endl << "IN: " << endl;
 #endif
+  uint32_t rpitch = sizeof(uint32_t) * WG_GLOBAL_SIZE / sizeof(T);
   /* generate inputs and expected values */
-  for(uint32_t gid = 0; gid < WG_GLOBAL_SIZE * VEC_SIZE; gid++)
-  {
-    /* initially 0, augment after */
-    input[gid] = ((rand() % 2 - 1) * base_val) + (rand() % 112);
+  for(uint32_t h = 0; h < VEC_SIZE; ++h) {
+    for(uint32_t w = 0; w < WG_GLOBAL_SIZE; ++w)
+    {
+      /* initially 0, augment after */
+      input[w + h * rpitch] = ((rand() % 2 - 1) * base_val) + (rand() % 112);
+      //input[w + h * rpitch] = w + h * WG_GLOBAL_SIZE;
 
 #if DEBUG_STDOUT
-    /* output generated input */
-    cout << setw(4) << input[gid] << ", " ;
-    if((gid + 1) % 8 == 0)
-          cout << endl;
+      /* output generated input */
+      cout << setw(4) << input[w + h * rpitch] << ", " ;
+      if((w+ 1) % 8 == 0)
+            cout << endl;
 #endif
-
+    }
   }
   /* expected values */
   compute_expected(input, expected, VEC_SIZE);
@@ -151,47 +155,95 @@  static void subgroup_generic(T* input,
 /*
  * sub_group image block read functions
  */
-void compiler_subgroup_image_block_read1(void)
+void compiler_subgroup_image_block_read_ui1(void)
 {
   if(!cl_check_subgroups())
     return;
   cl_uint *input = NULL;
   cl_uint *expected = NULL;
   OCL_CREATE_KERNEL_FROM_FILE("compiler_subgroup_image_block_read",
-                              "compiler_subgroup_image_block_read1");
+                              "compiler_subgroup_image_block_read_ui1");
   subgroup_generic(input, expected, 1);
 }
-MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_image_block_read1);
-void compiler_subgroup_image_block_read2(void)
+MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_image_block_read_ui1);
+void compiler_subgroup_image_block_read_ui2(void)
 {
   if(!cl_check_subgroups())
     return;
   cl_uint *input = NULL;
   cl_uint *expected = NULL;
   OCL_CREATE_KERNEL_FROM_FILE("compiler_subgroup_image_block_read",
-                              "compiler_subgroup_image_block_read2");
+                              "compiler_subgroup_image_block_read_ui2");
   subgroup_generic(input, expected, 2);
 }
-MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_image_block_read2);
-void compiler_subgroup_image_block_read4(void)
+MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_image_block_read_ui2);
+void compiler_subgroup_image_block_read_ui4(void)
 {
   if(!cl_check_subgroups())
     return;
   cl_uint *input = NULL;
   cl_uint *expected = NULL;
   OCL_CREATE_KERNEL_FROM_FILE("compiler_subgroup_image_block_read",
-                              "compiler_subgroup_image_block_read4");
+                              "compiler_subgroup_image_block_read_ui4");
   subgroup_generic(input, expected, 4);
 }
-MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_image_block_read4);
-void compiler_subgroup_image_block_read8(void)
+MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_image_block_read_ui4);
+void compiler_subgroup_image_block_read_ui8(void)
 {
   if(!cl_check_subgroups())
     return;
   cl_uint *input = NULL;
   cl_uint *expected = NULL;
   OCL_CREATE_KERNEL_FROM_FILE("compiler_subgroup_image_block_read",
-                              "compiler_subgroup_image_block_read8");
+                              "compiler_subgroup_image_block_read_ui8");
+  subgroup_generic(input, expected, 8);
+}
+MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_image_block_read_ui8);
+void compiler_subgroup_image_block_read_us1(void)
+{
+  if(!cl_check_subgroups_short())
+    return;
+  cl_ushort *input = NULL;
+  cl_ushort *expected = NULL;
+  OCL_CALL(cl_kernel_init, "compiler_subgroup_image_block_read.cl",
+                           "compiler_subgroup_image_block_read_us1",
+                           SOURCE, "-DSHORT");
+  subgroup_generic(input, expected, 1);
+}
+MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_image_block_read_us1);
+void compiler_subgroup_image_block_read_us2(void)
+{
+  if(!cl_check_subgroups_short())
+    return;
+  cl_ushort *input = NULL;
+  cl_ushort *expected = NULL;
+  OCL_CALL(cl_kernel_init, "compiler_subgroup_image_block_read.cl",
+                           "compiler_subgroup_image_block_read_us2",
+                           SOURCE, "-DSHORT");
+  subgroup_generic(input, expected, 2);
+}
+MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_image_block_read_us2);
+void compiler_subgroup_image_block_read_us4(void)
+{
+  if(!cl_check_subgroups_short())
+    return;
+  cl_ushort *input = NULL;
+  cl_ushort *expected = NULL;
+  OCL_CALL(cl_kernel_init, "compiler_subgroup_image_block_read.cl",
+                           "compiler_subgroup_image_block_read_us4",
+                           SOURCE, "-DSHORT");
+  subgroup_generic(input, expected, 4);
+}
+MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_image_block_read_us4);
+void compiler_subgroup_image_block_read_us8(void)
+{
+  if(!cl_check_subgroups_short())
+    return;
+  cl_ushort *input = NULL;
+  cl_ushort *expected = NULL;
+  OCL_CALL(cl_kernel_init, "compiler_subgroup_image_block_read.cl",
+                           "compiler_subgroup_image_block_read_us8",
+                           SOURCE, "-DSHORT");
   subgroup_generic(input, expected, 8);
 }
-MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_image_block_read8);
+MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_image_block_read_us8);
diff --git a/utests/compiler_subgroup_image_block_write.cpp b/utests/compiler_subgroup_image_block_write.cpp
index 2b85167..98cbb0f 100644
--- a/utests/compiler_subgroup_image_block_write.cpp
+++ b/utests/compiler_subgroup_image_block_write.cpp
@@ -50,6 +50,7 @@  static void generate_data(T* &input,
   {
     /* initially 0, augment after */
     input[gid] = ((rand() % 2 - 1) * base_val) + (rand() % 112);
+    //input[gid] = gid;
 
 #if DEBUG_STDOUT
     /* output generated input */
@@ -155,47 +156,95 @@  static void subgroup_generic(T* input,
 /*
  * sub_group image block write functions
  */
-void compiler_subgroup_image_block_write1(void)
+void compiler_subgroup_image_block_write_ui1(void)
 {
   if(!cl_check_subgroups())
     return;
   cl_uint *input = NULL;
   cl_uint *expected = NULL;
   OCL_CREATE_KERNEL_FROM_FILE("compiler_subgroup_image_block_write",
-                              "compiler_subgroup_image_block_write1");
+                              "compiler_subgroup_image_block_write_ui1");
   subgroup_generic(input, expected, 1);
 }
-MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_image_block_write1);
-void compiler_subgroup_image_block_write2(void)
+MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_image_block_write_ui1);
+void compiler_subgroup_image_block_write_ui2(void)
 {
   if(!cl_check_subgroups())
     return;
   cl_uint *input = NULL;
   cl_uint *expected = NULL;
   OCL_CREATE_KERNEL_FROM_FILE("compiler_subgroup_image_block_write",
-                              "compiler_subgroup_image_block_write2");
+                              "compiler_subgroup_image_block_write_ui2");
   subgroup_generic(input, expected, 2);
 }
-MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_image_block_write2);
-void compiler_subgroup_image_block_write4(void)
+MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_image_block_write_ui2);
+void compiler_subgroup_image_block_write_ui4(void)
 {
   if(!cl_check_subgroups())
     return;
   cl_uint *input = NULL;
   cl_uint *expected = NULL;
   OCL_CREATE_KERNEL_FROM_FILE("compiler_subgroup_image_block_write",
-                              "compiler_subgroup_image_block_write4");
+                              "compiler_subgroup_image_block_write_ui4");
   subgroup_generic(input, expected, 4);
 }
-MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_image_block_write4);
-void compiler_subgroup_image_block_write8(void)
+MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_image_block_write_ui4);
+void compiler_subgroup_image_block_write_ui8(void)
 {
   if(!cl_check_subgroups())
     return;
   cl_uint *input = NULL;
   cl_uint *expected = NULL;
   OCL_CREATE_KERNEL_FROM_FILE("compiler_subgroup_image_block_write",
-                              "compiler_subgroup_image_block_write8");
+                              "compiler_subgroup_image_block_write_ui8");
   subgroup_generic(input, expected, 8);
 }
-MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_image_block_write8);
+MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_image_block_write_ui8);
+void compiler_subgroup_image_block_write_us1(void)
+{
+  if(!cl_check_subgroups_short())
+    return;
+  cl_ushort *input = NULL;
+  cl_ushort *expected = NULL;
+  OCL_CALL(cl_kernel_init, "compiler_subgroup_image_block_write.cl",
+                           "compiler_subgroup_image_block_write_us1",
+                           SOURCE, "-DSHORT");
+  subgroup_generic(input, expected, 1);
+}
+MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_image_block_write_us1);
+void compiler_subgroup_image_block_write_us2(void)
+{
+  if(!cl_check_subgroups_short())
+    return;
+  cl_ushort *input = NULL;
+  cl_ushort *expected = NULL;
+  OCL_CALL(cl_kernel_init, "compiler_subgroup_image_block_write.cl",
+                           "compiler_subgroup_image_block_write_us2",
+                           SOURCE, "-DSHORT");
+  subgroup_generic(input, expected, 2);
+}
+MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_image_block_write_us2);
+void compiler_subgroup_image_block_write_us4(void)
+{
+  if(!cl_check_subgroups_short())
+    return;
+  cl_ushort *input = NULL;
+  cl_ushort *expected = NULL;
+  OCL_CALL(cl_kernel_init, "compiler_subgroup_image_block_write.cl",
+                           "compiler_subgroup_image_block_write_us4",
+                           SOURCE, "-DSHORT");
+  subgroup_generic(input, expected, 4);
+}
+MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_image_block_write_us4);
+void compiler_subgroup_image_block_write_us8(void)
+{
+  if(!cl_check_subgroups_short())
+    return;
+  cl_ushort *input = NULL;
+  cl_ushort *expected = NULL;
+  OCL_CALL(cl_kernel_init, "compiler_subgroup_image_block_write.cl",
+                           "compiler_subgroup_image_block_write_us8",
+                           SOURCE, "-DSHORT");
+  subgroup_generic(input, expected, 8);
+}
+MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_image_block_write_us8);