utests: add a utest for convert_double_rte|z|p|n(int16). The test is very simple: because double can represent the whole range of short, it just compares each tested short value with the converted double value; they should be the same.

Submitted by rander on March 16, 2017, 8:53 a.m.

Details

Message ID 1489654421-21607-1-git-send-email-rander.wang@intel.com
State New
Headers show
Series "utests: add a utest for convert_double_rte|z|p|n(int16). The test is very simple: because double can represent the whole range of short, it just compares each tested short value with the converted double value; they should be the same." ( rev: 1 ) in Beignet

Not browsing as part of any series.

Commit Message

rander March 16, 2017, 8:53 a.m.
Signed-off-by: rander <rander.wang@intel.com>
---
 kernels/builtin_convert_int16toDouble.cl | 36 ++++++++++++++
 utests/CMakeLists.txt                    |  3 +-
 utests/builtin_convert_int16toDouble.cpp | 85 ++++++++++++++++++++++++++++++++
 3 files changed, 123 insertions(+), 1 deletion(-)
 create mode 100644 kernels/builtin_convert_int16toDouble.cl
 create mode 100644 utests/builtin_convert_int16toDouble.cpp

Patch hide | download patch | download mbox

diff --git a/kernels/builtin_convert_int16toDouble.cl b/kernels/builtin_convert_int16toDouble.cl
new file mode 100644
index 0000000..558b1c7
--- /dev/null
+++ b/kernels/builtin_convert_int16toDouble.cl
@@ -0,0 +1,36 @@ 
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+/* Converts max_input shorts (X) and max_input ushorts (uX) to double with every
+ * rounding mode.  Starting at Z[get_global_id(0)], the results are stored as
+ * eight consecutive runs of max_input doubles: rtz, rtn, rte, rtp for X, then
+ * rtz, rtn, rte, rtp for uX. */
+__kernel void builtin_convert_int16toDouble(__global short *X,
+												__global ushort *uX,
+												__global double *Z,
+												int max_input)
+{
+	int first = get_global_id(0);
+	__global double *dst = Z + first;
+	int n;
+
+	/* signed inputs */
+	for (n = 0; n < max_input; ++n)
+		*dst++ = convert_double_rtz(X[n]);
+	for (n = 0; n < max_input; ++n)
+		*dst++ = convert_double_rtn(X[n]);
+	for (n = 0; n < max_input; ++n)
+		*dst++ = convert_double_rte(X[n]);
+	for (n = 0; n < max_input; ++n)
+		*dst++ = convert_double_rtp(X[n]);
+
+	/* unsigned inputs */
+	for (n = 0; n < max_input; ++n)
+		*dst++ = convert_double_rtz(uX[n]);
+	for (n = 0; n < max_input; ++n)
+		*dst++ = convert_double_rtn(uX[n]);
+	for (n = 0; n < max_input; ++n)
+		*dst++ = convert_double_rte(uX[n]);
+	for (n = 0; n < max_input; ++n)
+		*dst++ = convert_double_rtp(uX[n]);
+}
+
diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt
index 8f006c7..6e41eeb 100644
--- a/utests/CMakeLists.txt
+++ b/utests/CMakeLists.txt
@@ -304,7 +304,8 @@  set (utests_sources
   builtin_convert_double2int16.cpp
   builtin_convert_double2int32.cpp
   builtin_convert_double2int64.cpp
-  builtin_convert_int8toDouble.cpp)
+  builtin_convert_int8toDouble.cpp
+  builtin_convert_int16toDouble.cpp)
 
 if (LLVM_VERSION_NODOT VERSION_GREATER 34)
   SET(utests_sources
diff --git a/utests/builtin_convert_int16toDouble.cpp b/utests/builtin_convert_int16toDouble.cpp
new file mode 100644
index 0000000..222dace
--- /dev/null
+++ b/utests/builtin_convert_int16toDouble.cpp
@@ -0,0 +1,85 @@ 
+#include "utest_helper.hpp"
+#include <cmath>
+#include <algorithm>
+
+namespace{
+
+// Labels used in failure messages, one per run of results written by the kernel.
+const char *testFunc[] = {
+    " double convert_double_rtz(short x)",
+    " double convert_double_rtn(short x)",
+    " double convert_double_rte(short x)",
+    " double convert_double_rtp(short x)",
+    // The unsigned variants follow the four signed ones.
+    " double convert_double_rtz(ushort x)",
+    " double convert_double_rtn(ushort x)",
+    " double convert_double_rte(ushort x)",
+    " double convert_double_rtp(ushort x)",
+};
+
+short *input_data;            // host staging buffer; allocated inside the test function
+const int count_input = 4096; // number of samples in each input buffer
+const int max_function = 8;   // number of result runs the kernel writes (one per testFunc entry)
+
+// Checks that the convert_double_rt{z,n,e,p} built-ins are exact for 16-bit
+// integers: count_input signed and count_input unsigned samples (every 16th
+// value of each range) are converted on the device by all four rounding
+// variants and compared with the value computed on the host.
+static void builtin_convert_int16toDouble(void)
+{
+  const int per_sign = count_input * max_function / 2; // results per signedness
+  double gpu_data[max_function * count_input] = {0};
+
+  OCL_CREATE_KERNEL("builtin_convert_int16toDouble");
+
+  OCL_CREATE_BUFFER(buf[0], CL_MEM_READ_WRITE, count_input * sizeof(short), NULL);
+  OCL_CREATE_BUFFER(buf[1], CL_MEM_READ_WRITE, count_input * sizeof(short), NULL);
+  OCL_CREATE_BUFFER(buf[2], CL_MEM_READ_WRITE, count_input * max_function * sizeof(double), NULL);
+
+  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+  OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+  OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]);
+  OCL_SET_ARG(3, sizeof(int), &count_input);
+
+  // One work-item is launched; the kernel loops over all inputs itself.
+  globals[0] = 1;
+  locals[0] = 1;
+
+  // Signed samples: -32768, -32752, ..., 32752.
+  input_data = new short[count_input];
+  for (int i = 0; i < count_input; i++)
+    input_data[i] = static_cast<short>(-32768 + i * 16);
+  cl_int err_signed = clEnqueueWriteBuffer(queue, buf[0], CL_TRUE, 0, count_input * sizeof(short), input_data, 0, NULL, NULL);
+
+  // Unsigned samples 0, 16, ..., 65520.  The explicit cast keeps the 16-bit
+  // pattern, which the kernel reads back through a ushort pointer.
+  for (int i = 0; i < count_input; i++)
+    input_data[i] = static_cast<short>(i * 16);
+  cl_int err_unsigned = clEnqueueWriteBuffer(queue, buf[1], CL_TRUE, 0, count_input * sizeof(short), input_data, 0, NULL, NULL);
+
+  // Both writes are blocking, so the staging buffer can be freed right away;
+  // free it before asserting so a failed assertion cannot leak it.
+  delete[] input_data;
+  input_data = NULL;
+  OCL_ASSERT(err_signed == CL_SUCCESS && err_unsigned == CL_SUCCESS);
+
+  // Run the kernel
+  OCL_NDRANGE(1);
+  OCL_ASSERT(clEnqueueReadBuffer(queue, buf[2], CL_TRUE, 0, sizeof(gpu_data), gpu_data, 0, NULL, NULL) == CL_SUCCESS);
+
+  // Report a mismatch before asserting: a printf placed after OCL_ASSERT is
+  // skipped if the failed assertion leaves the function (it appears to).
+  for (int k = 0; k < 2 * per_sign; k++)
+  {
+    const int idx = k % count_input;
+    const int expected = (k < per_sign) ? (-32768 + idx * 16) : (idx * 16);
+    if (gpu_data[k] != (double)expected)
+    {
+      printf("failed at function:%s, index:%d expect value: %d, but get :%lf \n", testFunc[k / count_input], idx, expected, gpu_data[k]);
+    }
+    OCL_ASSERT(gpu_data[k] == (double)expected);
+  }
+}
+
+MAKE_UTEST_FROM_FUNCTION(builtin_convert_int16toDouble)
+}