utests: add utest for fdiv to rcp

Submitted by rander on June 19, 2017, 7:22 a.m.

Details

Message ID 1497856945-6901-1-git-send-email-rander.wang@intel.com
State New
Headers show
Series "utests: add utest for fdiv to rcp" ( rev: 1 ) in Beignet

Not browsing as part of any series.

Commit Message

rander June 19, 2017, 7:22 a.m.
In this case 1.0f/src and 2.0f/src can be converted to rcp,
	but 3.0f/src and i/src can't.

Signed-off-by: rander.wang <rander.wang@intel.com>
---
 kernels/compiler_fdiv2rcp.cl |  8 ++++++
 utests/CMakeLists.txt        |  3 ++-
 utests/compiler_fdiv2rcp.cpp | 61 ++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 71 insertions(+), 1 deletion(-)
 create mode 100644 kernels/compiler_fdiv2rcp.cl
 create mode 100644 utests/compiler_fdiv2rcp.cpp

Patch hide | download patch | download mbox

diff --git a/kernels/compiler_fdiv2rcp.cl b/kernels/compiler_fdiv2rcp.cl
new file mode 100644
index 0000000..99e0005
--- /dev/null
+++ b/kernels/compiler_fdiv2rcp.cl
@@ -0,0 +1,8 @@ 
+kernel void compiler_fdiv2rcp(global float *src, global float *dst) { // reads src[i], writes four quotients to dst[4*i .. 4*i+3]
+  int i = get_global_id(0); // index of this work-item in dimension 0
+  float tmp = src[i]; // shared divisor for all four divisions below
+  dst[i*4] = 1.0f/tmp; // constant numerator 1.0f (commit message: can be converted)
+  dst[i*4+1] = (float)i/tmp; // numerator is the work-item index, not a constant (commit message: cannot be converted)
+  dst[i*4+2] = 2.0f/tmp; // constant numerator 2.0f (commit message: can be converted)
+  dst[i*4+3] = 3.0f/tmp; // constant numerator 3.0f (commit message: cannot be converted)
+};
diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt
index ebbf0f5..655a314 100644
--- a/utests/CMakeLists.txt
+++ b/utests/CMakeLists.txt
@@ -306,7 +306,8 @@  set (utests_sources
   compiler_pipe_builtin.cpp
   compiler_device_enqueue.cpp
   compiler_sqrt_div.cpp
-  compiler_remove_negative_add.cpp)
+  compiler_remove_negative_add.cpp
+  compiler_fdiv2rcp.cpp)
 
 if (LLVM_VERSION_NODOT VERSION_GREATER 34)
   SET(utests_sources
diff --git a/utests/compiler_fdiv2rcp.cpp b/utests/compiler_fdiv2rcp.cpp
new file mode 100644
index 0000000..7a98392
--- /dev/null
+++ b/utests/compiler_fdiv2rcp.cpp
@@ -0,0 +1,61 @@ 
+#include "utest_helper.hpp"
+#include <cmath>
+
+void compiler_fdiv2rcp(void) { // utest: compares the four per-element quotients written by the compiler_fdiv2rcp kernel with host-computed values
+  const int n = 1024; // number of input elements per pass
+  float src[n]; // host-side copy of the inputs, used to compute the reference results
+
+  // Setup kernel and buffers: buf[0] holds n input floats, buf[1] holds 4 output floats per input
+  OCL_CREATE_KERNEL("compiler_fdiv2rcp");
+  OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(float), NULL);
+  OCL_CREATE_BUFFER(buf[1], 0, n * 4 * sizeof(float), NULL);
+  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+  OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+  globals[0] = n; // presumably the global work size used by OCL_NDRANGE - confirm in utest_helper.hpp
+  locals[0] = 16; // presumably the work-group size used by OCL_NDRANGE - confirm in utest_helper.hpp
+
+  for (int j = 0; j < 1024; j++) { // 1024 passes, each with a different range of input values
+    OCL_MAP_BUFFER(0);
+    for (int i = 0; i < n; ++i) {
+      src[i] = ((float *)buf_data[0])[i] = (j * n + i + 1) * 0.001f; // strictly positive input (0.001f steps), so no division by zero; same value stored on host and in buf[0]
+    }
+    OCL_UNMAP_BUFFER(0);
+
+    OCL_NDRANGE(1); // presumably enqueues the kernel over a 1-dimensional range - confirm in utest_helper.hpp
+
+    OCL_MAP_BUFFER(1);
+    float *dst = (float *)buf_data[1]; // mapped kernel output, 4 floats per input element
+    for (int i = 0; i < n; ++i) {
+      float cpu = 1.0f / (src[i]); // host reference for dst[4*i]
+      float gpu = dst[4 * i];
+      if (fabsf(cpu - gpu) >= 1e-3) { // absolute (not relative) tolerance of 1e-3
+        printf("%f %f %f", src[i], cpu, gpu); // prints input, host result, device result (no trailing newline)
+        OCL_ASSERT(0); // presumably fails the test unconditionally - confirm in utest_helper.hpp
+      }
+
+      cpu = i / (src[i]); // host reference for dst[4*i+1]; mirrors the kernel's (float)i/tmp
+      gpu = dst[4 * i + 1];
+      if (fabsf(cpu - gpu) >= 1e-3) { // same absolute tolerance as above
+        printf("%f %f %f", src[i], cpu, gpu);
+        OCL_ASSERT(0);
+      }
+
+      cpu = 2.0f / (src[i]); // host reference for dst[4*i+2]
+      gpu = dst[4 * i + 2];
+      if (fabsf(cpu - gpu) >= 1e-3) { // same absolute tolerance as above
+        printf("%f %f %f", src[i], cpu, gpu);
+        OCL_ASSERT(0);
+      }
+
+      cpu = 3.0f / (src[i]); // host reference for dst[4*i+3]
+      gpu = dst[4 * i + 3];
+      if (fabsf(cpu - gpu) >= 1e-3) { // same absolute tolerance as above
+        printf("%f %f %f", src[i], cpu, gpu);
+        OCL_ASSERT(0);
+      }
+    }
+    OCL_UNMAP_BUFFER(1);
+  }
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_fdiv2rcp);