[02/10] OCL20: add device enqueue builtins.

Submitted by Yang, Rong R on March 17, 2016, 10:53 a.m.

Details

Message ID 1458212038-7900-2-git-send-email-rong.r.yang@intel.com
State New
Headers show
Series "Series without cover letter" ( rev: 4 3 2 1 ) in Beignet

Not browsing as part of any series.

Commit Message

Yang, Rong R March 17, 2016, 10:53 a.m.
Add three helper functions to support them. Store the ndrange info on the stack,
and write the device enqueue info to the auxiliary global buffer.

Signed-off-by: Yang Rong <rong.r.yang@intel.com>
---
 backend/src/backend/program.cpp          |   1 +
 backend/src/libocl/CMakeLists.txt        |   4 +-
 backend/src/libocl/include/ocl.h         |   1 +
 backend/src/libocl/include/ocl_enqueue.h |  67 +++++++++++++
 backend/src/libocl/src/ocl_enqueue.cl    | 156 +++++++++++++++++++++++++++++++
 5 files changed, 227 insertions(+), 2 deletions(-)
 create mode 100644 backend/src/libocl/include/ocl_enqueue.h
 create mode 100644 backend/src/libocl/src/ocl_enqueue.cl

Patch hide | download patch | download mbox

diff --git a/backend/src/backend/program.cpp b/backend/src/backend/program.cpp
index 0119670..1580fe8 100644
--- a/backend/src/backend/program.cpp
+++ b/backend/src/backend/program.cpp
@@ -546,6 +546,7 @@  namespace gbe {
     // FIXME we haven't implement those builtin functions,
     // so disable it currently.
     args.push_back("-fno-builtin");
+    args.push_back("-fblocks");
     args.push_back("-disable-llvm-optzns");
     if(bFastMath)
       args.push_back("-D __FAST_RELAXED_MATH__=1");
diff --git a/backend/src/libocl/CMakeLists.txt b/backend/src/libocl/CMakeLists.txt
index d7ed841..3b0d5f8 100644
--- a/backend/src/libocl/CMakeLists.txt
+++ b/backend/src/libocl/CMakeLists.txt
@@ -53,7 +53,7 @@  FOREACH(M ${OCL_COPY_HEADERS})
 ENDFOREACH(M) 
 
 SET (OCL_COPY_MODULES ocl_workitem ocl_atom ocl_async ocl_sync ocl_memcpy
-                      ocl_memset ocl_misc ocl_vload ocl_geometric ocl_image ocl_work_group ocl_pipe)
+                      ocl_memset ocl_misc ocl_vload ocl_geometric ocl_image ocl_work_group ocl_pipe ocl_enqueue)
 FOREACH(M ${OCL_COPY_MODULES})
     COPY_THE_HEADER(${M})
     COPY_THE_SOURCE(${M})
@@ -130,7 +130,7 @@  FOREACH(M ${OCL_BASH_GENERATED_MODULES})
 ENDFOREACH(M) 
 
 
-SET (CLANG_OCL_FLAGS -fno-builtin -ffp-contract=off -cl-kernel-arg-info -DGEN7_SAMPLER_CLAMP_BORDER_WORKAROUND "-cl-std=CL2.0")
+SET (CLANG_OCL_FLAGS -fno-builtin -ffp-contract=off -cl-kernel-arg-info -fblocks -DGEN7_SAMPLER_CLAMP_BORDER_WORKAROUND "-cl-std=CL2.0")
 MACRO(ADD_CL_TO_BC_TARGET _file)
     # CMake seems can not add pattern rule, use MACRO to replace.
     STRING(REGEX REPLACE "${LIBOCL_BINARY_DIR}/src/\(o.*\)\\.cl" "${OCL_OBJECT_DIR}/\\1.bc" output_name ${_file})
diff --git a/backend/src/libocl/include/ocl.h b/backend/src/libocl/include/ocl.h
index e2918c6..852a523 100644
--- a/backend/src/libocl/include/ocl.h
+++ b/backend/src/libocl/include/ocl.h
@@ -41,6 +41,7 @@ 
 #include "ocl_simd.h"
 #include "ocl_work_group.h"
 #include "ocl_pipe.h"
+#include "ocl_enqueue.h"
 #pragma OPENCL EXTENSION cl_khr_fp64 : disable
 #pragma OPENCL EXTENSION cl_khr_fp16 : disable
 #endif
diff --git a/backend/src/libocl/include/ocl_enqueue.h b/backend/src/libocl/include/ocl_enqueue.h
new file mode 100644
index 0000000..a578846
--- /dev/null
+++ b/backend/src/libocl/include/ocl_enqueue.h
@@ -0,0 +1,67 @@ 
+/*
+ * Copyright © 2012 - 2014 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#ifndef __OCL_ENQUEUE_H__
+#define __OCL_ENQUEUE_H__
+
+#include "ocl_types.h"
+#define CLK_ENQUEUE_FLAGS_WAIT_KERNEL 0 // presumably a value for enqueue_kernel()'s `flag`; that argument is not read by the implementation in this patch
+#define CLK_SUCCESS 0 // success status; enqueue_kernel() returns a literal 0, i.e. this value
+
+struct ndrange_info_t { // ND-range description filled in by the ndrange_1D/2D/3D builders
+  int type; // builder tag: 0x1-0x3 (1D), 0x11-0x13 (2D), 0x21-0x23 (3D)
+  int global_work_size[3]; // written by every builder, one entry per dimension used
+  int local_work_size[3]; // written only by the 0x?2 and 0x?3 builders
+  int global_work_offset[3]; // written only by the 0x?3 builders
+};
+
+struct Block_literal { // header layout of a block object; enqueue_kernel() casts its block argument to this type
+  void *isa; // initialized to &_NSConcreteStackBlock or &_NSConcreteGlobalBlock
+  int flags;
+  int reserved;
+  __global void (*invoke)(void *, ...);
+  struct Block_descriptor_1 {
+    unsigned long int reserved;         // NULL
+    unsigned long int size;         // sizeof(struct Block_literal_1); enqueue_kernel() uses it as the number of bytes to copy
+    // optional helper functions
+    void (*copy_helper)(void *dst, void *src);     // IFF (1<<25)
+    void (*dispose_helper)(void *src);             // IFF (1<<25)
+    // required ABI.2010.3.16
+    const char *signature;                         // IFF (1<<30)
+  } *descriptor;
+  // imported variables
+};
+
+int enqueue_kernel(queue_t q, int flag, ndrange_t ndrange, void (^block)(void)); // implemented in ocl_enqueue.cl
+
+queue_t get_default_queue(void); // implemented in ocl_enqueue.cl
+int __gen_enqueue_kernel(queue_t q, int flag, ndrange_t ndrange, void (^block)(void), int size); // declared only; ocl_enqueue.cl as added here has no body for it
+
+OVERLOADABLE ndrange_t ndrange_1D(size_t global_work_size); // global size only
+OVERLOADABLE ndrange_t ndrange_1D(size_t global_work_size, size_t local_work_size); // global + local size
+OVERLOADABLE ndrange_t ndrange_1D(size_t global_work_offset, size_t global_work_size, size_t local_work_size); // offset + global + local size
+
+OVERLOADABLE ndrange_t ndrange_2D(size_t global_work_size[2]); // global sizes only
+OVERLOADABLE ndrange_t ndrange_2D(size_t global_work_size[2], size_t local_work_size[2]); // global + local sizes
+OVERLOADABLE ndrange_t ndrange_2D(size_t global_work_offset[2], size_t global_work_size[2], size_t local_work_size[2]); // offsets + global + local sizes
+
+OVERLOADABLE ndrange_t ndrange_3D(size_t global_work_size[3]); // global sizes only
+OVERLOADABLE ndrange_t ndrange_3D(size_t global_work_size[3], size_t local_work_size[3]); // global + local sizes
+OVERLOADABLE ndrange_t ndrange_3D(size_t global_work_offset[3], size_t global_work_size[3], size_t local_work_size[3]); // offsets + global + local sizes
+
+#endif
diff --git a/backend/src/libocl/src/ocl_enqueue.cl b/backend/src/libocl/src/ocl_enqueue.cl
new file mode 100644
index 0000000..78c39c4
--- /dev/null
+++ b/backend/src/libocl/src/ocl_enqueue.cl
@@ -0,0 +1,156 @@ 
+/*
+ * Copyright © 2012 - 2014 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+#include "ocl_types.h"
+#include "ocl_enqueue.h"
+#include "ocl_workitem.h"
+#include "ocl_atom.h"
+
+queue_t get_default_queue(void) // placeholder: hands back a local queue_t without looking anything up
+{
+  queue_t queue; // NOTE(review): never assigned, so the value returned below is indeterminate
+  return queue; // intended by the author to act as a NULL queue
+}
+
+ndrange_t __gen_ocl_set_ndrange_info(__private struct ndrange_info_t *info); // info struct -> ndrange_t; declared only, no body in this file
+__private struct ndrange_info_t* __gen_ocl_get_ndrange_info(ndrange_t info); // ndrange_t -> pointer to its info struct; declared only, no body in this file
+__global int* __gen_ocl_get_enqueue_info_addr(void); // start of the global buffer that enqueue_kernel() appends to; declared only
+
+int enqueue_kernel(queue_t q, int flag, ndrange_t ndrange, void (^block)(void)) // q and flag are not read in the body
+{ // Appends one record (ndrange info followed by the raw block bytes) to the enqueue-info buffer; always returns 0.
+  int i;
+  struct Block_literal *literal = (struct Block_literal *)block; // view the block through its header layout
+  uchar *data = (uchar *)block; // the same object as bytes, source of the copy loop below
+  int size = literal->descriptor->size; // byte count taken from the block descriptor
+  __global int* start_addr = __gen_ocl_get_enqueue_info_addr();
+  int offset = atomic_add(start_addr, size + sizeof(struct ndrange_info_t)); // advance the counter held in the buffer's first int; assumes atomic_add yields the pre-add value - TODO confirm
+  __global uchar* addr = (__global uchar*)start_addr + offset + sizeof(int); // records live after the leading int counter
+  __private struct ndrange_info_t *info = __gen_ocl_get_ndrange_info(ndrange);
+
+  *((__global struct ndrange_info_t *)addr) = *info; // record part 1: the whole ndrange struct, including fields the builder left unset
+  addr += sizeof(*info);
+
+  for(i=0; i< size; i++) { // record part 2: the block object, copied byte by byte
+    addr[i] = data[i];
+  }
+  return 0; // same value as CLK_SUCCESS; NOTE(review): no check against the buffer's capacity is visible here
+}
+
+/* 1D range with only a global size (tag 0x1); the other info fields stay unset. */
+OVERLOADABLE ndrange_t ndrange_1D(size_t global_work_size)
+{
+  struct ndrange_info_t range;
+  range.global_work_size[0] = global_work_size;
+  range.type = 0x1;
+  return __gen_ocl_set_ndrange_info(&range);
+}
+
+/* 1D range with a global and a local size (tag 0x2); the offset stays unset. */
+OVERLOADABLE ndrange_t ndrange_1D(size_t global_work_size, size_t local_work_size)
+{
+  struct ndrange_info_t range;
+  range.local_work_size[0] = local_work_size;
+  range.global_work_size[0] = global_work_size;
+  range.type = 0x2;
+  return __gen_ocl_set_ndrange_info(&range);
+}
+
+
+/* 1D range with an offset, a global size and a local size (tag 0x3). */
+OVERLOADABLE ndrange_t ndrange_1D(size_t global_work_offset, size_t global_work_size, size_t local_work_size)
+{
+  struct ndrange_info_t range;
+  range.global_work_offset[0] = global_work_offset;
+  range.global_work_size[0] = global_work_size;
+  range.local_work_size[0] = local_work_size;
+  range.type = 0x3;
+  return __gen_ocl_set_ndrange_info(&range);
+}
+
+/* 2D range with only global sizes (tag 0x11); the other info fields stay unset. */
+OVERLOADABLE ndrange_t ndrange_2D(size_t global_work_size[2])
+{
+  struct ndrange_info_t range;
+  range.global_work_size[1] = global_work_size[1];
+  range.global_work_size[0] = global_work_size[0];
+  range.type = 0x11;
+  return __gen_ocl_set_ndrange_info(&range);
+}
+
+OVERLOADABLE ndrange_t ndrange_2D(size_t global_work_size[2], size_t local_work_size[2])
+{ /* 2D range with global and local sizes (tag 0x12); the offsets stay unset. */
+  struct ndrange_info_t range;
+  range.global_work_size[0] = global_work_size[0];
+  range.local_work_size[0] = local_work_size[0];
+  range.global_work_size[1] = global_work_size[1];
+  range.local_work_size[1] = local_work_size[1];
+  range.type = 0x12;
+  return __gen_ocl_set_ndrange_info(&range);
+}
+
+
+OVERLOADABLE ndrange_t ndrange_2D(size_t global_work_offset[2], size_t global_work_size[2], size_t local_work_size[2])
+{ /* 2D range with offsets, global sizes and local sizes (tag 0x13). */
+  struct ndrange_info_t range;
+  range.global_work_offset[0] = global_work_offset[0];
+  range.global_work_size[0] = global_work_size[0];
+  range.local_work_size[0] = local_work_size[0];
+  range.global_work_offset[1] = global_work_offset[1];
+  range.global_work_size[1] = global_work_size[1];
+  range.local_work_size[1] = local_work_size[1];
+  range.type = 0x13;
+  return __gen_ocl_set_ndrange_info(&range);
+}
+
+OVERLOADABLE ndrange_t ndrange_3D(size_t global_work_size[3])
+{ /* 3D range with only global sizes (tag 0x21); the other info fields stay unset. */
+  struct ndrange_info_t range;
+  range.global_work_size[2] = global_work_size[2];
+  range.global_work_size[1] = global_work_size[1];
+  range.global_work_size[0] = global_work_size[0];
+  range.type = 0x21;
+  return __gen_ocl_set_ndrange_info(&range);
+}
+
+OVERLOADABLE ndrange_t ndrange_3D(size_t global_work_size[3], size_t local_work_size[3])
+{ /* 3D range with global and local sizes (tag 0x22); the offsets stay unset. */
+  struct ndrange_info_t range;
+  range.global_work_size[0] = global_work_size[0];
+  range.local_work_size[0] = local_work_size[0];
+  range.global_work_size[1] = global_work_size[1];
+  range.local_work_size[1] = local_work_size[1];
+  range.global_work_size[2] = global_work_size[2];
+  range.local_work_size[2] = local_work_size[2];
+  range.type = 0x22;
+  return __gen_ocl_set_ndrange_info(&range);
+}
+
+OVERLOADABLE ndrange_t ndrange_3D(size_t global_work_offset[3], size_t global_work_size[3], size_t local_work_size[3])
+{ /* 3D range with offsets, global sizes and local sizes (tag 0x23). */
+  struct ndrange_info_t range;
+  range.global_work_offset[0] = global_work_offset[0];
+  range.global_work_size[0] = global_work_size[0];
+  range.local_work_size[0] = local_work_size[0];
+  range.global_work_offset[1] = global_work_offset[1];
+  range.global_work_size[1] = global_work_size[1];
+  range.local_work_size[1] = local_work_size[1];
+  range.global_work_offset[2] = global_work_offset[2];
+  range.global_work_size[2] = global_work_size[2];
+  range.local_work_size[2] = local_work_size[2];
+  range.type = 0x23;
+  return __gen_ocl_set_ndrange_info(&range);
+}