[02/12] Runtime: Add API clGetKernelSubGroupInfoKHR for subgroup extension

Submitted by Pan Xiuli on May 26, 2016, 3:14 a.m.

Details

Message ID 1464232467-16231-2-git-send-email-xiuli.pan@intel.com
State New
Headers show
Series "Series without cover letter" ( rev: 1 ) in Beignet

Not browsing as part of any series.

Commit Message

Pan Xiuli May 26, 2016, 3:14 a.m.
From: Pan Xiuli <xiuli.pan@intel.com>

Signed-off-by: Pan Xiuli <xiuli.pan@intel.com>
---
 include/CL/cl_intel.h | 27 +++++++++++++++++
 src/cl_api.c          | 20 +++++++++++++
 src/cl_device_id.c    | 83 +++++++++++++++++++++++++++++++++++++++++++++++++++
 src/cl_device_id.h    |  9 ++++++
 4 files changed, 139 insertions(+)

Patch hide | download patch | download mbox

diff --git a/include/CL/cl_intel.h b/include/CL/cl_intel.h
index 0ea4af4..47bae46 100644
--- a/include/CL/cl_intel.h
+++ b/include/CL/cl_intel.h
@@ -170,6 +170,33 @@  typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateImageFromFdINTEL_fn)(
                              const cl_import_image_info_intel *    /* info */,
                              cl_int *                              /* errcode_ret */);
 
+#ifndef CL_VERSION_2_0
+typedef cl_uint cl_kernel_sub_group_info;
+
+/* cl_kernel_sub_group_info values accepted by clGetKernelSubGroupInfoKHR */
+#define CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR    0x2033
+#define CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR       0x2034
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetKernelSubGroupInfoKHR(cl_kernel                /* in_kernel */,
+                           cl_device_id             /* in_device */,
+                           cl_kernel_sub_group_info /* param_name */,
+                           size_t                   /* input_value_size */,
+                           const void *             /* input_value */,
+                           size_t                   /* param_value_size */,
+                           void *                   /* param_value */,
+                           size_t *                 /* param_value_size_ret */);
+
+typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetKernelSubGroupInfoKHR_fn)(
+                             cl_kernel                /* in_kernel */,
+                             cl_device_id             /* in_device */,
+                             cl_kernel_sub_group_info /* param_name */,
+                             size_t                   /* input_value_size */,
+                             const void *             /* input_value */,
+                             size_t                   /* param_value_size */,
+                             void *                   /* param_value */,
+                             size_t *                 /* param_value_size_ret */);
+#endif /* !CL_VERSION_2_0 */
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/cl_api.c b/src/cl_api.c
index 881ea6d..85ed4cf 100644
--- a/src/cl_api.c
+++ b/src/cl_api.c
@@ -1348,6 +1348,26 @@  clGetKernelWorkGroupInfo(cl_kernel                   kernel,
 }
 
 cl_int
+clGetKernelSubGroupInfoKHR(cl_kernel                   kernel,
+                          cl_device_id                device,
+                          cl_kernel_sub_group_info    param_name,
+                          size_t                      input_value_size,
+                          const void *                input_value,
+                          size_t                      param_value_size,
+                          void *                      param_value,
+                          size_t *                    param_value_size_ret)
+{
+  /* KHR sub-group query entry point: every argument is forwarded unchanged
+   * and the status of cl_get_kernel_subgroup_info() is returned as is.
+   * param_name is typed cl_kernel_sub_group_info so that this definition
+   * reads the same as the prototype declared in CL/cl_intel.h. */
+  return cl_get_kernel_subgroup_info(kernel, device, param_name,
+                                     input_value_size, input_value,
+                                     param_value_size, param_value,
+                                     param_value_size_ret);
+}
+
+cl_int
 clWaitForEvents(cl_uint          num_events,
                 const cl_event * event_list)
 {
diff --git a/src/cl_device_id.c b/src/cl_device_id.c
index 00d014b..66666ea 100644
--- a/src/cl_device_id.c
+++ b/src/cl_device_id.c
@@ -27,6 +27,7 @@ 
 #include "cl_thread.h"
 #include "CL/cl.h"
 #include "CL/cl_ext.h"
+#include "CL/cl_intel.h"
 #include "cl_gbe_loader.h"
 #include "cl_alloc.h"
 
@@ -1088,3 +1089,85 @@  error:
   return err;
 }
 
+/* Answers a clGetKernelSubGroupInfoKHR query for one kernel.
+ *
+ * kernel / device: a non-NULL device must be the device of the context the
+ *   kernel's program belongs to.
+ * param_name: one of the two *_FOR_NDRANGE_KHR queries below.  It is typed
+ *   cl_kernel_work_group_info to match the prototype in cl_device_id.h.
+ * input_value / input_value_size: 1, 2 or 3 size_t values; their product is
+ *   used as the local work size.
+ * param_value / param_value_size: when param_value is non-NULL it receives
+ *   one size_t.  With simd_sz = cl_kernel_get_simd_width(kernel):
+ *     CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR -> min(local, simd_sz)
+ *     CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR    -> ceil(local / simd_sz)
+ * param_value_size_ret: when non-NULL, receives sizeof(size_t).
+ *
+ * Returns CL_SUCCESS on success, CL_INVALID_DEVICE for a mismatched device
+ * and CL_INVALID_VALUE for an unknown param_name, an undersized param_value
+ * buffer or a malformed input_value.  A kernel rejected by CHECK_KERNEL
+ * presumably yields the code that macro stores in err (see note below).
+ */
+LOCAL cl_int
+cl_get_kernel_subgroup_info(cl_kernel kernel,
+                            cl_device_id device,
+                            cl_kernel_work_group_info param_name,
+                            size_t input_value_size,
+                            const void* input_value,
+                            size_t param_value_size,
+                            void* param_value,
+                            size_t* param_value_size_ret)
+{
+  int err = CL_SUCCESS;
+  const size_t *local_dims = (const size_t *)input_value;
+  size_t local_sz = 1;
+  size_t dim, i;
+
+  /* Validate the handle before kernel->program is dereferenced below.
+   * NOTE(review): CHECK_KERNEL is assumed to set err and jump to the error
+   * label for a NULL or invalid kernel - confirm against its definition. */
+  CHECK_KERNEL(kernel);
+  if (device != NULL && kernel->program->ctx->device != device)
+    return CL_INVALID_DEVICE;
+
+  /* Both supported queries share the same input and output layout. */
+  if (param_name != CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR &&
+      param_name != CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR)
+    return CL_INVALID_VALUE;
+
+  if (param_value && param_value_size < sizeof(size_t))
+    return CL_INVALID_VALUE;
+  if (param_value_size_ret != NULL)
+    *param_value_size_ret = sizeof(size_t);
+
+  /* input_value_size must describe exactly 1, 2 or 3 size_t entries. */
+  dim = input_value_size / sizeof(size_t);
+  if (input_value_size % sizeof(size_t) != 0 || dim < 1 || dim > 3)
+    return CL_INVALID_VALUE;
+  if (input_value == NULL)
+    return CL_INVALID_VALUE;
+
+  /* Flatten the per-dimension sizes into one work-item count. */
+  for (i = 0; i < dim; i++)
+    local_sz *= local_dims[i];
+
+  /* A NULL param_value is a size-only query: nothing more to compute. */
+  if (param_value) {
+    /* NOTE(review): simd_sz is used as a divisor below; this relies on
+     * cl_kernel_get_simd_width() never returning 0 - confirm. */
+    size_t simd_sz = cl_kernel_get_simd_width(kernel);
+    size_t result;
+
+    if (param_name == CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR) {
+      /* A sub-group never holds more than simd_sz work-items. */
+      result = local_sz >= simd_sz ? simd_sz : local_sz;
+    } else {
+      /* Number of simd_sz-wide sub-groups, rounding up. */
+      result = (local_sz + simd_sz - 1) / simd_sz;
+    }
+    *(size_t*)param_value = result;
+  }
+
+error:
+  return err;
+}
diff --git a/src/cl_device_id.h b/src/cl_device_id.h
index b01a6fb..7db125b 100644
--- a/src/cl_device_id.h
+++ b/src/cl_device_id.h
@@ -149,6 +149,15 @@  extern cl_int cl_get_kernel_workgroup_info(cl_kernel kernel,
                                            size_t           param_value_size,
                                            void *           param_value,
                                            size_t *         param_value_size_ret);
+/* Sub-group info query; backs clGetKernelSubGroupInfoKHR in cl_api.c. */
+extern cl_int cl_get_kernel_subgroup_info(cl_kernel kernel,
+                                          cl_device_id     device,
+                                          cl_kernel_work_group_info   param_name,
+                                          size_t           input_value_size,
+                                          const void *     input_value,
+                                          size_t           param_value_size,
+                                          void *           param_value,
+                                          size_t *         param_value_size_ret);
 /* Returns the Gen device ID */
 extern cl_int cl_device_get_version(cl_device_id device, cl_int *ver);
 extern size_t cl_get_kernel_max_wg_sz(cl_kernel);