[2/2] Use aligned16 and aligne4 kernel to copy for large 3D image with TILE_Y.

Submitted by Yan Wang on June 13, 2017, 8:31 a.m.

Details

Message ID 1497342702-23558-1-git-send-email-yan.wang@linux.intel.com
State New
Series "Series without cover letter"
Headers show

Commit Message

Yan Wang June 13, 2017, 8:31 a.m.
From: Yan Wang <yan.wang@linux.intel.com>

It is similar with 2D image for avoiding extended image width truncated.

Signed-off-by: Yan Wang <yan.wang@linux.intel.com>
---
 src/CMakeLists.txt                                 |  2 +
 src/cl_context.h                                   |  4 ++
 src/cl_mem.c                                       | 46 +++++++++++++++++++---
 .../cl_internal_copy_buffer_to_image_3d_align16.cl | 19 +++++++++
 .../cl_internal_copy_buffer_to_image_3d_align4.cl  | 19 +++++++++
 .../cl_internal_copy_image_3d_to_buffer_align16.cl | 20 ++++++++++
 .../cl_internal_copy_image_3d_to_buffer_align4.cl  | 20 ++++++++++
 7 files changed, 125 insertions(+), 5 deletions(-)
 create mode 100644 src/kernels/cl_internal_copy_buffer_to_image_3d_align16.cl
 create mode 100644 src/kernels/cl_internal_copy_buffer_to_image_3d_align4.cl
 create mode 100644 src/kernels/cl_internal_copy_image_3d_to_buffer_align16.cl
 create mode 100644 src/kernels/cl_internal_copy_image_3d_to_buffer_align4.cl

Patch hide | download patch | download mbox

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 87ad48b..ecb98b9 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -54,6 +54,8 @@  cl_internal_copy_image_2d_array_to_3d cl_internal_copy_image_3d_to_2d_array
 cl_internal_copy_image_2d_to_buffer cl_internal_copy_image_2d_to_buffer_align16 cl_internal_copy_image_3d_to_buffer
 cl_internal_copy_buffer_to_image_2d cl_internal_copy_buffer_to_image_2d_align16 cl_internal_copy_buffer_to_image_3d
 cl_internal_copy_buffer_to_image_2d_align4 cl_internal_copy_image_2d_to_buffer_align4
+cl_internal_copy_buffer_to_image_3d_align4 cl_internal_copy_image_3d_to_buffer_align4
+cl_internal_copy_buffer_to_image_3d_align16 cl_internal_copy_image_3d_to_buffer_align16
 cl_internal_fill_buf_align8 cl_internal_fill_buf_align4
 cl_internal_fill_buf_align2 cl_internal_fill_buf_unalign
 cl_internal_fill_buf_align128 cl_internal_fill_image_1d
diff --git a/src/cl_context.h b/src/cl_context.h
index 75bf895..b3a79bc 100644
--- a/src/cl_context.h
+++ b/src/cl_context.h
@@ -64,10 +64,14 @@  enum _cl_internal_ker_type {
   CL_ENQUEUE_COPY_IMAGE_2D_TO_BUFFER_ALIGN16,
   CL_ENQUEUE_COPY_IMAGE_2D_TO_BUFFER_ALIGN4,
   CL_ENQUEUE_COPY_IMAGE_3D_TO_BUFFER,   //copy image 3d tobuffer
+  CL_ENQUEUE_COPY_IMAGE_3D_TO_BUFFER_ALIGN16,
+  CL_ENQUEUE_COPY_IMAGE_3D_TO_BUFFER_ALIGN4,
   CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_2D,   //copy buffer to image 2d
   CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_2D_ALIGN16,
   CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_2D_ALIGN4,
   CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_3D,   //copy buffer to image 3d
+  CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_3D_ALIGN16,
+  CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_3D_ALIGN4,
   CL_ENQUEUE_FILL_BUFFER_UNALIGN,      //fill buffer with 1 aligne pattern, pattern size=1
   CL_ENQUEUE_FILL_BUFFER_ALIGN2,       //fill buffer with 2 aligne pattern, pattern size=2
   CL_ENQUEUE_FILL_BUFFER_ALIGN4,       //fill buffer with 4 aligne pattern, pattern size=4
diff --git a/src/cl_mem.c b/src/cl_mem.c
index b6dce3f..307db50 100644
--- a/src/cl_mem.c
+++ b/src/cl_mem.c
@@ -2162,13 +2162,13 @@  get_align_size_for_copy_kernel(struct _cl_mem_image* image, const size_t origin0
                             const size_t offset, cl_image_format *fmt) {
   size_t align_size = 0;
 
-  if((image->image_type == CL_MEM_OBJECT_IMAGE2D) && ((image->w * image->bpp) % ALIGN16 == 0) &&
+  if(((image->w * image->bpp) % ALIGN16 == 0) &&
       ((origin0 * image->bpp) % ALIGN16 == 0) && (region0 % ALIGN16 == 0) && (offset % ALIGN16 == 0)){
     fmt->image_channel_order = CL_RGBA;
     fmt->image_channel_data_type = CL_UNSIGNED_INT32;
     align_size = ALIGN16;
   }
-  else if((image->image_type == CL_MEM_OBJECT_IMAGE2D) && ((image->w * image->bpp) % ALIGN4 == 0) &&
+  else if(((image->w * image->bpp) % ALIGN4 == 0) &&
       ((origin0 * image->bpp) % ALIGN4 == 0) && (region0 % ALIGN4 == 0) && (offset % ALIGN4 == 0)){
     fmt->image_channel_order = CL_R;
     fmt->image_channel_data_type = CL_UNSIGNED_INT32;
@@ -2247,11 +2247,29 @@  cl_mem_copy_image_to_buffer(cl_command_queue queue, cl_event event, struct _cl_m
           cl_internal_copy_image_2d_to_buffer_str, (size_t)cl_internal_copy_image_2d_to_buffer_str_size, NULL);
     }
   }else if(image->image_type == CL_MEM_OBJECT_IMAGE3D) {
-    extern char cl_internal_copy_image_3d_to_buffer_str[];
-    extern size_t cl_internal_copy_image_3d_to_buffer_str_size;
+    if(align_size == ALIGN16){
+      extern char cl_internal_copy_image_3d_to_buffer_align16_str[];
+      extern size_t cl_internal_copy_image_3d_to_buffer_align16_str_size;
+
+      ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_3D_TO_BUFFER_ALIGN16,
+                cl_internal_copy_image_3d_to_buffer_align16_str,
+                (size_t)cl_internal_copy_image_3d_to_buffer_align16_str_size, NULL);
+    }
+    else if(align_size == ALIGN4){
+      extern char cl_internal_copy_image_3d_to_buffer_align4_str[];
+      extern size_t cl_internal_copy_image_3d_to_buffer_align4_str_size;
 
-    ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_3D_TO_BUFFER,
+      ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_3D_TO_BUFFER_ALIGN4,
+                cl_internal_copy_image_3d_to_buffer_align4_str,
+                (size_t)cl_internal_copy_image_3d_to_buffer_align4_str_size, NULL);
+    }
+    else{
+      extern char cl_internal_copy_image_3d_to_buffer_str[];
+      extern size_t cl_internal_copy_image_3d_to_buffer_str_size;
+
+      ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_3D_TO_BUFFER,
           cl_internal_copy_image_3d_to_buffer_str, (size_t)cl_internal_copy_image_3d_to_buffer_str_size, NULL);
+    }
   }
 
   if (!ker) {
@@ -2347,11 +2365,29 @@  cl_mem_copy_buffer_to_image(cl_command_queue queue, cl_event event, cl_mem buffe
           cl_internal_copy_buffer_to_image_2d_str, (size_t)cl_internal_copy_buffer_to_image_2d_str_size, NULL);
     }
   }else if(image->image_type == CL_MEM_OBJECT_IMAGE3D) {
+    if(align_size == ALIGN16){
+      extern char cl_internal_copy_buffer_to_image_3d_align16_str[];
+      extern size_t cl_internal_copy_buffer_to_image_3d_align16_str_size;
+
+      ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_3D_ALIGN16,
+                cl_internal_copy_buffer_to_image_3d_align16_str,
+                (size_t)cl_internal_copy_buffer_to_image_3d_align16_str_size, NULL);
+    }
+    else if(align_size == ALIGN4){
+      extern char cl_internal_copy_buffer_to_image_3d_align4_str[];
+      extern size_t cl_internal_copy_buffer_to_image_3d_align4_str_size;
+
+      ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_3D_ALIGN4,
+                cl_internal_copy_buffer_to_image_3d_align4_str,
+                (size_t)cl_internal_copy_buffer_to_image_3d_align4_str_size, NULL);
+    }
+    else{
       extern char cl_internal_copy_buffer_to_image_3d_str[];
       extern size_t cl_internal_copy_buffer_to_image_3d_str_size;
 
       ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_3D,
           cl_internal_copy_buffer_to_image_3d_str, (size_t)cl_internal_copy_buffer_to_image_3d_str_size, NULL);
+    }
   }
   if (!ker)
     return CL_OUT_OF_RESOURCES;
diff --git a/src/kernels/cl_internal_copy_buffer_to_image_3d_align16.cl b/src/kernels/cl_internal_copy_buffer_to_image_3d_align16.cl
new file mode 100644
index 0000000..32f1f63
--- /dev/null
+++ b/src/kernels/cl_internal_copy_buffer_to_image_3d_align16.cl
@@ -0,0 +1,19 @@ 
+kernel void __cl_copy_buffer_to_image_3d_align16(__write_only image3d_t image, global uint4* buffer,
+                                        unsigned int region0, unsigned int region1, unsigned int region2,
+                                        unsigned int dst_origin0, unsigned int dst_origin1, unsigned int dst_origin2,
+                                        unsigned int src_offset)
+{
+  int i = get_global_id(0);
+  int j = get_global_id(1);
+  int k = get_global_id(2);
+  uint4 color = (uint4)(0);
+  int4 dst_coord;
+  if((i >= region0) || (j>= region1) || (k>=region2))
+    return;
+  dst_coord.x = dst_origin0 + i;
+  dst_coord.y = dst_origin1 + j;
+  dst_coord.z = dst_origin2 + k;
+  src_offset += (k * region1 + j) * region0 + i;
+  color = buffer[src_offset];
+  write_imageui(image, dst_coord, color);
+}
diff --git a/src/kernels/cl_internal_copy_buffer_to_image_3d_align4.cl b/src/kernels/cl_internal_copy_buffer_to_image_3d_align4.cl
new file mode 100644
index 0000000..2ccbcf1
--- /dev/null
+++ b/src/kernels/cl_internal_copy_buffer_to_image_3d_align4.cl
@@ -0,0 +1,19 @@ 
+kernel void __cl_copy_buffer_to_image_3d_align4(__write_only image3d_t image, global uint* buffer,
+                                        unsigned int region0, unsigned int region1, unsigned int region2,
+                                        unsigned int dst_origin0, unsigned int dst_origin1, unsigned int dst_origin2,
+                                        unsigned int src_offset)
+{
+  int i = get_global_id(0);
+  int j = get_global_id(1);
+  int k = get_global_id(2);
+  uint4 color = (uint4)(0);
+  int4 dst_coord;
+  if((i >= region0) || (j>= region1) || (k>=region2))
+    return;
+  dst_coord.x = dst_origin0 + i;
+  dst_coord.y = dst_origin1 + j;
+  dst_coord.z = dst_origin2 + k;
+  src_offset += (k * region1 + j) * region0 + i;
+  color.x = buffer[src_offset];
+  write_imageui(image, dst_coord, color);
+}
diff --git a/src/kernels/cl_internal_copy_image_3d_to_buffer_align16.cl b/src/kernels/cl_internal_copy_image_3d_to_buffer_align16.cl
new file mode 100644
index 0000000..e116d47
--- /dev/null
+++ b/src/kernels/cl_internal_copy_image_3d_to_buffer_align16.cl
@@ -0,0 +1,20 @@ 
+kernel void __cl_copy_image_3d_to_buffer_align16 ( __read_only image3d_t image, global uint4* buffer,
+                                        unsigned int region0, unsigned int region1, unsigned int region2,
+                                        unsigned int src_origin0, unsigned int src_origin1, unsigned int src_origin2,
+                                        unsigned int dst_offset)
+{
+  int i = get_global_id(0);
+  int j = get_global_id(1);
+  int k = get_global_id(2);
+  uint4 color;
+  const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;
+  int4 src_coord;
+  if((i >= region0) || (j>= region1) || (k>=region2))
+    return;
+  src_coord.x = src_origin0 + i;
+  src_coord.y = src_origin1 + j;
+  src_coord.z = src_origin2 + k;
+  color = read_imageui(image, sampler, src_coord);
+  dst_offset += (k * region1 + j) * region0 + i;
+  *(buffer + dst_offset) = color;
+}
diff --git a/src/kernels/cl_internal_copy_image_3d_to_buffer_align4.cl b/src/kernels/cl_internal_copy_image_3d_to_buffer_align4.cl
new file mode 100644
index 0000000..d5374c4
--- /dev/null
+++ b/src/kernels/cl_internal_copy_image_3d_to_buffer_align4.cl
@@ -0,0 +1,20 @@ 
+kernel void __cl_copy_image_3d_to_buffer_align4 ( __read_only image3d_t image, global uint* buffer,
+                                        unsigned int region0, unsigned int region1, unsigned int region2,
+                                        unsigned int src_origin0, unsigned int src_origin1, unsigned int src_origin2,
+                                        unsigned int dst_offset)
+{
+  int i = get_global_id(0);
+  int j = get_global_id(1);
+  int k = get_global_id(2);
+  uint4 color;
+  const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;
+  int4 src_coord;
+  if((i >= region0) || (j>= region1) || (k>=region2))
+    return;
+  src_coord.x = src_origin0 + i;
+  src_coord.y = src_origin1 + j;
+  src_coord.z = src_origin2 + k;
+  color = read_imageui(image, sampler, src_coord);
+  dst_offset += (k * region1 + j) * region0 + i;
+  buffer[dst_offset] = color.x;
+}

Comments

Yang, Rong R June 14, 2017, 7:36 a.m.
LGTM, except some format. I have run git clang-format by manual and pushed, thanks.

> -----Original Message-----

> From: Beignet [mailto:beignet-bounces@lists.freedesktop.org] On Behalf Of

> yan.wang@linux.intel.com

> Sent: Tuesday, June 13, 2017 16:32

> To: beignet@lists.freedesktop.org

> Cc: Yan Wang <yan.wang@linux.intel.com>

> Subject: [Beignet] [PATCH 2/2] Use aligned16 and aligne4 kernel to copy for

> large 3D image with TILE_Y.

> 

> From: Yan Wang <yan.wang@linux.intel.com>

> 

> It is similar with 2D image for avoiding extended image width truncated.

> 

> Signed-off-by: Yan Wang <yan.wang@linux.intel.com>

> ---

>  src/CMakeLists.txt                                 |  2 +

>  src/cl_context.h                                   |  4 ++

>  src/cl_mem.c                                       | 46 +++++++++++++++++++---

>  .../cl_internal_copy_buffer_to_image_3d_align16.cl | 19

> +++++++++  .../cl_internal_copy_buffer_to_image_3d_align4.cl  | 19

> +++++++++  .../cl_internal_copy_image_3d_to_buffer_align16.cl | 20

> ++++++++++  .../cl_internal_copy_image_3d_to_buffer_align4.cl  | 20

> ++++++++++

>  7 files changed, 125 insertions(+), 5 deletions(-)  create mode 100644

> src/kernels/cl_internal_copy_buffer_to_image_3d_align16.cl

>  create mode 100644

> src/kernels/cl_internal_copy_buffer_to_image_3d_align4.cl

>  create mode 100644

> src/kernels/cl_internal_copy_image_3d_to_buffer_align16.cl

>  create mode 100644

> src/kernels/cl_internal_copy_image_3d_to_buffer_align4.cl

> 

> diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 87ad48b..ecb98b9

> 100644

> --- a/src/CMakeLists.txt

> +++ b/src/CMakeLists.txt

> @@ -54,6 +54,8 @@ cl_internal_copy_image_2d_array_to_3d

> cl_internal_copy_image_3d_to_2d_array

>  cl_internal_copy_image_2d_to_buffer

> cl_internal_copy_image_2d_to_buffer_align16

> cl_internal_copy_image_3d_to_buffer

>  cl_internal_copy_buffer_to_image_2d

> cl_internal_copy_buffer_to_image_2d_align16

> cl_internal_copy_buffer_to_image_3d

>  cl_internal_copy_buffer_to_image_2d_align4

> cl_internal_copy_image_2d_to_buffer_align4

> +cl_internal_copy_buffer_to_image_3d_align4

> +cl_internal_copy_image_3d_to_buffer_align4

> +cl_internal_copy_buffer_to_image_3d_align16

> +cl_internal_copy_image_3d_to_buffer_align16

>  cl_internal_fill_buf_align8 cl_internal_fill_buf_align4

>  cl_internal_fill_buf_align2 cl_internal_fill_buf_unalign

>  cl_internal_fill_buf_align128 cl_internal_fill_image_1d diff --git

> a/src/cl_context.h b/src/cl_context.h index 75bf895..b3a79bc 100644

> --- a/src/cl_context.h

> +++ b/src/cl_context.h

> @@ -64,10 +64,14 @@ enum _cl_internal_ker_type {

>    CL_ENQUEUE_COPY_IMAGE_2D_TO_BUFFER_ALIGN16,

>    CL_ENQUEUE_COPY_IMAGE_2D_TO_BUFFER_ALIGN4,

>    CL_ENQUEUE_COPY_IMAGE_3D_TO_BUFFER,   //copy image 3d tobuffer

> +  CL_ENQUEUE_COPY_IMAGE_3D_TO_BUFFER_ALIGN16,

> +  CL_ENQUEUE_COPY_IMAGE_3D_TO_BUFFER_ALIGN4,

>    CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_2D,   //copy buffer to image 2d

>    CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_2D_ALIGN16,

>    CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_2D_ALIGN4,

>    CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_3D,   //copy buffer to image 3d

> +  CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_3D_ALIGN16,

> +  CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_3D_ALIGN4,

>    CL_ENQUEUE_FILL_BUFFER_UNALIGN,      //fill buffer with 1 aligne pattern,

> pattern size=1

>    CL_ENQUEUE_FILL_BUFFER_ALIGN2,       //fill buffer with 2 aligne pattern,

> pattern size=2

>    CL_ENQUEUE_FILL_BUFFER_ALIGN4,       //fill buffer with 4 aligne pattern,

> pattern size=4

> diff --git a/src/cl_mem.c b/src/cl_mem.c index b6dce3f..307db50 100644

> --- a/src/cl_mem.c

> +++ b/src/cl_mem.c

> @@ -2162,13 +2162,13 @@ get_align_size_for_copy_kernel(struct

> _cl_mem_image* image, const size_t origin0

>                              const size_t offset, cl_image_format *fmt) {

>    size_t align_size = 0;

> 

> -  if((image->image_type == CL_MEM_OBJECT_IMAGE2D) && ((image->w *

> image->bpp) % ALIGN16 == 0) &&

> +  if(((image->w * image->bpp) % ALIGN16 == 0) &&

>        ((origin0 * image->bpp) % ALIGN16 == 0) && (region0 % ALIGN16 == 0)

> && (offset % ALIGN16 == 0)){

>      fmt->image_channel_order = CL_RGBA;

>      fmt->image_channel_data_type = CL_UNSIGNED_INT32;

>      align_size = ALIGN16;

>    }

> -  else if((image->image_type == CL_MEM_OBJECT_IMAGE2D) && ((image-

> >w * image->bpp) % ALIGN4 == 0) &&

> +  else if(((image->w * image->bpp) % ALIGN4 == 0) &&

>        ((origin0 * image->bpp) % ALIGN4 == 0) && (region0 % ALIGN4 == 0) &&

> (offset % ALIGN4 == 0)){

>      fmt->image_channel_order = CL_R;

>      fmt->image_channel_data_type = CL_UNSIGNED_INT32; @@ -2247,11

> +2247,29 @@ cl_mem_copy_image_to_buffer(cl_command_queue queue,

> cl_event event, struct _cl_m

>            cl_internal_copy_image_2d_to_buffer_str,

> (size_t)cl_internal_copy_image_2d_to_buffer_str_size, NULL);

>      }

>    }else if(image->image_type == CL_MEM_OBJECT_IMAGE3D) {

> -    extern char cl_internal_copy_image_3d_to_buffer_str[];

> -    extern size_t cl_internal_copy_image_3d_to_buffer_str_size;

> +    if(align_size == ALIGN16){

> +      extern char cl_internal_copy_image_3d_to_buffer_align16_str[];

> +      extern size_t

> + cl_internal_copy_image_3d_to_buffer_align16_str_size;

> +

> +      ker = cl_context_get_static_kernel_from_bin(queue->ctx,

> CL_ENQUEUE_COPY_IMAGE_3D_TO_BUFFER_ALIGN16,

> +                cl_internal_copy_image_3d_to_buffer_align16_str,

> +                (size_t)cl_internal_copy_image_3d_to_buffer_align16_str_size,

> NULL);

> +    }

> +    else if(align_size == ALIGN4){

> +      extern char cl_internal_copy_image_3d_to_buffer_align4_str[];

> +      extern size_t

> + cl_internal_copy_image_3d_to_buffer_align4_str_size;

> 

> -    ker = cl_context_get_static_kernel_from_bin(queue->ctx,

> CL_ENQUEUE_COPY_IMAGE_3D_TO_BUFFER,

> +      ker = cl_context_get_static_kernel_from_bin(queue->ctx,

> CL_ENQUEUE_COPY_IMAGE_3D_TO_BUFFER_ALIGN4,

> +                cl_internal_copy_image_3d_to_buffer_align4_str,

> +                (size_t)cl_internal_copy_image_3d_to_buffer_align4_str_size,

> NULL);

> +    }

> +    else{

> +      extern char cl_internal_copy_image_3d_to_buffer_str[];

> +      extern size_t cl_internal_copy_image_3d_to_buffer_str_size;

> +

> +      ker = cl_context_get_static_kernel_from_bin(queue->ctx,

> + CL_ENQUEUE_COPY_IMAGE_3D_TO_BUFFER,

>            cl_internal_copy_image_3d_to_buffer_str,

> (size_t)cl_internal_copy_image_3d_to_buffer_str_size, NULL);

> +    }

>    }

> 

>    if (!ker) {

> @@ -2347,11 +2365,29 @@

> cl_mem_copy_buffer_to_image(cl_command_queue queue, cl_event

> event, cl_mem buffe

>            cl_internal_copy_buffer_to_image_2d_str,

> (size_t)cl_internal_copy_buffer_to_image_2d_str_size, NULL);

>      }

>    }else if(image->image_type == CL_MEM_OBJECT_IMAGE3D) {

> +    if(align_size == ALIGN16){

> +      extern char cl_internal_copy_buffer_to_image_3d_align16_str[];

> +      extern size_t

> + cl_internal_copy_buffer_to_image_3d_align16_str_size;

> +

> +      ker = cl_context_get_static_kernel_from_bin(queue->ctx,

> CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_3D_ALIGN16,

> +                cl_internal_copy_buffer_to_image_3d_align16_str,

> +                (size_t)cl_internal_copy_buffer_to_image_3d_align16_str_size,

> NULL);

> +    }

> +    else if(align_size == ALIGN4){

> +      extern char cl_internal_copy_buffer_to_image_3d_align4_str[];

> +      extern size_t

> + cl_internal_copy_buffer_to_image_3d_align4_str_size;

> +

> +      ker = cl_context_get_static_kernel_from_bin(queue->ctx,

> CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_3D_ALIGN4,

> +                cl_internal_copy_buffer_to_image_3d_align4_str,

> +                (size_t)cl_internal_copy_buffer_to_image_3d_align4_str_size,

> NULL);

> +    }

> +    else{

>        extern char cl_internal_copy_buffer_to_image_3d_str[];

>        extern size_t cl_internal_copy_buffer_to_image_3d_str_size;

> 

>        ker = cl_context_get_static_kernel_from_bin(queue->ctx,

> CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_3D,

>            cl_internal_copy_buffer_to_image_3d_str,

> (size_t)cl_internal_copy_buffer_to_image_3d_str_size, NULL);

> +    }

>    }

>    if (!ker)

>      return CL_OUT_OF_RESOURCES;

> diff --git a/src/kernels/cl_internal_copy_buffer_to_image_3d_align16.cl

> b/src/kernels/cl_internal_copy_buffer_to_image_3d_align16.cl

> new file mode 100644

> index 0000000..32f1f63

> --- /dev/null

> +++ b/src/kernels/cl_internal_copy_buffer_to_image_3d_align16.cl

> @@ -0,0 +1,19 @@

> +kernel void __cl_copy_buffer_to_image_3d_align16(__write_only

> image3d_t image, global uint4* buffer,

> +                                        unsigned int region0, unsigned int region1, unsigned int

> region2,

> +                                        unsigned int dst_origin0, unsigned int dst_origin1,

> unsigned int dst_origin2,

> +                                        unsigned int src_offset) {

> +  int i = get_global_id(0);

> +  int j = get_global_id(1);

> +  int k = get_global_id(2);

> +  uint4 color = (uint4)(0);

> +  int4 dst_coord;

> +  if((i >= region0) || (j>= region1) || (k>=region2))

> +    return;

> +  dst_coord.x = dst_origin0 + i;

> +  dst_coord.y = dst_origin1 + j;

> +  dst_coord.z = dst_origin2 + k;

> +  src_offset += (k * region1 + j) * region0 + i;

> +  color = buffer[src_offset];

> +  write_imageui(image, dst_coord, color); }

> diff --git a/src/kernels/cl_internal_copy_buffer_to_image_3d_align4.cl

> b/src/kernels/cl_internal_copy_buffer_to_image_3d_align4.cl

> new file mode 100644

> index 0000000..2ccbcf1

> --- /dev/null

> +++ b/src/kernels/cl_internal_copy_buffer_to_image_3d_align4.cl

> @@ -0,0 +1,19 @@

> +kernel void __cl_copy_buffer_to_image_3d_align4(__write_only

> image3d_t image, global uint* buffer,

> +                                        unsigned int region0, unsigned int region1, unsigned int

> region2,

> +                                        unsigned int dst_origin0, unsigned int dst_origin1,

> unsigned int dst_origin2,

> +                                        unsigned int src_offset) {

> +  int i = get_global_id(0);

> +  int j = get_global_id(1);

> +  int k = get_global_id(2);

> +  uint4 color = (uint4)(0);

> +  int4 dst_coord;

> +  if((i >= region0) || (j>= region1) || (k>=region2))

> +    return;

> +  dst_coord.x = dst_origin0 + i;

> +  dst_coord.y = dst_origin1 + j;

> +  dst_coord.z = dst_origin2 + k;

> +  src_offset += (k * region1 + j) * region0 + i;

> +  color.x = buffer[src_offset];

> +  write_imageui(image, dst_coord, color); }

> diff --git a/src/kernels/cl_internal_copy_image_3d_to_buffer_align16.cl

> b/src/kernels/cl_internal_copy_image_3d_to_buffer_align16.cl

> new file mode 100644

> index 0000000..e116d47

> --- /dev/null

> +++ b/src/kernels/cl_internal_copy_image_3d_to_buffer_align16.cl

> @@ -0,0 +1,20 @@

> +kernel void __cl_copy_image_3d_to_buffer_align16 ( __read_only

> image3d_t image, global uint4* buffer,

> +                                        unsigned int region0, unsigned int region1, unsigned int

> region2,

> +                                        unsigned int src_origin0, unsigned int src_origin1,

> unsigned int src_origin2,

> +                                        unsigned int dst_offset) {

> +  int i = get_global_id(0);

> +  int j = get_global_id(1);

> +  int k = get_global_id(2);

> +  uint4 color;

> +  const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE |

> +CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;

> +  int4 src_coord;

> +  if((i >= region0) || (j>= region1) || (k>=region2))

> +    return;

> +  src_coord.x = src_origin0 + i;

> +  src_coord.y = src_origin1 + j;

> +  src_coord.z = src_origin2 + k;

> +  color = read_imageui(image, sampler, src_coord);

> +  dst_offset += (k * region1 + j) * region0 + i;

> +  *(buffer + dst_offset) = color;

> +}

> diff --git a/src/kernels/cl_internal_copy_image_3d_to_buffer_align4.cl

> b/src/kernels/cl_internal_copy_image_3d_to_buffer_align4.cl

> new file mode 100644

> index 0000000..d5374c4

> --- /dev/null

> +++ b/src/kernels/cl_internal_copy_image_3d_to_buffer_align4.cl

> @@ -0,0 +1,20 @@

> +kernel void __cl_copy_image_3d_to_buffer_align4 ( __read_only

> image3d_t image, global uint* buffer,

> +                                        unsigned int region0, unsigned int region1, unsigned int

> region2,

> +                                        unsigned int src_origin0, unsigned int src_origin1,

> unsigned int src_origin2,

> +                                        unsigned int dst_offset) {

> +  int i = get_global_id(0);

> +  int j = get_global_id(1);

> +  int k = get_global_id(2);

> +  uint4 color;

> +  const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE |

> +CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;

> +  int4 src_coord;

> +  if((i >= region0) || (j>= region1) || (k>=region2))

> +    return;

> +  src_coord.x = src_origin0 + i;

> +  src_coord.y = src_origin1 + j;

> +  src_coord.z = src_origin2 + k;

> +  color = read_imageui(image, sampler, src_coord);

> +  dst_offset += (k * region1 + j) * region0 + i;

> +  buffer[dst_offset] = color.x;

> +}

> --

> 2.7.4

> 

> _______________________________________________

> Beignet mailing list

> Beignet@lists.freedesktop.org

> https://lists.freedesktop.org/mailman/listinfo/beignet
Yan Wang June 14, 2017, 7:40 a.m.
Very thanks.



yan.wang
 
From: Yang, Rong R

Date: 2017-06-14 15:36
To: yan.wang@linux.intel.com; beignet@lists.freedesktop.org
Subject: Re: [Beignet] [PATCH 2/2] Use aligned16 and aligne4 kernel to copy for large 3D image with TILE_Y.
LGTM, except some format. I have run git clang-format by manual and pushed, thanks.
 
> -----Original Message-----

> From: Beignet [mailto:beignet-bounces@lists.freedesktop.org] On Behalf Of

> yan.wang@linux.intel.com

> Sent: Tuesday, June 13, 2017 16:32

> To: beignet@lists.freedesktop.org

> Cc: Yan Wang <yan.wang@linux.intel.com>

> Subject: [Beignet] [PATCH 2/2] Use aligned16 and aligne4 kernel to copy for

> large 3D image with TILE_Y.

> 

> From: Yan Wang <yan.wang@linux.intel.com>

> 

> It is similar with 2D image for avoiding extended image width truncated.

> 

> Signed-off-by: Yan Wang <yan.wang@linux.intel.com>

> ---

>  src/CMakeLists.txt                                 |  2 +

>  src/cl_context.h                                   |  4 ++

>  src/cl_mem.c                                       | 46 +++++++++++++++++++---

>  .../cl_internal_copy_buffer_to_image_3d_align16.cl | 19

> +++++++++  .../cl_internal_copy_buffer_to_image_3d_align4.cl  | 19

> +++++++++  .../cl_internal_copy_image_3d_to_buffer_align16.cl | 20

> ++++++++++  .../cl_internal_copy_image_3d_to_buffer_align4.cl  | 20

> ++++++++++

>  7 files changed, 125 insertions(+), 5 deletions(-)  create mode 100644

> src/kernels/cl_internal_copy_buffer_to_image_3d_align16.cl

>  create mode 100644

> src/kernels/cl_internal_copy_buffer_to_image_3d_align4.cl

>  create mode 100644

> src/kernels/cl_internal_copy_image_3d_to_buffer_align16.cl

>  create mode 100644

> src/kernels/cl_internal_copy_image_3d_to_buffer_align4.cl

> 

> diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 87ad48b..ecb98b9

> 100644

> --- a/src/CMakeLists.txt

> +++ b/src/CMakeLists.txt

> @@ -54,6 +54,8 @@ cl_internal_copy_image_2d_array_to_3d

> cl_internal_copy_image_3d_to_2d_array

>  cl_internal_copy_image_2d_to_buffer

> cl_internal_copy_image_2d_to_buffer_align16

> cl_internal_copy_image_3d_to_buffer

>  cl_internal_copy_buffer_to_image_2d

> cl_internal_copy_buffer_to_image_2d_align16

> cl_internal_copy_buffer_to_image_3d

>  cl_internal_copy_buffer_to_image_2d_align4

> cl_internal_copy_image_2d_to_buffer_align4

> +cl_internal_copy_buffer_to_image_3d_align4

> +cl_internal_copy_image_3d_to_buffer_align4

> +cl_internal_copy_buffer_to_image_3d_align16

> +cl_internal_copy_image_3d_to_buffer_align16

>  cl_internal_fill_buf_align8 cl_internal_fill_buf_align4

>  cl_internal_fill_buf_align2 cl_internal_fill_buf_unalign

>  cl_internal_fill_buf_align128 cl_internal_fill_image_1d diff --git

> a/src/cl_context.h b/src/cl_context.h index 75bf895..b3a79bc 100644

> --- a/src/cl_context.h

> +++ b/src/cl_context.h

> @@ -64,10 +64,14 @@ enum _cl_internal_ker_type {

>    CL_ENQUEUE_COPY_IMAGE_2D_TO_BUFFER_ALIGN16,

>    CL_ENQUEUE_COPY_IMAGE_2D_TO_BUFFER_ALIGN4,

>    CL_ENQUEUE_COPY_IMAGE_3D_TO_BUFFER,   //copy image 3d tobuffer

> +  CL_ENQUEUE_COPY_IMAGE_3D_TO_BUFFER_ALIGN16,

> +  CL_ENQUEUE_COPY_IMAGE_3D_TO_BUFFER_ALIGN4,

>    CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_2D,   //copy buffer to image 2d

>    CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_2D_ALIGN16,

>    CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_2D_ALIGN4,

>    CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_3D,   //copy buffer to image 3d

> +  CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_3D_ALIGN16,

> +  CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_3D_ALIGN4,

>    CL_ENQUEUE_FILL_BUFFER_UNALIGN,      //fill buffer with 1 aligne pattern,

> pattern size=1

>    CL_ENQUEUE_FILL_BUFFER_ALIGN2,       //fill buffer with 2 aligne pattern,

> pattern size=2

>    CL_ENQUEUE_FILL_BUFFER_ALIGN4,       //fill buffer with 4 aligne pattern,

> pattern size=4

> diff --git a/src/cl_mem.c b/src/cl_mem.c index b6dce3f..307db50 100644

> --- a/src/cl_mem.c

> +++ b/src/cl_mem.c

> @@ -2162,13 +2162,13 @@ get_align_size_for_copy_kernel(struct

> _cl_mem_image* image, const size_t origin0

>                              const size_t offset, cl_image_format *fmt) {

>    size_t align_size = 0;

> 

> -  if((image->image_type == CL_MEM_OBJECT_IMAGE2D) && ((image->w *

> image->bpp) % ALIGN16 == 0) &&

> +  if(((image->w * image->bpp) % ALIGN16 == 0) &&

>        ((origin0 * image->bpp) % ALIGN16 == 0) && (region0 % ALIGN16 == 0)

> && (offset % ALIGN16 == 0)){

>      fmt->image_channel_order = CL_RGBA;

>      fmt->image_channel_data_type = CL_UNSIGNED_INT32;

>      align_size = ALIGN16;

>    }

> -  else if((image->image_type == CL_MEM_OBJECT_IMAGE2D) && ((image-

> >w * image->bpp) % ALIGN4 == 0) &&

> +  else if(((image->w * image->bpp) % ALIGN4 == 0) &&

>        ((origin0 * image->bpp) % ALIGN4 == 0) && (region0 % ALIGN4 == 0) &&

> (offset % ALIGN4 == 0)){

>      fmt->image_channel_order = CL_R;

>      fmt->image_channel_data_type = CL_UNSIGNED_INT32; @@ -2247,11

> +2247,29 @@ cl_mem_copy_image_to_buffer(cl_command_queue queue,

> cl_event event, struct _cl_m

>            cl_internal_copy_image_2d_to_buffer_str,

> (size_t)cl_internal_copy_image_2d_to_buffer_str_size, NULL);

>      }

>    }else if(image->image_type == CL_MEM_OBJECT_IMAGE3D) {

> -    extern char cl_internal_copy_image_3d_to_buffer_str[];

> -    extern size_t cl_internal_copy_image_3d_to_buffer_str_size;

> +    if(align_size == ALIGN16){

> +      extern char cl_internal_copy_image_3d_to_buffer_align16_str[];

> +      extern size_t

> + cl_internal_copy_image_3d_to_buffer_align16_str_size;

> +

> +      ker = cl_context_get_static_kernel_from_bin(queue->ctx,

> CL_ENQUEUE_COPY_IMAGE_3D_TO_BUFFER_ALIGN16,

> +                cl_internal_copy_image_3d_to_buffer_align16_str,

> +                (size_t)cl_internal_copy_image_3d_to_buffer_align16_str_size,

> NULL);

> +    }

> +    else if(align_size == ALIGN4){

> +      extern char cl_internal_copy_image_3d_to_buffer_align4_str[];

> +      extern size_t

> + cl_internal_copy_image_3d_to_buffer_align4_str_size;

> 

> -    ker = cl_context_get_static_kernel_from_bin(queue->ctx,

> CL_ENQUEUE_COPY_IMAGE_3D_TO_BUFFER,

> +      ker = cl_context_get_static_kernel_from_bin(queue->ctx,

> CL_ENQUEUE_COPY_IMAGE_3D_TO_BUFFER_ALIGN4,

> +                cl_internal_copy_image_3d_to_buffer_align4_str,

> +                (size_t)cl_internal_copy_image_3d_to_buffer_align4_str_size,

> NULL);

> +    }

> +    else{

> +      extern char cl_internal_copy_image_3d_to_buffer_str[];

> +      extern size_t cl_internal_copy_image_3d_to_buffer_str_size;

> +

> +      ker = cl_context_get_static_kernel_from_bin(queue->ctx,

> + CL_ENQUEUE_COPY_IMAGE_3D_TO_BUFFER,

>            cl_internal_copy_image_3d_to_buffer_str,

> (size_t)cl_internal_copy_image_3d_to_buffer_str_size, NULL);

> +    }

>    }

> 

>    if (!ker) {

> @@ -2347,11 +2365,29 @@

> cl_mem_copy_buffer_to_image(cl_command_queue queue, cl_event

> event, cl_mem buffe

>            cl_internal_copy_buffer_to_image_2d_str,

> (size_t)cl_internal_copy_buffer_to_image_2d_str_size, NULL);

>      }

>    }else if(image->image_type == CL_MEM_OBJECT_IMAGE3D) {

> +    if(align_size == ALIGN16){

> +      extern char cl_internal_copy_buffer_to_image_3d_align16_str[];

> +      extern size_t

> + cl_internal_copy_buffer_to_image_3d_align16_str_size;

> +

> +      ker = cl_context_get_static_kernel_from_bin(queue->ctx,

> CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_3D_ALIGN16,

> +                cl_internal_copy_buffer_to_image_3d_align16_str,

> +                (size_t)cl_internal_copy_buffer_to_image_3d_align16_str_size,

> NULL);

> +    }

> +    else if(align_size == ALIGN4){

> +      extern char cl_internal_copy_buffer_to_image_3d_align4_str[];

> +      extern size_t

> + cl_internal_copy_buffer_to_image_3d_align4_str_size;

> +

> +      ker = cl_context_get_static_kernel_from_bin(queue->ctx,

> CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_3D_ALIGN4,

> +                cl_internal_copy_buffer_to_image_3d_align4_str,

> +                (size_t)cl_internal_copy_buffer_to_image_3d_align4_str_size,

> NULL);

> +    }

> +    else{

>        extern char cl_internal_copy_buffer_to_image_3d_str[];

>        extern size_t cl_internal_copy_buffer_to_image_3d_str_size;

> 

>        ker = cl_context_get_static_kernel_from_bin(queue->ctx,

> CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_3D,

>            cl_internal_copy_buffer_to_image_3d_str,

> (size_t)cl_internal_copy_buffer_to_image_3d_str_size, NULL);

> +    }

>    }

>    if (!ker)

>      return CL_OUT_OF_RESOURCES;

> diff --git a/src/kernels/cl_internal_copy_buffer_to_image_3d_align16.cl

> b/src/kernels/cl_internal_copy_buffer_to_image_3d_align16.cl

> new file mode 100644

> index 0000000..32f1f63

> --- /dev/null

> +++ b/src/kernels/cl_internal_copy_buffer_to_image_3d_align16.cl

> @@ -0,0 +1,19 @@

> +kernel void __cl_copy_buffer_to_image_3d_align16(__write_only

> image3d_t image, global uint4* buffer,

> +                                        unsigned int region0, unsigned int region1, unsigned int

> region2,

> +                                        unsigned int dst_origin0, unsigned int dst_origin1,

> unsigned int dst_origin2,

> +                                        unsigned int src_offset) {

> +  int i = get_global_id(0);

> +  int j = get_global_id(1);

> +  int k = get_global_id(2);

> +  uint4 color = (uint4)(0);

> +  int4 dst_coord;

> +  if((i >= region0) || (j>= region1) || (k>=region2))

> +    return;

> +  dst_coord.x = dst_origin0 + i;

> +  dst_coord.y = dst_origin1 + j;

> +  dst_coord.z = dst_origin2 + k;

> +  src_offset += (k * region1 + j) * region0 + i;

> +  color = buffer[src_offset];

> +  write_imageui(image, dst_coord, color); }

> diff --git a/src/kernels/cl_internal_copy_buffer_to_image_3d_align4.cl

> b/src/kernels/cl_internal_copy_buffer_to_image_3d_align4.cl

> new file mode 100644

> index 0000000..2ccbcf1

> --- /dev/null

> +++ b/src/kernels/cl_internal_copy_buffer_to_image_3d_align4.cl

> @@ -0,0 +1,19 @@

> +kernel void __cl_copy_buffer_to_image_3d_align4(__write_only

> image3d_t image, global uint* buffer,

> +                                        unsigned int region0, unsigned int region1, unsigned int

> region2,

> +                                        unsigned int dst_origin0, unsigned int dst_origin1,

> unsigned int dst_origin2,

> +                                        unsigned int src_offset) {

> +  int i = get_global_id(0);

> +  int j = get_global_id(1);

> +  int k = get_global_id(2);

> +  uint4 color = (uint4)(0);

> +  int4 dst_coord;

> +  if((i >= region0) || (j>= region1) || (k>=region2))

> +    return;

> +  dst_coord.x = dst_origin0 + i;

> +  dst_coord.y = dst_origin1 + j;

> +  dst_coord.z = dst_origin2 + k;

> +  src_offset += (k * region1 + j) * region0 + i;

> +  color.x = buffer[src_offset];

> +  write_imageui(image, dst_coord, color); }

> diff --git a/src/kernels/cl_internal_copy_image_3d_to_buffer_align16.cl

> b/src/kernels/cl_internal_copy_image_3d_to_buffer_align16.cl

> new file mode 100644

> index 0000000..e116d47

> --- /dev/null

> +++ b/src/kernels/cl_internal_copy_image_3d_to_buffer_align16.cl

> @@ -0,0 +1,20 @@

> +kernel void __cl_copy_image_3d_to_buffer_align16 ( __read_only

> image3d_t image, global uint4* buffer,

> +                                        unsigned int region0, unsigned int region1, unsigned int

> region2,

> +                                        unsigned int src_origin0, unsigned int src_origin1,

> unsigned int src_origin2,

> +                                        unsigned int dst_offset) {

> +  int i = get_global_id(0);

> +  int j = get_global_id(1);

> +  int k = get_global_id(2);

> +  uint4 color;

> +  const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE |

> +CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;

> +  int4 src_coord;

> +  if((i >= region0) || (j>= region1) || (k>=region2))

> +    return;

> +  src_coord.x = src_origin0 + i;

> +  src_coord.y = src_origin1 + j;

> +  src_coord.z = src_origin2 + k;

> +  color = read_imageui(image, sampler, src_coord);

> +  dst_offset += (k * region1 + j) * region0 + i;

> +  *(buffer + dst_offset) = color;

> +}

> diff --git a/src/kernels/cl_internal_copy_image_3d_to_buffer_align4.cl

> b/src/kernels/cl_internal_copy_image_3d_to_buffer_align4.cl

> new file mode 100644

> index 0000000..d5374c4

> --- /dev/null

> +++ b/src/kernels/cl_internal_copy_image_3d_to_buffer_align4.cl

> @@ -0,0 +1,20 @@

> +kernel void __cl_copy_image_3d_to_buffer_align4 ( __read_only

> image3d_t image, global uint* buffer,

> +                                        unsigned int region0, unsigned int region1, unsigned int

> region2,

> +                                        unsigned int src_origin0, unsigned int src_origin1,

> unsigned int src_origin2,

> +                                        unsigned int dst_offset) {

> +  int i = get_global_id(0);

> +  int j = get_global_id(1);

> +  int k = get_global_id(2);

> +  uint4 color;

> +  const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE |

> +CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;

> +  int4 src_coord;

> +  if((i >= region0) || (j>= region1) || (k>=region2))

> +    return;

> +  src_coord.x = src_origin0 + i;

> +  src_coord.y = src_origin1 + j;

> +  src_coord.z = src_origin2 + k;

> +  color = read_imageui(image, sampler, src_coord);

> +  dst_offset += (k * region1 + j) * region0 + i;

> +  buffer[dst_offset] = color.x;

> +}

> --

> 2.7.4

> 

> _______________________________________________

> Beignet mailing list

> Beignet@lists.freedesktop.org

> https://lists.freedesktop.org/mailman/listinfo/beignet

_______________________________________________
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet