[2/3] enable create image 2d from buffer in clCreateImage.

Submitted by Luo, Xionghu on July 23, 2015, 2:54 a.m.

Details

Message ID 1437620054-10449-2-git-send-email-xionghu.luo@intel.com
State New
Headers show

Not browsing as part of any series.

Commit Message

Luo, Xionghu July 23, 2015, 2:54 a.m.
From: Luo Xionghu <xionghu.luo@intel.com>

this patch allows create 2d image with a cl buffer with zero copy.

Signed-off-by: Luo Xionghu <xionghu.luo@intel.com>
---
 src/cl_api.c        |   3 +-
 src/cl_device_id.c  |   1 +
 src/cl_device_id.h  |   1 +
 src/cl_extensions.c |   2 +
 src/cl_gt_device.h  |   1 +
 src/cl_mem.c        | 109 +++++++++++++++++++++++++++++++++++++++-------------
 src/cl_mem.h        |   1 +
 7 files changed, 90 insertions(+), 28 deletions(-)

Patch hide | download patch | download mbox

diff --git a/src/cl_api.c b/src/cl_api.c
index 69eb0bc..28783fd 100644
--- a/src/cl_api.c
+++ b/src/cl_api.c
@@ -549,8 +549,9 @@  clCreateImage(cl_context context,
     goto error;
   }
   /* buffer refers to a valid buffer memory object if image_type is
-     CL_MEM_OBJECT_IMAGE1D_BUFFER. Otherwise it must be NULL. */
+     CL_MEM_OBJECT_IMAGE1D_BUFFER or CL_MEM_OBJECT_IMAGE2D. Otherwise it must be NULL. */
   if (image_desc->image_type != CL_MEM_OBJECT_IMAGE1D_BUFFER &&
+      image_desc->image_type != CL_MEM_OBJECT_IMAGE2D &&
          image_desc->buffer) {
     err = CL_INVALID_IMAGE_DESCRIPTOR;
     goto error;
diff --git a/src/cl_device_id.c b/src/cl_device_id.c
index e9e2c16..0713456 100644
--- a/src/cl_device_id.c
+++ b/src/cl_device_id.c
@@ -808,6 +808,7 @@  cl_get_device_info(cl_device_id     device,
     DECL_FIELD(PARTITION_AFFINITY_DOMAIN, affinity_domain)
     DECL_FIELD(PARTITION_TYPE, partition_type)
     DECL_FIELD(REFERENCE_COUNT, device_reference_count)
+    DECL_FIELD(IMAGE_PITCH_ALIGNMENT, image_pitch_alignment)
 
     case CL_DRIVER_VERSION:
       if (param_value_size_ret) {
diff --git a/src/cl_device_id.h b/src/cl_device_id.h
index 6daa31c..a14b3c8 100644
--- a/src/cl_device_id.h
+++ b/src/cl_device_id.h
@@ -115,6 +115,7 @@  struct _cl_device_id {
   cl_device_partition_property partition_type[3];
   cl_uint      device_reference_count;
   uint32_t atomic_test_result;
+  uint32_t image_pitch_alignment;
 };
 
 /* Get a device from the given platform */
diff --git a/src/cl_extensions.c b/src/cl_extensions.c
index 3eb303f..6cb1579 100644
--- a/src/cl_extensions.c
+++ b/src/cl_extensions.c
@@ -46,6 +46,8 @@  void check_opt1_extension(cl_extensions_t *extensions)
     if (id == EXT_ID(khr_spir))
       extensions->extensions[id].base.ext_enabled = 1;
 #endif
+    if (id == EXT_ID(khr_image2d_from_buffer))
+      extensions->extensions[id].base.ext_enabled = 1;
   }
 }
 
diff --git a/src/cl_gt_device.h b/src/cl_gt_device.h
index 26bb8e8..fa63040 100644
--- a/src/cl_gt_device.h
+++ b/src/cl_gt_device.h
@@ -125,4 +125,5 @@  DECL_INFO_STRING(driver_version, LIBCL_DRIVER_VERSION_STRING)
 .affinity_domain = 0,
 .partition_type = {0},
 .device_reference_count = 1,
+.image_pitch_alignment = 1,
 
diff --git a/src/cl_mem.c b/src/cl_mem.c
index b5671bd..cff2625 100644
--- a/src/cl_mem.c
+++ b/src/cl_mem.c
@@ -266,6 +266,7 @@  cl_mem_allocate(enum cl_mem_type type,
   mem->magic = CL_MAGIC_MEM_HEADER;
   mem->flags = flags;
   mem->is_userptr = 0;
+  mem->is_image_from_buffer = 0;
   mem->offset = 0;
 
   if (sz != 0) {
@@ -298,7 +299,7 @@  cl_mem_allocate(enum cl_mem_type type,
             }
           }
         }
-        else if (flags & CL_MEM_ALLOC_HOST_PTR) {
+        else if  (flags & CL_MEM_ALLOC_HOST_PTR) {
           const size_t alignedSZ = ALIGN(sz, page_size);
           void* internal_host_ptr = cl_aligned_malloc(alignedSZ, page_size);
           mem->host_ptr = internal_host_ptr;
@@ -308,10 +309,19 @@  cl_mem_allocate(enum cl_mem_type type,
       }
     }
 
-    if (!mem->is_userptr)
+    if(type == CL_MEM_IMAGE_TYPE && host_ptr && ((cl_mem)host_ptr)->magic == CL_MAGIC_MEM_HEADER) {
+      // if the image if created from buffer, should use the bo directly to share same bo.
+      mem->bo = ((cl_mem)host_ptr)->bo;
+      mem->is_image_from_buffer = 1;
+    } else  if (!mem->is_userptr)
       mem->bo = cl_buffer_alloc(bufmgr, "CL memory object", sz, alignment);
 #else
-    mem->bo = cl_buffer_alloc(bufmgr, "CL memory object", sz, alignment);
+    if(type == CL_MEM_IMAGE_TYPE && host_ptr && ((cl_mem)host_ptr)->magic == CL_MAGIC_MEM_HEADER) {
+      // if the image if created from buffer, should use the bo directly to share same bo.
+      mem->bo = ((cl_mem)host_ptr)->bo;
+      mem->is_image_from_buffer = 1;
+    } else
+      mem->bo = cl_buffer_alloc(bufmgr, "CL memory object", sz, alignment);
 #endif
 
     if (UNLIKELY(mem->bo == NULL)) {
@@ -756,6 +766,8 @@  _cl_mem_new_image(cl_context ctx,
       h = (w + ctx->device->image2d_max_width - 1) / ctx->device->image2d_max_width;
       w = w > ctx->device->image2d_max_width ? ctx->device->image2d_max_width : w;
       tiling = CL_NO_TILE;
+    } else if(image_type == CL_MEM_OBJECT_IMAGE2D && data && ((cl_mem)data)->magic == CL_MAGIC_MEM_HEADER) {
+      tiling = CL_NO_TILE;
     } else if (cl_driver_get_ver(ctx->drv) != 6) {
       /* Pick up tiling mode (we do only linear on SNB) */
       tiling = cl_get_default_tiling(ctx->drv);
@@ -804,7 +816,10 @@  _cl_mem_new_image(cl_context ctx,
   /* Tiling requires to align both pitch and height */
   if (tiling == CL_NO_TILE) {
     aligned_pitch = w * bpp;
-    aligned_h  = ALIGN(h, cl_buffer_get_tiling_align(ctx, CL_NO_TILE, 1));
+    if(image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY)
+      aligned_h  = ALIGN(h, cl_buffer_get_tiling_align(ctx, CL_NO_TILE, 1));
+    else
+      aligned_h = h;
   } else if (tiling == CL_TILE_X) {
     aligned_pitch = ALIGN(w * bpp, cl_buffer_get_tiling_align(ctx, CL_TILE_X, 0));
     aligned_h     = ALIGN(h, cl_buffer_get_tiling_align(ctx, CL_TILE_X, 1));
@@ -814,6 +829,11 @@  _cl_mem_new_image(cl_context ctx,
   }
 
   sz = aligned_pitch * aligned_h * depth;
+  if(image_type == CL_MEM_OBJECT_IMAGE2D && data && ((cl_mem)data)->magic == CL_MAGIC_MEM_HEADER)  {
+    //image 2d created from buffer: the buffer sz maybe larger than the image 2d.
+    if( ((cl_mem)data)->size > sz)
+      sz = ((cl_mem)data)->size;
+  }
 
   /* If sz is large than 128MB, map gtt may fail in some system.
      Because there is no obviours performance drop, disable tiling. */
@@ -825,7 +845,7 @@  _cl_mem_new_image(cl_context ctx,
   }
 
   if (image_type != CL_MEM_OBJECT_IMAGE1D_BUFFER)
-    mem = cl_mem_allocate(CL_MEM_IMAGE_TYPE, ctx, flags, sz, tiling != CL_NO_TILE, NULL, &err);
+    mem = cl_mem_allocate(CL_MEM_IMAGE_TYPE, ctx, flags, sz, tiling != CL_NO_TILE, data, &err);
   else {
     mem = cl_mem_allocate(CL_MEM_BUFFER1D_IMAGE_TYPE, ctx, flags, sz, tiling != CL_NO_TILE, NULL, &err);
     if (mem != NULL && err == CL_SUCCESS) {
@@ -837,7 +857,12 @@  _cl_mem_new_image(cl_context ctx,
   if (mem == NULL || err != CL_SUCCESS)
     goto error;
 
-  cl_buffer_set_tiling(mem->bo, tiling, aligned_pitch);
+  if(!(image_type == CL_MEM_OBJECT_IMAGE2D && data && ((cl_mem)data)->magic == CL_MAGIC_MEM_HEADER))
+  {
+    //no need set tiling if image 2d created from buffer since share same bo.
+    cl_buffer_set_tiling(mem->bo, tiling, aligned_pitch);
+  }
+
   if (image_type == CL_MEM_OBJECT_IMAGE1D ||
       image_type == CL_MEM_OBJECT_IMAGE2D ||
       image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER)
@@ -971,33 +996,53 @@  _cl_mem_new_image_from_buffer(cl_context ctx,
   if (UNLIKELY((err = cl_image_byte_per_pixel(image_format, &bpp)) != CL_SUCCESS))
     goto error;
 
-  // Per bspec, a image should has a at least 2 line vertical alignment,
-  // thus we can't simply attach a buffer to a 1d image surface which has the same size.
-  // We have to create a new image, and copy the buffer data to this new image.
-  // And replace all the buffer object's reference to this image.
-  image = _cl_mem_new_image(ctx, flags, image_format, image_desc->image_type,
+  if(image_desc->image_type == CL_MEM_OBJECT_IMAGE2D) {
+    image = _cl_mem_new_image(ctx, flags, image_format, image_desc->image_type,
+                             image_desc->image_width, image_desc->image_height, image_desc->image_depth,
+                             image_desc->image_row_pitch, image_desc->image_slice_pitch,
+                             image_desc->buffer, errcode_ret);
+  } else if (image_desc->image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER) {
+    // Per bspec, a image should has a at least 2 line vertical alignment,
+    // thus we can't simply attach a buffer to a 1d image surface which has the same size.
+    // We have to create a new image, and copy the buffer data to this new image.
+    // And replace all the buffer object's reference to this image.
+    image = _cl_mem_new_image(ctx, flags, image_format, image_desc->image_type,
                     mem_buffer->base.size / bpp, 0, 0, 0, 0, NULL, errcode_ret);
+  }
+  else
+    assert(0);
+
   if (image == NULL)
     return NULL;
-  void *src = cl_mem_map(buffer, 0);
-  void *dst = cl_mem_map(image, 1);
-  //
-  // FIXME, we could use copy buffer to image to do this on GPU latter.
-  // currently the copy buffer to image function doesn't support 1D image.
-  // 
-  // There is a potential risk that this buffer was mapped and the caller
-  // still hold the pointer and want to access it again. This scenario is
-  // not explicitly forbidden in the spec, although it should not be permitted.
-  memcpy(dst, src, mem_buffer->base.size);
-  cl_mem_unmap(buffer);
-  cl_mem_unmap(image);
+
+  if(image_desc->image_type == CL_MEM_OBJECT_IMAGE2D)
+  {
+    //no need copy since the image 2d and buffer share same bo.
+  }
+  else if (image_desc->image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER)
+  {
+    // FIXME, we could use copy buffer to image to do this on GPU latter.
+    // currently the copy buffer to image function doesn't support 1D image.
+    //
+    // There is a potential risk that this buffer was mapped and the caller
+    // still hold the pointer and want to access it again. This scenario is
+    // not explicitly forbidden in the spec, although it should not be permitted.
+    void *src = cl_mem_map(buffer, 0);
+    void *dst = cl_mem_map(image, 1);
+    memcpy(dst, src, mem_buffer->base.size);
+    cl_mem_unmap(image);
+    cl_mem_unmap(buffer);
+  }
+  else
+    assert(0);
 
   if (err != 0)
     goto error;
  
   // Now replace buffer's bo to this new bo, need to take care of sub buffer
   // case. 
-  cl_mem_replace_buffer(buffer, image->bo);
+  if (image_desc->image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER)
+    cl_mem_replace_buffer(buffer, image->bo);
   /* Now point to the right offset if buffer is a SUB_BUFFER. */
   if (buffer->flags & CL_MEM_USE_HOST_PTR)
     image->host_ptr = buffer->host_ptr + offset;
@@ -1025,12 +1070,20 @@  cl_mem_new_image(cl_context context,
 {
   switch (image_desc->image_type) {
   case CL_MEM_OBJECT_IMAGE1D:
-  case CL_MEM_OBJECT_IMAGE2D:
   case CL_MEM_OBJECT_IMAGE3D:
     return _cl_mem_new_image(context, flags, image_format, image_desc->image_type,
                              image_desc->image_width, image_desc->image_height, image_desc->image_depth,
                              image_desc->image_row_pitch, image_desc->image_slice_pitch,
                              host_ptr, errcode_ret);
+  case CL_MEM_OBJECT_IMAGE2D:
+    if(image_desc->buffer)
+      return _cl_mem_new_image_from_buffer(context, flags, image_format,
+                             image_desc, errcode_ret);
+    else
+      return _cl_mem_new_image(context, flags, image_format, image_desc->image_type,
+                             image_desc->image_width, image_desc->image_height, image_desc->image_depth,
+                             image_desc->image_row_pitch, image_desc->image_slice_pitch,
+                             host_ptr, errcode_ret);
   case CL_MEM_OBJECT_IMAGE1D_ARRAY:
   case CL_MEM_OBJECT_IMAGE2D_ARRAY:
     return _cl_mem_new_image(context, flags, image_format, image_desc->image_type,
@@ -1064,8 +1117,10 @@  cl_mem_delete(cl_mem mem)
 
   /* iff we are a image, delete the 1d buffer if has. */
   if (IS_IMAGE(mem)) {
-    if (cl_mem_image(mem)->buffer_1d) {
-      assert(cl_mem_image(mem)->image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER);
+    if(mem->is_image_from_buffer)
+      mem->bo = NULL;
+    else if (cl_mem_image(mem)->buffer_1d) {
+      assert(cl_mem_image(mem)->image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER ||cl_mem_image(mem)->image_type == CL_MEM_OBJECT_IMAGE2D);
       cl_mem_delete(cl_mem_image(mem)->buffer_1d);
       cl_mem_image(mem)->buffer_1d = NULL;
     }
diff --git a/src/cl_mem.h b/src/cl_mem.h
index e027f15..cdbc6da 100644
--- a/src/cl_mem.h
+++ b/src/cl_mem.h
@@ -94,6 +94,7 @@  typedef  struct _cl_mem {
   uint8_t mapped_gtt;       /* This object has mapped gtt, for unmap. */
   cl_mem_dstr_cb *dstr_cb;  /* The destroy callback. */
   uint8_t is_userptr;       /* CL_MEM_USE_HOST_PTR is enabled*/
+  uint8_t is_image_from_buffer;       /* IMAGE from Buffer*/
   size_t offset;            /* offset of host_ptr to the page beginning, only for CL_MEM_USE_HOST_PTR*/
 } _cl_mem;