[v2,1/2] vrend: If available use glCopyImageSubData to execute memcopy like blits

Submitted by Gert Wollny on July 3, 2018, 11:21 a.m.

Details

Message ID 20180703112148.901-2-gert.wollny@collabora.com
State New
Series "Fix GLES31 copy_image.non-compressed tests"
Headers show

Commit Message

Gert Wollny July 3, 2018, 11:21 a.m.
When the host is  gles >= 3.2, gl >= 4.3, or when the extension
GL_(ARB|EXT|OES)_copy_image is available, memcopy like blitting and region
copying can be done for many color format combinations by using
glCopyImageSubData.

This fixes a number of tests from the subset
  dEQP-GLES31.functional.copy_image.non_compressed.viewclass_*

v2: - Clean list of canonical formats (Gurchetan Singh)
    - Use size of canonical formats to decide whether they can be copied via
      gCopyImageSubData
    - Also honour the render state when deciding whether glCopyImageSubData
      will do, or whether we need to do a blit.

Signed-off-by: Gert Wollny <gert.wollny@collabora.com>
---
 src/vrend_formats.c  | 40 +++++++++++++++++++++++++++++++
 src/vrend_renderer.c | 57 ++++++++++++++++++++++++++++++++++++++++++--
 src/vrend_renderer.h |  2 ++
 3 files changed, 97 insertions(+), 2 deletions(-)

Patch hide | download patch | download mbox

diff --git a/src/vrend_formats.c b/src/vrend_formats.c
index eb9f217..07f9985 100644
--- a/src/vrend_formats.c
+++ b/src/vrend_formats.c
@@ -443,3 +443,43 @@  void vrend_build_format_list_gles(void)
    */
   add_formats(gles_z32_format);
 }
+
+static uint vrend_is_canonical_format(enum pipe_format format)
+{
+   /* These are the canonical formats that are set by gallium */
+
+   switch (format) {
+   /* 128 bit */
+   case PIPE_FORMAT_R32G32B32A32_UINT:
+      return 128;
+
+    /* 64 bit */
+   case PIPE_FORMAT_R32G32_UINT:
+   case PIPE_FORMAT_R16G16B16A16_UINT:
+      return 64;
+
+   /* 32 bit */
+   case PIPE_FORMAT_R8G8B8A8_UNORM:
+   case PIPE_FORMAT_R16G16_UNORM:
+   case PIPE_FORMAT_R32_UINT:
+      return 32;
+
+   /* 16 bit */
+   case PIPE_FORMAT_R8G8_UNORM:
+   case PIPE_FORMAT_R16_UINT:
+      return 16;
+
+   case PIPE_FORMAT_R8_UINT:
+      return 8;
+   default:
+      return 0;
+   }
+}
+
+bool vrend_canonical_formats_of_same_size(enum pipe_format lhs, enum pipe_format rhs)
+{
+   uint lhss = vrend_is_canonical_format(lhs);
+   uint rhss = vrend_is_canonical_format(rhs);
+
+   return (lhss > 0 ) && (lhss == rhss);
+}
\ No newline at end of file
diff --git a/src/vrend_renderer.c b/src/vrend_renderer.c
index 23494a8..1bf33f1 100644
--- a/src/vrend_renderer.c
+++ b/src/vrend_renderer.c
@@ -119,6 +119,7 @@  struct global_renderer_state {
    bool have_texture_storage;
    bool have_tessellation;
    bool have_texture_view;
+   bool have_copy_image;
 
    /* these appeared broken on at least one driver */
    bool use_explicit_locations;
@@ -4527,6 +4528,12 @@  int vrend_renderer_init(struct vrend_if_cbs *cbs, uint32_t flags)
    if (gl_ver >= 46 || epoxy_has_gl_extension("GL_ARB_polygon_offset_clamp"))
       vrend_state.have_polygon_offset_clamp = true;
 
+   if (gl_ver >= 43 || (gles && gl_ver >= 32) ||
+       epoxy_has_gl_extension("GL_ARB_copy_image") ||
+       epoxy_has_gl_extension("GL_EXT_copy_image") ||
+       epoxy_has_gl_extension("GL_OES_copy_image"))
+       vrend_state.have_copy_image = true;
+
    /* callbacks for when we are cleaning up the object table */
    vrend_resource_set_destroy_callback(vrend_destroy_resource_object);
    vrend_object_set_destroy_callback(VIRGL_OBJECT_QUERY, vrend_destroy_query_object);
@@ -5746,9 +5753,11 @@  static int vrend_transfer_send_readpixels(struct vrend_context *ctx,
 
    switch (elsize) {
    case 1:
+   case 3:
       glPixelStorei(GL_PACK_ALIGNMENT, 1);
       break;
    case 2:
+   case 6:
       glPixelStorei(GL_PACK_ALIGNMENT, 2);
       break;
    case 4:
@@ -6330,6 +6339,22 @@  static void vrend_resource_copy_fallback(struct vrend_resource *src_res,
    free(tptr);
 }
 
+
+static inline void
+vrend_copy_sub_image(struct vrend_resource* src_res, struct vrend_resource * dst_res,
+                     uint32_t src_level, const struct pipe_box *src_box,
+                     uint32_t dst_level, uint32_t dstx, uint32_t dsty, uint32_t dstz)
+{
+   glCopyImageSubData(src_res->id,
+                      tgsitargettogltarget(src_res->base.target, src_res->base.nr_samples),
+                      src_level, src_box->x, src_box->y, src_box->z,
+                      dst_res->id,
+                      tgsitargettogltarget(dst_res->base.target, dst_res->base.nr_samples),
+                      dst_level, dstx, dsty, dstz,
+                      src_box->width, src_box->height,src_box->depth);
+}
+
+
 void vrend_renderer_resource_copy_region(struct vrend_context *ctx,
                                          uint32_t dst_handle, uint32_t dst_level,
                                          uint32_t dstx, uint32_t dsty, uint32_t dstz,
@@ -6362,11 +6387,21 @@  void vrend_renderer_resource_copy_region(struct vrend_context *ctx,
       return;
    }
 
+   if (vrend_state.have_copy_image) {
+      const struct util_format_description *src_desc = util_format_description(src_res->base.format);
+      const struct util_format_description *dst_desc = util_format_description(dst_res->base.format);
+      if (util_is_format_compatible(src_desc,dst_desc) &&
+          src_res->base.nr_samples == dst_res->base.nr_samples) {
+         vrend_copy_sub_image(src_res, dst_res, src_level, src_box,
+                              dst_level, dstx, dsty, dstz);
+         return;
+      }
+   }
+
    if (!vrend_format_can_render(src_res->base.format) ||
        !vrend_format_can_render(dst_res->base.format)) {
       vrend_resource_copy_fallback(src_res, dst_res, dst_level, dstx,
                                    dsty, dstz, src_level, src_box);
-
       return;
    }
 
@@ -6638,7 +6673,25 @@  void vrend_renderer_blit(struct vrend_context *ctx,
    if (info->render_condition_enable == false)
       vrend_pause_render_condition(ctx, true);
 
-   vrend_renderer_blit_int(ctx, src_res, dst_res, info);
+   /* The gallium blit function can be called for a general blit that may
+    * scale, convert the data, and apply some rander states or if is called via
+    * glCopyImageSubData. For the latter case forward the call to the
+    * glCopyImageSubData function, otherwise use a framebuffer blit.
+    */
+   if (vrend_state.have_copy_image && !info->render_condition_enable &&
+       vrend_canonical_formats_of_same_size(info->src.format, info->dst.format) &&
+       !info->scissor_enable && (info->filter == PIPE_TEX_FILTER_NEAREST) &&
+       !info->alpha_blend && (info->mask == PIPE_MASK_RGBA) &&
+       (src_res->base.nr_samples == dst_res->base.nr_samples) &&
+       info->src.box.width == info->dst.box.width &&
+       info->src.box.height == info->dst.box.height &&
+       info->src.box.depth == info->dst.box.depth) {
+      vrend_copy_sub_image(src_res, dst_res, info->src.level, &info->src.box,
+                           info->dst.level, info->dst.box.x, info->dst.box.y,
+                           info->dst.box.z);
+   } else {
+      vrend_renderer_blit_int(ctx, src_res, dst_res, info);
+   }
 
    if (info->render_condition_enable == false)
       vrend_pause_render_condition(ctx, false);
diff --git a/src/vrend_renderer.h b/src/vrend_renderer.h
index d07d11c..f81f60a 100644
--- a/src/vrend_renderer.h
+++ b/src/vrend_renderer.h
@@ -325,6 +325,8 @@  GLint64 vrend_renderer_get_timestamp(void);
 void vrend_build_format_list_common(void);
 void vrend_build_format_list_gl(void);
 void vrend_build_format_list_gles(void);
+bool vrend_canonical_formats_of_same_size(enum pipe_format lhs, enum pipe_format rhs);
+
 
 int vrend_renderer_resource_attach_iov(int res_handle, struct iovec *iov,
                                        int num_iovs);

Comments

Gurchetan Singh July 10, 2018, 8:07 p.m.
On Tue, Jul 3, 2018 at 4:21 AM Gert Wollny <gert.wollny@collabora.com> wrote:
>
> When the host is  gles >= 3.2, gl >= 4.3, or when the extension
> GL_(ARB|EXT|OES)_copy_image is available, memcopy like blitting and region
> copying can be done for many color format combinations by using
> glCopyImageSubData.
>
> This fixes a number of tests from the subset
>   dEQP-GLES31.functional.copy_image.non_compressed.viewclass_*
>
> v2: - Clean list of canonical formats (Gurchetan Singh)
>     - Use size of canonical formats to decide whether they can be copied via
>       gCopyImageSubData
>     - Also honour the render state when deciding whether glCopyImageSubData
>       will do, or whether we need to do a blit.
>
> Signed-off-by: Gert Wollny <gert.wollny@collabora.com>
> ---
>  src/vrend_formats.c  | 40 +++++++++++++++++++++++++++++++
>  src/vrend_renderer.c | 57 ++++++++++++++++++++++++++++++++++++++++++--
>  src/vrend_renderer.h |  2 ++
>  3 files changed, 97 insertions(+), 2 deletions(-)
>
> diff --git a/src/vrend_formats.c b/src/vrend_formats.c
> index eb9f217..07f9985 100644
> --- a/src/vrend_formats.c
> +++ b/src/vrend_formats.c
> @@ -443,3 +443,43 @@ void vrend_build_format_list_gles(void)
>     */
>    add_formats(gles_z32_format);
>  }
> +
> +static uint vrend_is_canonical_format(enum pipe_format format)
> +{
> +   /* These are the canonical formats that are set by gallium */
> +
> +   switch (format) {
> +   /* 128 bit */
> +   case PIPE_FORMAT_R32G32B32A32_UINT:
> +      return 128;
> +
> +    /* 64 bit */
> +   case PIPE_FORMAT_R32G32_UINT:
> +   case PIPE_FORMAT_R16G16B16A16_UINT:
> +      return 64;
> +
> +   /* 32 bit */
> +   case PIPE_FORMAT_R8G8B8A8_UNORM:
> +   case PIPE_FORMAT_R16G16_UNORM:
> +   case PIPE_FORMAT_R32_UINT:
> +      return 32;
> +
> +   /* 16 bit */
> +   case PIPE_FORMAT_R8G8_UNORM:
> +   case PIPE_FORMAT_R16_UINT:
> +      return 16;
> +
> +   case PIPE_FORMAT_R8_UINT:
> +      return 8;
> +   default:
> +      return 0;
> +   }
> +}
> +
> +bool vrend_canonical_formats_of_same_size(enum pipe_format lhs, enum pipe_format rhs)
> +{
> +   uint lhss = vrend_is_canonical_format(lhs);
> +   uint rhss = vrend_is_canonical_format(rhs);
> +
> +   return (lhss > 0 ) && (lhss == rhss);
> +}
> \ No newline at end of file
> diff --git a/src/vrend_renderer.c b/src/vrend_renderer.c
> index 23494a8..1bf33f1 100644
> --- a/src/vrend_renderer.c
> +++ b/src/vrend_renderer.c
> @@ -119,6 +119,7 @@ struct global_renderer_state {
>     bool have_texture_storage;
>     bool have_tessellation;
>     bool have_texture_view;
> +   bool have_copy_image;
>
>     /* these appeared broken on at least one driver */
>     bool use_explicit_locations;
> @@ -4527,6 +4528,12 @@ int vrend_renderer_init(struct vrend_if_cbs *cbs, uint32_t flags)
>     if (gl_ver >= 46 || epoxy_has_gl_extension("GL_ARB_polygon_offset_clamp"))
>        vrend_state.have_polygon_offset_clamp = true;
>
> +   if (gl_ver >= 43 || (gles && gl_ver >= 32) ||
> +       epoxy_has_gl_extension("GL_ARB_copy_image") ||
> +       epoxy_has_gl_extension("GL_EXT_copy_image") ||
> +       epoxy_has_gl_extension("GL_OES_copy_image"))
> +       vrend_state.have_copy_image = true;
> +
>     /* callbacks for when we are cleaning up the object table */
>     vrend_resource_set_destroy_callback(vrend_destroy_resource_object);
>     vrend_object_set_destroy_callback(VIRGL_OBJECT_QUERY, vrend_destroy_query_object);
> @@ -5746,9 +5753,11 @@ static int vrend_transfer_send_readpixels(struct vrend_context *ctx,
>
>     switch (elsize) {
>     case 1:
> +   case 3:
>        glPixelStorei(GL_PACK_ALIGNMENT, 1);
>        break;
>     case 2:
> +   case 6:
>        glPixelStorei(GL_PACK_ALIGNMENT, 2);
>        break;
>     case 4:
> @@ -6330,6 +6339,22 @@ static void vrend_resource_copy_fallback(struct vrend_resource *src_res,
>     free(tptr);
>  }
>
> +
> +static inline void
> +vrend_copy_sub_image(struct vrend_resource* src_res, struct vrend_resource * dst_res,
> +                     uint32_t src_level, const struct pipe_box *src_box,
> +                     uint32_t dst_level, uint32_t dstx, uint32_t dsty, uint32_t dstz)
> +{
> +   glCopyImageSubData(src_res->id,
> +                      tgsitargettogltarget(src_res->base.target, src_res->base.nr_samples),
> +                      src_level, src_box->x, src_box->y, src_box->z,
> +                      dst_res->id,
> +                      tgsitargettogltarget(dst_res->base.target, dst_res->base.nr_samples),
> +                      dst_level, dstx, dsty, dstz,
> +                      src_box->width, src_box->height,src_box->depth);
> +}
> +
> +
>  void vrend_renderer_resource_copy_region(struct vrend_context *ctx,
>                                           uint32_t dst_handle, uint32_t dst_level,
>                                           uint32_t dstx, uint32_t dsty, uint32_t dstz,
> @@ -6362,11 +6387,21 @@ void vrend_renderer_resource_copy_region(struct vrend_context *ctx,
>        return;
>     }
>
> +   if (vrend_state.have_copy_image) {
> +      const struct util_format_description *src_desc = util_format_description(src_res->base.format);
> +      const struct util_format_description *dst_desc = util_format_description(dst_res->base.format);
> +      if (util_is_format_compatible(src_desc,dst_desc) &&

nit: space after comma

> +          src_res->base.nr_samples == dst_res->base.nr_samples) {
> +         vrend_copy_sub_image(src_res, dst_res, src_level, src_box,
> +                              dst_level, dstx, dsty, dstz);
> +         return;
> +      }
> +   }
> +
>     if (!vrend_format_can_render(src_res->base.format) ||
>         !vrend_format_can_render(dst_res->base.format)) {
>        vrend_resource_copy_fallback(src_res, dst_res, dst_level, dstx,
>                                     dsty, dstz, src_level, src_box);
> -
>        return;
>     }
>
> @@ -6638,7 +6673,25 @@ void vrend_renderer_blit(struct vrend_context *ctx,
>     if (info->render_condition_enable == false)
>        vrend_pause_render_condition(ctx, true);
>
> -   vrend_renderer_blit_int(ctx, src_res, dst_res, info);
> +   /* The gallium blit function can be called for a general blit that may
> +    * scale, convert the data, and apply some rander states

nit: render

>        or if is called via
> +    * glCopyImageSubData. For the latter case forward the call to the
> +    * glCopyImageSubData function, otherwise use a framebuffer blit.
> +    */

This comment could be better.  glCopyImageSubData and other blit
functions are implemented on top of Gallium blit.  Gallium does magic
behind the scenes to make glCopyImageSubData work.  Our framebuffer
blit doesn't seem to take into account the instance
(src_res->base.format != info->src.format).  We need to detect this
and do glCopyImageSubData in that case.

We may be able to simplify this detection logic a bit.  My reading of
st_cb_copyimage.c leads me to believe:

1) pipe->blit() expects a per-channel memcpy -- that is, RGBA8 ->
BGRA8 flips the red and blue channels.  This based on the comments in
swizzled_copy, such as

"Only the swizzle is different, which means we can just blit"
"R32 -> BGRA8 is realized as RGBA8 -> BGRA8"
"BGRA8 -> R32 is realized as BGRA8 -> RGBA8"

2) That seems to imply glCopyImageSubData does per-channel memcpy too
when the formats are compatible (???).

Marek, Ilia -- is this understanding correct?

> +   if (vrend_state.have_copy_image && !info->render_condition_enable &&
> +       vrend_canonical_formats_of_same_size(info->src.format, info->dst.format) &&
> +       !info->scissor_enable && (info->filter == PIPE_TEX_FILTER_NEAREST) &&
> +       !info->alpha_blend && (info->mask == PIPE_MASK_RGBA) &&
> +       (src_res->base.nr_samples == dst_res->base.nr_samples) &&
> +       info->src.box.width == info->dst.box.width &&
> +       info->src.box.height == info->dst.box.height &&
> +       info->src.box.depth == info->dst.box.depth) {
> +      vrend_copy_sub_image(src_res, dst_res, info->src.level, &info->src.box,
> +                           info->dst.level, info->dst.box.x, info->dst.box.y,
> +                           info->dst.box.z);
> +   } else {
> +      vrend_renderer_blit_int(ctx, src_res, dst_res, info);
> +   }
>
>     if (info->render_condition_enable == false)
>        vrend_pause_render_condition(ctx, false);
> diff --git a/src/vrend_renderer.h b/src/vrend_renderer.h
> index d07d11c..f81f60a 100644
> --- a/src/vrend_renderer.h
> +++ b/src/vrend_renderer.h
> @@ -325,6 +325,8 @@ GLint64 vrend_renderer_get_timestamp(void);
>  void vrend_build_format_list_common(void);
>  void vrend_build_format_list_gl(void);
>  void vrend_build_format_list_gles(void);
> +bool vrend_canonical_formats_of_same_size(enum pipe_format lhs, enum pipe_format rhs);
> +
>
>  int vrend_renderer_resource_attach_iov(int res_handle, struct iovec *iov,
>                                         int num_iovs);
> --
> 2.17.1
>
> _______________________________________________
> virglrenderer-devel mailing list
> virglrenderer-devel@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/virglrenderer-devel
Marek Olšák July 10, 2018, 11:06 p.m.
On Tue, Jul 10, 2018 at 4:07 PM, Gurchetan Singh
<gurchetansingh@chromium.org> wrote:
> On Tue, Jul 3, 2018 at 4:21 AM Gert Wollny <gert.wollny@collabora.com> wrote:
>>
>> When the host is  gles >= 3.2, gl >= 4.3, or when the extension
>> GL_(ARB|EXT|OES)_copy_image is available, memcopy like blitting and region
>> copying can be done for many color format combinations by using
>> glCopyImageSubData.
>>
>> This fixes a number of tests from the subset
>>   dEQP-GLES31.functional.copy_image.non_compressed.viewclass_*
>>
>> v2: - Clean list of canonical formats (Gurchetan Singh)
>>     - Use size of canonical formats to decide whether they can be copied via
>>       gCopyImageSubData
>>     - Also honour the render state when deciding whether glCopyImageSubData
>>       will do, or whether we need to do a blit.
>>
>> Signed-off-by: Gert Wollny <gert.wollny@collabora.com>
>> ---
>>  src/vrend_formats.c  | 40 +++++++++++++++++++++++++++++++
>>  src/vrend_renderer.c | 57 ++++++++++++++++++++++++++++++++++++++++++--
>>  src/vrend_renderer.h |  2 ++
>>  3 files changed, 97 insertions(+), 2 deletions(-)
>>
>> diff --git a/src/vrend_formats.c b/src/vrend_formats.c
>> index eb9f217..07f9985 100644
>> --- a/src/vrend_formats.c
>> +++ b/src/vrend_formats.c
>> @@ -443,3 +443,43 @@ void vrend_build_format_list_gles(void)
>>     */
>>    add_formats(gles_z32_format);
>>  }
>> +
>> +static uint vrend_is_canonical_format(enum pipe_format format)
>> +{
>> +   /* These are the canonical formats that are set by gallium */
>> +
>> +   switch (format) {
>> +   /* 128 bit */
>> +   case PIPE_FORMAT_R32G32B32A32_UINT:
>> +      return 128;
>> +
>> +    /* 64 bit */
>> +   case PIPE_FORMAT_R32G32_UINT:
>> +   case PIPE_FORMAT_R16G16B16A16_UINT:
>> +      return 64;
>> +
>> +   /* 32 bit */
>> +   case PIPE_FORMAT_R8G8B8A8_UNORM:
>> +   case PIPE_FORMAT_R16G16_UNORM:
>> +   case PIPE_FORMAT_R32_UINT:
>> +      return 32;
>> +
>> +   /* 16 bit */
>> +   case PIPE_FORMAT_R8G8_UNORM:
>> +   case PIPE_FORMAT_R16_UINT:
>> +      return 16;
>> +
>> +   case PIPE_FORMAT_R8_UINT:
>> +      return 8;
>> +   default:
>> +      return 0;
>> +   }
>> +}
>> +
>> +bool vrend_canonical_formats_of_same_size(enum pipe_format lhs, enum pipe_format rhs)
>> +{
>> +   uint lhss = vrend_is_canonical_format(lhs);
>> +   uint rhss = vrend_is_canonical_format(rhs);
>> +
>> +   return (lhss > 0 ) && (lhss == rhss);
>> +}
>> \ No newline at end of file
>> diff --git a/src/vrend_renderer.c b/src/vrend_renderer.c
>> index 23494a8..1bf33f1 100644
>> --- a/src/vrend_renderer.c
>> +++ b/src/vrend_renderer.c
>> @@ -119,6 +119,7 @@ struct global_renderer_state {
>>     bool have_texture_storage;
>>     bool have_tessellation;
>>     bool have_texture_view;
>> +   bool have_copy_image;
>>
>>     /* these appeared broken on at least one driver */
>>     bool use_explicit_locations;
>> @@ -4527,6 +4528,12 @@ int vrend_renderer_init(struct vrend_if_cbs *cbs, uint32_t flags)
>>     if (gl_ver >= 46 || epoxy_has_gl_extension("GL_ARB_polygon_offset_clamp"))
>>        vrend_state.have_polygon_offset_clamp = true;
>>
>> +   if (gl_ver >= 43 || (gles && gl_ver >= 32) ||
>> +       epoxy_has_gl_extension("GL_ARB_copy_image") ||
>> +       epoxy_has_gl_extension("GL_EXT_copy_image") ||
>> +       epoxy_has_gl_extension("GL_OES_copy_image"))
>> +       vrend_state.have_copy_image = true;
>> +
>>     /* callbacks for when we are cleaning up the object table */
>>     vrend_resource_set_destroy_callback(vrend_destroy_resource_object);
>>     vrend_object_set_destroy_callback(VIRGL_OBJECT_QUERY, vrend_destroy_query_object);
>> @@ -5746,9 +5753,11 @@ static int vrend_transfer_send_readpixels(struct vrend_context *ctx,
>>
>>     switch (elsize) {
>>     case 1:
>> +   case 3:
>>        glPixelStorei(GL_PACK_ALIGNMENT, 1);
>>        break;
>>     case 2:
>> +   case 6:
>>        glPixelStorei(GL_PACK_ALIGNMENT, 2);
>>        break;
>>     case 4:
>> @@ -6330,6 +6339,22 @@ static void vrend_resource_copy_fallback(struct vrend_resource *src_res,
>>     free(tptr);
>>  }
>>
>> +
>> +static inline void
>> +vrend_copy_sub_image(struct vrend_resource* src_res, struct vrend_resource * dst_res,
>> +                     uint32_t src_level, const struct pipe_box *src_box,
>> +                     uint32_t dst_level, uint32_t dstx, uint32_t dsty, uint32_t dstz)
>> +{
>> +   glCopyImageSubData(src_res->id,
>> +                      tgsitargettogltarget(src_res->base.target, src_res->base.nr_samples),
>> +                      src_level, src_box->x, src_box->y, src_box->z,
>> +                      dst_res->id,
>> +                      tgsitargettogltarget(dst_res->base.target, dst_res->base.nr_samples),
>> +                      dst_level, dstx, dsty, dstz,
>> +                      src_box->width, src_box->height,src_box->depth);
>> +}
>> +
>> +
>>  void vrend_renderer_resource_copy_region(struct vrend_context *ctx,
>>                                           uint32_t dst_handle, uint32_t dst_level,
>>                                           uint32_t dstx, uint32_t dsty, uint32_t dstz,
>> @@ -6362,11 +6387,21 @@ void vrend_renderer_resource_copy_region(struct vrend_context *ctx,
>>        return;
>>     }
>>
>> +   if (vrend_state.have_copy_image) {
>> +      const struct util_format_description *src_desc = util_format_description(src_res->base.format);
>> +      const struct util_format_description *dst_desc = util_format_description(dst_res->base.format);
>> +      if (util_is_format_compatible(src_desc,dst_desc) &&
>
> nit: space after comma
>
>> +          src_res->base.nr_samples == dst_res->base.nr_samples) {
>> +         vrend_copy_sub_image(src_res, dst_res, src_level, src_box,
>> +                              dst_level, dstx, dsty, dstz);
>> +         return;
>> +      }
>> +   }
>> +
>>     if (!vrend_format_can_render(src_res->base.format) ||
>>         !vrend_format_can_render(dst_res->base.format)) {
>>        vrend_resource_copy_fallback(src_res, dst_res, dst_level, dstx,
>>                                     dsty, dstz, src_level, src_box);
>> -
>>        return;
>>     }
>>
>> @@ -6638,7 +6673,25 @@ void vrend_renderer_blit(struct vrend_context *ctx,
>>     if (info->render_condition_enable == false)
>>        vrend_pause_render_condition(ctx, true);
>>
>> -   vrend_renderer_blit_int(ctx, src_res, dst_res, info);
>> +   /* The gallium blit function can be called for a general blit that may
>> +    * scale, convert the data, and apply some rander states
>
> nit: render
>
>>        or if is called via
>> +    * glCopyImageSubData. For the latter case forward the call to the
>> +    * glCopyImageSubData function, otherwise use a framebuffer blit.
>> +    */
>
> This comment could be better.  glCopyImageSubData and other blit
> functions are implemented on top of Gallium blit.  Gallium does magic
> behind the scenes to make glCopyImageSubData work.  Our framebuffer
> blit doesn't seem to take into account the instance
> (src_res->base.format != info->src.format).  We need to detect this
> and do glCopyImageSubData in that case.
>
> We may be able to simplify this detection logic a bit.  My reading of
> st_cb_copyimage.c leads me to believe:
>
> 1) pipe->blit() expects a per-channel memcpy -- that is, RGBA8 ->
> BGRA8 flips the red and blue channels.  This based on the comments in
> swizzled_copy, such as
>
> "Only the swizzle is different, which means we can just blit"
> "R32 -> BGRA8 is realized as RGBA8 -> BGRA8"
> "BGRA8 -> R32 is realized as BGRA8 -> RGBA8"
>
> 2) That seems to imply glCopyImageSubData does per-channel memcpy too
> when the formats are compatible (???).
>
> Marek, Ilia -- is this understanding correct?

glCopyImageSubData views all images as having the RGBA component
order. BGRA doesn't exist. If Mesa uses BGRA internally and blits to
R32, it has to swap B and R, so that R32 looks like the source was
RGBA.

pipe->blit is a textured blit. It does exactly what drawing a textured
quad would do. There are no tricks there.

Marek
Gurchetan Singh July 11, 2018, 1:07 a.m.
On Tue, Jul 10, 2018 at 4:07 PM Marek Olšák <maraeo@gmail.com> wrote:
>
> On Tue, Jul 10, 2018 at 4:07 PM, Gurchetan Singh
> <gurchetansingh@chromium.org> wrote:
> > On Tue, Jul 3, 2018 at 4:21 AM Gert Wollny <gert.wollny@collabora.com> wrote:
> >>
> >> When the host is  gles >= 3.2, gl >= 4.3, or when the extension
> >> GL_(ARB|EXT|OES)_copy_image is available, memcopy like blitting and region
> >> copying can be done for many color format combinations by using
> >> glCopyImageSubData.
> >>
> >> This fixes a number of tests from the subset
> >>   dEQP-GLES31.functional.copy_image.non_compressed.viewclass_*
> >>
> >> v2: - Clean list of canonical formats (Gurchetan Singh)
> >>     - Use size of canonical formats to decide whether they can be copied via
> >>       gCopyImageSubData
> >>     - Also honour the render state when deciding whether glCopyImageSubData
> >>       will do, or whether we need to do a blit.
> >>
> >> Signed-off-by: Gert Wollny <gert.wollny@collabora.com>
> >> ---
> >>  src/vrend_formats.c  | 40 +++++++++++++++++++++++++++++++
> >>  src/vrend_renderer.c | 57 ++++++++++++++++++++++++++++++++++++++++++--
> >>  src/vrend_renderer.h |  2 ++
> >>  3 files changed, 97 insertions(+), 2 deletions(-)
> >>
> >> diff --git a/src/vrend_formats.c b/src/vrend_formats.c
> >> index eb9f217..07f9985 100644
> >> --- a/src/vrend_formats.c
> >> +++ b/src/vrend_formats.c
> >> @@ -443,3 +443,43 @@ void vrend_build_format_list_gles(void)
> >>     */
> >>    add_formats(gles_z32_format);
> >>  }
> >> +
> >> +static uint vrend_is_canonical_format(enum pipe_format format)
> >> +{
> >> +   /* These are the canonical formats that are set by gallium */
> >> +
> >> +   switch (format) {
> >> +   /* 128 bit */
> >> +   case PIPE_FORMAT_R32G32B32A32_UINT:
> >> +      return 128;
> >> +
> >> +    /* 64 bit */
> >> +   case PIPE_FORMAT_R32G32_UINT:
> >> +   case PIPE_FORMAT_R16G16B16A16_UINT:
> >> +      return 64;
> >> +
> >> +   /* 32 bit */
> >> +   case PIPE_FORMAT_R8G8B8A8_UNORM:
> >> +   case PIPE_FORMAT_R16G16_UNORM:
> >> +   case PIPE_FORMAT_R32_UINT:
> >> +      return 32;
> >> +
> >> +   /* 16 bit */
> >> +   case PIPE_FORMAT_R8G8_UNORM:
> >> +   case PIPE_FORMAT_R16_UINT:
> >> +      return 16;
> >> +
> >> +   case PIPE_FORMAT_R8_UINT:
> >> +      return 8;
> >> +   default:
> >> +      return 0;
> >> +   }
> >> +}
> >> +
> >> +bool vrend_canonical_formats_of_same_size(enum pipe_format lhs, enum pipe_format rhs)
> >> +{
> >> +   uint lhss = vrend_is_canonical_format(lhs);
> >> +   uint rhss = vrend_is_canonical_format(rhs);
> >> +
> >> +   return (lhss > 0 ) && (lhss == rhss);
> >> +}
> >> \ No newline at end of file
> >> diff --git a/src/vrend_renderer.c b/src/vrend_renderer.c
> >> index 23494a8..1bf33f1 100644
> >> --- a/src/vrend_renderer.c
> >> +++ b/src/vrend_renderer.c
> >> @@ -119,6 +119,7 @@ struct global_renderer_state {
> >>     bool have_texture_storage;
> >>     bool have_tessellation;
> >>     bool have_texture_view;
> >> +   bool have_copy_image;
> >>
> >>     /* these appeared broken on at least one driver */
> >>     bool use_explicit_locations;
> >> @@ -4527,6 +4528,12 @@ int vrend_renderer_init(struct vrend_if_cbs *cbs, uint32_t flags)
> >>     if (gl_ver >= 46 || epoxy_has_gl_extension("GL_ARB_polygon_offset_clamp"))
> >>        vrend_state.have_polygon_offset_clamp = true;
> >>
> >> +   if (gl_ver >= 43 || (gles && gl_ver >= 32) ||
> >> +       epoxy_has_gl_extension("GL_ARB_copy_image") ||
> >> +       epoxy_has_gl_extension("GL_EXT_copy_image") ||
> >> +       epoxy_has_gl_extension("GL_OES_copy_image"))
> >> +       vrend_state.have_copy_image = true;
> >> +
> >>     /* callbacks for when we are cleaning up the object table */
> >>     vrend_resource_set_destroy_callback(vrend_destroy_resource_object);
> >>     vrend_object_set_destroy_callback(VIRGL_OBJECT_QUERY, vrend_destroy_query_object);
> >> @@ -5746,9 +5753,11 @@ static int vrend_transfer_send_readpixels(struct vrend_context *ctx,
> >>
> >>     switch (elsize) {
> >>     case 1:
> >> +   case 3:
> >>        glPixelStorei(GL_PACK_ALIGNMENT, 1);
> >>        break;
> >>     case 2:
> >> +   case 6:
> >>        glPixelStorei(GL_PACK_ALIGNMENT, 2);
> >>        break;
> >>     case 4:
> >> @@ -6330,6 +6339,22 @@ static void vrend_resource_copy_fallback(struct vrend_resource *src_res,
> >>     free(tptr);
> >>  }
> >>
> >> +
> >> +static inline void
> >> +vrend_copy_sub_image(struct vrend_resource* src_res, struct vrend_resource * dst_res,
> >> +                     uint32_t src_level, const struct pipe_box *src_box,
> >> +                     uint32_t dst_level, uint32_t dstx, uint32_t dsty, uint32_t dstz)
> >> +{
> >> +   glCopyImageSubData(src_res->id,
> >> +                      tgsitargettogltarget(src_res->base.target, src_res->base.nr_samples),
> >> +                      src_level, src_box->x, src_box->y, src_box->z,
> >> +                      dst_res->id,
> >> +                      tgsitargettogltarget(dst_res->base.target, dst_res->base.nr_samples),
> >> +                      dst_level, dstx, dsty, dstz,
> >> +                      src_box->width, src_box->height,src_box->depth);
> >> +}
> >> +
> >> +
> >>  void vrend_renderer_resource_copy_region(struct vrend_context *ctx,
> >>                                           uint32_t dst_handle, uint32_t dst_level,
> >>                                           uint32_t dstx, uint32_t dsty, uint32_t dstz,
> >> @@ -6362,11 +6387,21 @@ void vrend_renderer_resource_copy_region(struct vrend_context *ctx,
> >>        return;
> >>     }
> >>
> >> +   if (vrend_state.have_copy_image) {
> >> +      const struct util_format_description *src_desc = util_format_description(src_res->base.format);
> >> +      const struct util_format_description *dst_desc = util_format_description(dst_res->base.format);
> >> +      if (util_is_format_compatible(src_desc,dst_desc) &&
> >
> > nit: space after comma
> >
> >> +          src_res->base.nr_samples == dst_res->base.nr_samples) {
> >> +         vrend_copy_sub_image(src_res, dst_res, src_level, src_box,
> >> +                              dst_level, dstx, dsty, dstz);
> >> +         return;
> >> +      }
> >> +   }
> >> +
> >>     if (!vrend_format_can_render(src_res->base.format) ||
> >>         !vrend_format_can_render(dst_res->base.format)) {
> >>        vrend_resource_copy_fallback(src_res, dst_res, dst_level, dstx,
> >>                                     dsty, dstz, src_level, src_box);
> >> -
> >>        return;
> >>     }
> >>
> >> @@ -6638,7 +6673,25 @@ void vrend_renderer_blit(struct vrend_context *ctx,
> >>     if (info->render_condition_enable == false)
> >>        vrend_pause_render_condition(ctx, true);
> >>
> >> -   vrend_renderer_blit_int(ctx, src_res, dst_res, info);
> >> +   /* The gallium blit function can be called for a general blit that may
> >> +    * scale, convert the data, and apply some rander states
> >
> > nit: render
> >
> >>        or if is called via
> >> +    * glCopyImageSubData. For the latter case forward the call to the
> >> +    * glCopyImageSubData function, otherwise use a framebuffer blit.
> >> +    */
> >
> > This comment could be better.  glCopyImageSubData and other blit
> > functions are implemented on top of Gallium blit.  Gallium does magic
> > behind the scenes to make glCopyImageSubData work.  Our framebuffer
> > blit doesn't seem to take into account the instance
> > (src_res->base.format != info->src.format).  We need to detect this
> > and do glCopyImageSubData in that case.
> >
> > We may be able to simplify this detection logic a bit.  My reading of
> > st_cb_copyimage.c leads me to believe:
> >
> > 1) pipe->blit() expects a per-channel memcpy -- that is, RGBA8 ->
> > BGRA8 flips the red and blue channels.  This based on the comments in
> > swizzled_copy, such as
> >
> > "Only the swizzle is different, which means we can just blit"
> > "R32 -> BGRA8 is realized as RGBA8 -> BGRA8"
> > "BGRA8 -> R32 is realized as BGRA8 -> RGBA8"
> >
> > 2) That seems to imply glCopyImageSubData does per-channel memcpy too
> > when the formats are compatible (???).
> >
> > Marek, Ilia -- is this understanding correct?
>
> glCopyImageSubData views all images as having the RGBA component
> order. BGRA doesn't exist. If Mesa uses BGRA internally and blits to
> R32, it has to swap B and R, so that R32 looks like the source was
> RGBA.
>
> pipe->blit is a textured blit. It does exactly what drawing a textured
> quad would do. There are no tricks there.

Thanks for the clarifications.  gert.wollny@, would it be possible to
get rid of vrend_canonical_formats_of_same_size /
vrend_is_canonical_format, and do the following instead in
vrend_renderer_blit?

const struct util_format_description *src_desc =
util_format_description(info->src.format);
const struct util_format_description *dst_desc =
util_format_description(info->dest.format);
if (util_is_format_compatible(src_desc, dst_desc) && ...)
      vrend_copy_sub_image(..)

Gallium seems to make the formats equivalent in swizzled_copy:

If it's a RGBA8 -> R32 memcpy blit, has_identity_swizzle(src_desc) is
true, making it a R32 -> R32 blit
If it's a RG16 -> RGBA8 memcpy blit, has_identity_swizzle(src_desc) is
true, making it a RGBA8 -> RGBA8 blit

We shouldn't need to worry about BGRA8 --> RGBA8 blits -- that'll be
handled on the host side.

> Marek
Gert Wollny July 11, 2018, 7:37 a.m.
[...]
> Thanks for the clarifications.  gert.wollny@, would it be possible to
> get rid of vrend_canonical_formats_of_same_size /
> vrend_is_canonical_format, and do the following instead in
> vrend_renderer_blit?
> 
> const struct util_format_description *src_desc =
> util_format_description(info->src.format);
> const struct util_format_description *dst_desc =
> util_format_description(info->dest.format);
> if (util_is_format_compatible(src_desc, dst_desc) && ...)
>       vrend_copy_sub_image(..)

Good point, I actually used that already in *resource_copy_region,
makes sense to use it in blit too.

Best, 
Gert
Gert Wollny July 11, 2018, 9:58 a.m.
Am Mittwoch, den 11.07.2018, 09:37 +0200 schrieb Gert Wollny:
> [...]
> > Thanks for the clarifications.  gert.wollny@, would it be possible
> > to
> > get rid of vrend_canonical_formats_of_same_size /
> > vrend_is_canonical_format, and do the following instead in
> > vrend_renderer_blit?
> > 
> > const struct util_format_description *src_desc =
> > util_format_description(info->src.format);
> > const struct util_format_description *dst_desc =
> > util_format_description(info->dest.format);
> > if (util_is_format_compatible(src_desc, dst_desc) && ...)
> >       vrend_copy_sub_image(..)
> 
> Good point, I actually used that already in *resource_copy_region,
> makes sense to use it in blit too.
util_is_format_compatible is actually to restrictive, e.g. the
canonical formats for e.g. RG16 and RGBA8 are different, but
glCopyImageSubData can (and should) copy this, so I'll implement a
short version that can also take the compressed formats etc. 

> 
> Best, 
> Gert
>
Gurchetan Singh July 12, 2018, 3:43 a.m.
On Wed, Jul 11, 2018 at 2:58 AM Gert Wollny <gert.wollny@collabora.com> wrote:
>
> Am Mittwoch, den 11.07.2018, 09:37 +0200 schrieb Gert Wollny:
> > [...]
> > > Thanks for the clarifications.  gert.wollny@, would it be possible
> > > to
> > > get rid of vrend_canonical_formats_of_same_size /
> > > vrend_is_canonical_format, and do the following instead in
> > > vrend_renderer_blit?
> > >
> > > const struct util_format_description *src_desc =
> > > util_format_description(info->src.format);
> > > const struct util_format_description *dst_desc =
> > > util_format_description(info->dest.format);
> > > if (util_is_format_compatible(src_desc, dst_desc) && ...)
> > >       vrend_copy_sub_image(..)
> >
> > Good point, I actually used that already in *resource_copy_region,
> > makes sense to use it in blit too.
> util_is_format_compatible is actually to restrictive, e.g. the
> canonical formats for e.g. RG16 and RGBA8 are different,

Which particular tests show that the canonical formats for RG16 /
RGBA8 are different?  I ran:

dEQP-GLES31.functional.copy_image.non_compressed.viewclass_32_bits.rg16f_r32i.texture2d_to_texture2d

and see that swizzled_copy makes blit_dst_format and blit_src_format
PIPE_FORMAT_R8G8B8A8_UNORM.

> but glCopyImageSubData can (and should) copy this, so I'll implement a
> short version that can also take the compressed formats etc.
>
> >
> > Best,
> > Gert
> >
Gert Wollny July 12, 2018, 7:37 a.m.
Am Mittwoch, den 11.07.2018, 20:43 -0700 schrieb Gurchetan Singh:
> 
> Which particular tests show that the canonical formats for RG16 /
> RGBA8 are different?  I ran:
> 
> dEQP-
> GLES31.functional.copy_image.non_compressed.viewclass_32_bits.rg16f_r
> 32i.texture2d_to_texture2d
> 
> and see that swizzled_copy makes blit_dst_format and blit_src_format
> PIPE_FORMAT_R8G8B8A8_UNORM.
> 

Now that I re-checked it, it seems all is fine, yesterday I thought I
have seen piglits failing that were passing with the other test, but I
can't reproduce this now. 

best, 
Gert