[Mesa-dev,v2] i965: Add GPU BLIT of texture image to PBO in Intel driver

Submitted by Jon Ashburn on March 5, 2014, 12:34 a.m.

Details

Message ID 1393979684-26314-1-git-send-email-jon@lunarg.com
State New
Headers show

Not browsing as part of any series.

Commit Message

Jon Ashburn March 5, 2014, 12:34 a.m.
Add Intel driver hook for glGetTexImage to accelerate the case of reading
texture image into a PBO.  This case gets huge performance gains by using
GPU BLIT directly to PBO rather than GPU BLIT to temporary texture followed
by memcpy.

No regressions on Piglit tests  with Intel driver.
Performance gain (1280 x 800 FBO, Ivybridge):
glGetTexImage + glMapBufferRange  with patch 1.45 msec
glGetTexImage + glMapBufferRange without patch 4.68 msec
---
 src/mesa/drivers/dri/i965/intel_tex_image.c | 105 ++++++++++++++++++++++++++++
 1 file changed, 105 insertions(+)

Patch hide | download patch | download mbox

diff --git a/src/mesa/drivers/dri/i965/intel_tex_image.c b/src/mesa/drivers/dri/i965/intel_tex_image.c
index ee02e68..13a34d5 100644
--- a/src/mesa/drivers/dri/i965/intel_tex_image.c
+++ b/src/mesa/drivers/dri/i965/intel_tex_image.c
@@ -15,6 +15,8 @@ 
 #include "main/teximage.h"
 #include "main/texstore.h"
 
+#include "drivers/common/meta.h"
+
 #include "intel_mipmap_tree.h"
 #include "intel_buffer_objects.h"
 #include "intel_batchbuffer.h"
@@ -415,10 +417,113 @@  intel_image_target_texture_2d(struct gl_context *ctx, GLenum target,
                                   image->tile_x, image->tile_y);
 }
 
+static bool
+IntelBlitTexToPbo(struct gl_context * ctx,
+                   GLenum format, GLenum type,
+                   GLvoid * pixels, struct gl_texture_image *texImage)
+{
+   struct intel_texture_image *intelImage = intel_texture_image(texImage);
+   struct brw_context *brw = brw_context(ctx);
+   const struct gl_pixelstore_attrib *pack = &(ctx->Pack);
+   struct intel_buffer_object *dst = intel_buffer_object(pack->BufferObj);
+   GLuint dst_offset;
+   drm_intel_bo *dst_buffer;
+   GLenum target = texImage->TexObject->Target;
+
+   /*
+    * Check if we can use GPU blit to copy from the hardware texture
+    * format to the user's format/type.
+    * Note that GL's pixel transfer ops don't apply to glGetTexImage()
+    */
+
+   if (!_mesa_format_matches_format_and_type(
+          intelImage->mt->format, format, type, false))
+   {
+      perf_debug("%s: unsupported format, fallback to CPU mapping for PBO\n", __FUNCTION__);
+
+      return false;
+   }
+
+   if (ctx->_ImageTransferState) {
+      perf_debug("%s: bad transfer state, fallback to CPU mapping for PBO\n", __FUNCTION__);
+      return false;
+   }
+
+   if (pack->SwapBytes || pack->LsbFirst) {
+      perf_debug("%s: unsupported pack swap params\n", __FUNCTION__);
+      return false;
+   }
+
+   if (target == GL_TEXTURE_1D_ARRAY || target == GL_TEXTURE_CUBE_MAP_ARRAY ||
+       target == GL_TEXTURE_2D_ARRAY) {
+      perf_debug("%s: no support for array textures, fallback to CPU mapping for PBO\n", __FUNCTION__);
+      return false;
+   }
+
+   int dst_stride = _mesa_image_row_stride(pack, texImage->Width, format, type);
+   bool dst_flip = false;
+   /* Mesa flips the dst_stride for ctx->Pack.Invert, our mt must have a
+    * normal dst_stride.
+    */
+   struct gl_pixelstore_attrib uninverted_pack = *pack;
+   if (ctx->Pack.Invert) {
+      dst_stride = -dst_stride;
+      dst_flip = true;
+      uninverted_pack.Invert = false;
+   }
+   dst_offset = (GLintptr) pixels;
+   dst_offset += _mesa_image_offset(2, &uninverted_pack, texImage->Width,
+                                    texImage->Height, format, type, 0, 0, 0);
+   dst_buffer = intel_bufferobj_buffer(brw, dst, dst_offset,
+                                       texImage->Height * dst_stride);
+
+   struct intel_mipmap_tree *pbo_mt =
+            intel_miptree_create_for_bo(brw,
+                                        dst_buffer,
+                                        intelImage->mt->format,
+                                        dst_offset,
+                                        texImage->Width, texImage->Height,
+                                        dst_stride, I915_TILING_NONE);
+
+   if (!pbo_mt)
+      return false;
+
+   if (!intel_miptree_blit(brw,
+                           intelImage->mt, texImage->Level, texImage->Face,
+                           0, 0, false,
+                           pbo_mt, 0, 0,
+                           0, 0, dst_flip,
+                           texImage->Width, texImage->Height, GL_COPY))
+      return false;
+
+   intel_miptree_release(&pbo_mt);
+
+   return true;
+}
+
+static void
+intel_get_tex_image(struct gl_context *ctx,
+                       GLenum format, GLenum type, GLvoid *pixels,
+                       struct gl_texture_image *texImage) {
+   DBG("%s\n", __FUNCTION__);
+
+   if (_mesa_is_bufferobj(ctx->Pack.BufferObj)) {
+      /* Using PBOs, so try the BLT based path. */
+      if (IntelBlitTexToPbo(ctx, format, type, pixels, texImage))
+         return;
+
+   }
+
+   _mesa_meta_GetTexImage(ctx, format, type, pixels, texImage);
+
+   DBG("%s - DONE\n", __FUNCTION__);
+}
+
 void
 intelInitTextureImageFuncs(struct dd_function_table *functions)
 {
    functions->TexImage = intelTexImage;
    functions->EGLImageTargetTexture2D = intel_image_target_texture_2d;
    functions->BindRenderbufferTexImage = intel_bind_renderbuffer_tex_image;
+   functions->GetTexImage = intel_get_tex_image;
 }

Comments

On Tuesday, March 04, 2014 05:34:44 PM Jon Ashburn wrote:
> Add Intel driver hook for glGetTexImage to accelerate the case of reading
> texture image into a PBO.  This case gets huge performance gains by using
> GPU BLIT directly to PBO rather than GPU BLIT to temporary texture followed
> by memcpy.
> 
> No regressions on Piglit tests  with Intel driver.
> Performance gain (1280 x 800 FBO, Ivybridge):
> glGetTexImage + glMapBufferRange  with patch 1.45 msec
> glGetTexImage + glMapBufferRange without patch 4.68 msec
> ---
>  src/mesa/drivers/dri/i965/intel_tex_image.c | 105 
++++++++++++++++++++++++++++
>  1 file changed, 105 insertions(+)

I fixed up a bunch of small issues with this patch and pushed it.

> diff --git a/src/mesa/drivers/dri/i965/intel_tex_image.c 
b/src/mesa/drivers/dri/i965/intel_tex_image.c
> index ee02e68..13a34d5 100644
> --- a/src/mesa/drivers/dri/i965/intel_tex_image.c
> +++ b/src/mesa/drivers/dri/i965/intel_tex_image.c
> @@ -15,6 +15,8 @@
>  #include "main/teximage.h"
>  #include "main/texstore.h"
>  
> +#include "drivers/common/meta.h"
> +
>  #include "intel_mipmap_tree.h"
>  #include "intel_buffer_objects.h"
>  #include "intel_batchbuffer.h"
> @@ -415,10 +417,113 @@ intel_image_target_texture_2d(struct gl_context *ctx, 
GLenum target,
>                                    image->tile_x, image->tile_y);
>  }
>  
> +static bool
> +IntelBlitTexToPbo(struct gl_context * ctx,

We've been trying to move away from camelCase, and never used StudlyCaps.  
I've renamed this to "blit_texture_to_pbo" (we don't need the intel prefix on 
static functions).

> +                   GLenum format, GLenum type,
> +                   GLvoid * pixels, struct gl_texture_image *texImage)
> +{
> +   struct intel_texture_image *intelImage = intel_texture_image(texImage);
> +   struct brw_context *brw = brw_context(ctx);
> +   const struct gl_pixelstore_attrib *pack = &(ctx->Pack);
> +   struct intel_buffer_object *dst = intel_buffer_object(pack->BufferObj);
> +   GLuint dst_offset;
> +   drm_intel_bo *dst_buffer;
> +   GLenum target = texImage->TexObject->Target;
> +
> +   /*

Comments should start directly after /*.  Fixed.

> +    * Check if we can use GPU blit to copy from the hardware texture
> +    * format to the user's format/type.
> +    * Note that GL's pixel transfer ops don't apply to glGetTexImage()
> +    */
> +
> +   if (!_mesa_format_matches_format_and_type(
> +          intelImage->mt->format, format, type, false))

I reformatted this slightly.

> +   {
> +      perf_debug("%s: unsupported format, fallback to CPU mapping for 
PBO\n", __FUNCTION__);

Wrap at 80 columns.  Fixed.

> +
> +      return false;
> +   }
> +
> +   if (ctx->_ImageTransferState) {
> +      perf_debug("%s: bad transfer state, fallback to CPU mapping for 
PBO\n", __FUNCTION__);
> +      return false;
> +   }
> +
> +   if (pack->SwapBytes || pack->LsbFirst) {
> +      perf_debug("%s: unsupported pack swap params\n", __FUNCTION__);
> +      return false;
> +   }
> +
> +   if (target == GL_TEXTURE_1D_ARRAY || target == GL_TEXTURE_CUBE_MAP_ARRAY 
||
> +       target == GL_TEXTURE_2D_ARRAY) {

This path regresses Piglit's "getteximage-targets 3D" test, which would suffer 
from the same "multiple slices" problem.  (I see you worked on that test - 
perhaps the 3D case didn't exist when you tested this patch?)  I blacklisted
GL_TEXTURE_3D here to avoid that bug.

> +      perf_debug("%s: no support for array textures, fallback to CPU 
mapping for PBO\n", __FUNCTION__);
> +      return false;
> +   }
> +
> +   int dst_stride = _mesa_image_row_stride(pack, texImage->Width, format, 
type);
> +   bool dst_flip = false;
> +   /* Mesa flips the dst_stride for ctx->Pack.Invert, our mt must have a
> +    * normal dst_stride.
> +    */
> +   struct gl_pixelstore_attrib uninverted_pack = *pack;
> +   if (ctx->Pack.Invert) {
> +      dst_stride = -dst_stride;
> +      dst_flip = true;
> +      uninverted_pack.Invert = false;
> +   }
> +   dst_offset = (GLintptr) pixels;
> +   dst_offset += _mesa_image_offset(2, &uninverted_pack, texImage->Width,
> +                                    texImage->Height, format, type, 0, 0, 
0);
> +   dst_buffer = intel_bufferobj_buffer(brw, dst, dst_offset,
> +                                       texImage->Height * dst_stride);
> +
> +   struct intel_mipmap_tree *pbo_mt =
> +            intel_miptree_create_for_bo(brw,

Weird whitespace.  Fixed.

> +                                        dst_buffer,
> +                                        intelImage->mt->format,
> +                                        dst_offset,
> +                                        texImage->Width, texImage->Height,
> +                                        dst_stride, I915_TILING_NONE);

Eric removed the tiling parameter in April, so this didn't compile.  Fixed.

Thanks for the patch.  Sorry for not getting to review it earlier...