[Mesa-dev,06/10] i965: Change the winsys MSAA blits from blorp to meta.

Submitted by Eric Anholt on March 4, 2014, 10:17 p.m.

Details

Message ID 1393971464-12892-7-git-send-email-eric@anholt.net
State New
Headers show

Not browsing as part of any series.

Commit Message

Eric Anholt March 4, 2014, 10:17 p.m.
This gets us equivalent code paths on BDW and pre-BDW, except for stencil
(where we don't have MSAA stencil resolve code yet)
---
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 111 ++++++++++++++++++++++++--
 1 file changed, 103 insertions(+), 8 deletions(-)

Patch hide | download patch | download mbox

diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index 202bb52..3c09a42 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -40,12 +40,16 @@ 
 #include "brw_blorp.h"
 #include "brw_context.h"
 
+#include "main/blit.h"
+#include "main/buffers.h"
 #include "main/enums.h"
+#include "main/fbobject.h"
 #include "main/formats.h"
 #include "main/glformats.h"
 #include "main/texcompress_etc.h"
 #include "main/teximage.h"
 #include "main/streaming-load-memcpy.h"
+#include "drivers/common/meta.h"
 
 #define FILE_DEBUG_FLAG DEBUG_MIPTREE
 
@@ -1538,19 +1542,110 @@  intel_offset_S8(uint32_t stride, uint32_t x, uint32_t y, bool swizzled)
    return u;
 }
 
+static void
+intel_rb_storage_first_mt_slice(struct brw_context *brw,
+                                GLuint rbo,
+                                struct intel_mipmap_tree *mt)
+{
+   struct gl_context *ctx = &brw->ctx;
+   struct gl_renderbuffer *rb;
+   struct intel_renderbuffer *irb;
+
+   /* This turns the GenRenderbuffers name into an actual struct
+    * intel_renderbuffer.
+    */
+   _mesa_BindRenderbuffer(GL_RENDERBUFFER, rbo);
+
+   rb = ctx->CurrentRenderbuffer;
+   irb = intel_renderbuffer(rb);
+
+   rb->Format = mt->format;
+   rb->_BaseFormat = _mesa_base_fbo_format(ctx, mt->format);
+
+   rb->NumSamples = mt->num_samples;
+   rb->Width = mt->logical_width0;
+   rb->Height = mt->logical_height0;
+
+   intel_miptree_reference(&irb->mt, mt);
+}
+
+/**
+ * Implementation of up or downsampling for window-system MSAA miptrees.
+ */
+static void
+intel_miptree_msaa_copy(struct brw_context *brw,
+                        struct intel_mipmap_tree *src_mt,
+                        struct intel_mipmap_tree *dst_mt)
+{
+   struct gl_context *ctx = &brw->ctx;
+   GLuint rbos[2], fbos[2], src_rbo, dst_rbo, src_fbo, dst_fbo;
+   GLenum drawbuffer;
+   GLbitfield attachment, blit_bit;
+
+   if (_mesa_get_format_base_format(src_mt->format) == GL_DEPTH_COMPONENT ||
+       _mesa_get_format_base_format(src_mt->format) == GL_DEPTH_STENCIL) {
+      attachment = GL_DEPTH_ATTACHMENT;
+      drawbuffer = GL_NONE;
+      blit_bit = GL_DEPTH_BUFFER_BIT;
+   } else {
+      attachment = GL_COLOR_ATTACHMENT0;
+      drawbuffer = GL_COLOR_ATTACHMENT0;
+      blit_bit = GL_COLOR_BUFFER_BIT;
+   }
+
+   intel_batchbuffer_emit_mi_flush(brw);
+
+   _mesa_meta_begin(ctx, MESA_META_ALL);
+   _mesa_GenRenderbuffers(2, rbos);
+   _mesa_GenFramebuffers(2, fbos);
+   src_rbo = rbos[0];
+   dst_rbo = rbos[1];
+   src_fbo = fbos[0];
+   dst_fbo = fbos[1];
+
+   _mesa_BindFramebuffer(GL_READ_FRAMEBUFFER, src_fbo);
+   intel_rb_storage_first_mt_slice(brw, src_rbo, src_mt);
+   _mesa_FramebufferRenderbuffer(GL_READ_FRAMEBUFFER, attachment,
+                                 GL_RENDERBUFFER, src_rbo);
+   _mesa_ReadBuffer(drawbuffer);
+
+   _mesa_BindFramebuffer(GL_DRAW_FRAMEBUFFER, dst_fbo);
+   intel_rb_storage_first_mt_slice(brw, dst_rbo, dst_mt);
+   _mesa_FramebufferRenderbuffer(GL_DRAW_FRAMEBUFFER, attachment,
+                                 GL_RENDERBUFFER, dst_rbo);
+   _mesa_DrawBuffer(drawbuffer);
+
+   _mesa_BlitFramebuffer(0, 0,
+                         src_mt->logical_width0, src_mt->logical_height0,
+                         0, 0,
+                         dst_mt->logical_width0, dst_mt->logical_height0,
+                         blit_bit, GL_NEAREST);
+
+   _mesa_DeleteRenderbuffers(2, rbos);
+   _mesa_DeleteFramebuffers(2, fbos);
+
+   _mesa_meta_end(ctx);
+
+   intel_batchbuffer_emit_mi_flush(brw);
+}
+
 void
 intel_miptree_updownsample(struct brw_context *brw,
                            struct intel_mipmap_tree *src,
                            struct intel_mipmap_tree *dst)
 {
-   brw_blorp_blit_miptrees(brw,
-                           src, 0 /* level */, 0 /* layer */,
-                           dst, 0 /* level */, 0 /* layer */,
-                           0, 0,
-                           src->logical_width0, src->logical_height0,
-                           0, 0,
-                           dst->logical_width0, dst->logical_height0,
-                           GL_NEAREST, false, false /*mirror x, y*/);
+   if (src->format == MESA_FORMAT_S_UINT8) {
+      brw_blorp_blit_miptrees(brw,
+                              src, 0 /* level */, 0 /* layer */,
+                              dst, 0 /* level */, 0 /* layer */,
+                              0, 0,
+                              src->logical_width0, src->logical_height0,
+                              0, 0,
+                              dst->logical_width0, dst->logical_height0,
+                              GL_NEAREST, false, false /*mirror x, y*/);
+   } else {
+      intel_miptree_msaa_copy(brw, src, dst);
+   }
 
    if (src->stencil_mt) {
       brw_blorp_blit_miptrees(brw,

Comments

On 03/04/2014 02:17 PM, Eric Anholt wrote:
> This gets us equivalent code paths on BDW and pre-BDW, except for stencil
> (where we don't have MSAA stencil resolve code yet)
> ---
>  src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 111 ++++++++++++++++++++++++--
>  1 file changed, 103 insertions(+), 8 deletions(-)

A couple of comments below...

> diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> index 202bb52..3c09a42 100644
> --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> @@ -40,12 +40,16 @@
>  #include "brw_blorp.h"
>  #include "brw_context.h"
>  
> +#include "main/blit.h"
> +#include "main/buffers.h"
>  #include "main/enums.h"
> +#include "main/fbobject.h"
>  #include "main/formats.h"
>  #include "main/glformats.h"
>  #include "main/texcompress_etc.h"
>  #include "main/teximage.h"
>  #include "main/streaming-load-memcpy.h"
> +#include "drivers/common/meta.h"
>  
>  #define FILE_DEBUG_FLAG DEBUG_MIPTREE
>  
> @@ -1538,19 +1542,110 @@ intel_offset_S8(uint32_t stride, uint32_t x, uint32_t y, bool swizzled)
>     return u;
>  }
>  

/**
 * Creates a new named renderbuffer that wraps the first slice
 * of an existing miptree.
 *
 * Clobbers the current renderbuffer binding (ctx->CurrentRenderbuffer).
 */

> +static void
> +intel_rb_storage_first_mt_slice(struct brw_context *brw,
> +                                GLuint rbo,
> +                                struct intel_mipmap_tree *mt)
> +{
> +   struct gl_context *ctx = &brw->ctx;
> +   struct gl_renderbuffer *rb;
> +   struct intel_renderbuffer *irb;
> +

Perhaps move the GenRenderbuffers call into this function, and make it
return the RB number?  Having the name generation and actual
renderbuffer structure population in separate functions seems a little
strange to me.  Just an idea, though; I could be convinced otherwise.

> +   /* This turns the GenRenderbuffers name into an actual struct
> +    * intel_renderbuffer.
> +    */
> +   _mesa_BindRenderbuffer(GL_RENDERBUFFER, rbo);
> +
> +   rb = ctx->CurrentRenderbuffer;
> +   irb = intel_renderbuffer(rb);
> +
> +   rb->Format = mt->format;
> +   rb->_BaseFormat = _mesa_base_fbo_format(ctx, mt->format);
> +
> +   rb->NumSamples = mt->num_samples;
> +   rb->Width = mt->logical_width0;
> +   rb->Height = mt->logical_height0;
> +
> +   intel_miptree_reference(&irb->mt, mt);
> +}
> +
> +/**
> + * Implementation of up or downsampling for window-system MSAA miptrees.
> + */
> +static void
> +intel_miptree_msaa_copy(struct brw_context *brw,
> +                        struct intel_mipmap_tree *src_mt,
> +                        struct intel_mipmap_tree *dst_mt)

This code looks reasonable, but I'm not crazy about it living here in
intel_mipmap_tree.c with names like intel_mipmap_do_stuff().  Normally,
I think of intel_mipmap_tree.c/intel_mipmap_...() functions as the core
miptree support.

It's the first code outside of meta*.c that calls _mesa_meta_begin/end,
and the only code in intel_mipmap_tree.c that calls API functions like
_mesa_BindRenderbuffer.  So, it looks rather surprising at first glance.

Putting it in another file might help, but that may be overkill.  I'd at
least like to see "_meta_" in the names.  Perhaps
brw_meta_updownsample() or such?

> +{
> +   struct gl_context *ctx = &brw->ctx;
> +   GLuint rbos[2], fbos[2], src_rbo, dst_rbo, src_fbo, dst_fbo;
> +   GLenum drawbuffer;
> +   GLbitfield attachment, blit_bit;
> +
> +   if (_mesa_get_format_base_format(src_mt->format) == GL_DEPTH_COMPONENT ||
> +       _mesa_get_format_base_format(src_mt->format) == GL_DEPTH_STENCIL) {
> +      attachment = GL_DEPTH_ATTACHMENT;
> +      drawbuffer = GL_NONE;
> +      blit_bit = GL_DEPTH_BUFFER_BIT;
> +   } else {
> +      attachment = GL_COLOR_ATTACHMENT0;
> +      drawbuffer = GL_COLOR_ATTACHMENT0;
> +      blit_bit = GL_COLOR_BUFFER_BIT;
> +   }
> +
> +   intel_batchbuffer_emit_mi_flush(brw);
> +
> +   _mesa_meta_begin(ctx, MESA_META_ALL);
> +   _mesa_GenRenderbuffers(2, rbos);
> +   _mesa_GenFramebuffers(2, fbos);
> +   src_rbo = rbos[0];
> +   dst_rbo = rbos[1];
> +   src_fbo = fbos[0];
> +   dst_fbo = fbos[1];
> +
> +   _mesa_BindFramebuffer(GL_READ_FRAMEBUFFER, src_fbo);
> +   intel_rb_storage_first_mt_slice(brw, src_rbo, src_mt);
> +   _mesa_FramebufferRenderbuffer(GL_READ_FRAMEBUFFER, attachment,
> +                                 GL_RENDERBUFFER, src_rbo);
> +   _mesa_ReadBuffer(drawbuffer);
> +
> +   _mesa_BindFramebuffer(GL_DRAW_FRAMEBUFFER, dst_fbo);
> +   intel_rb_storage_first_mt_slice(brw, dst_rbo, dst_mt);
> +   _mesa_FramebufferRenderbuffer(GL_DRAW_FRAMEBUFFER, attachment,
> +                                 GL_RENDERBUFFER, dst_rbo);
> +   _mesa_DrawBuffer(drawbuffer);
> +
> +   _mesa_BlitFramebuffer(0, 0,
> +                         src_mt->logical_width0, src_mt->logical_height0,
> +                         0, 0,
> +                         dst_mt->logical_width0, dst_mt->logical_height0,
> +                         blit_bit, GL_NEAREST);
> +
> +   _mesa_DeleteRenderbuffers(2, rbos);
> +   _mesa_DeleteFramebuffers(2, fbos);
> +
> +   _mesa_meta_end(ctx);
> +
> +   intel_batchbuffer_emit_mi_flush(brw);
> +}
> +
>  void
>  intel_miptree_updownsample(struct brw_context *brw,
>                             struct intel_mipmap_tree *src,
>                             struct intel_mipmap_tree *dst)
>  {
> -   brw_blorp_blit_miptrees(brw,
> -                           src, 0 /* level */, 0 /* layer */,
> -                           dst, 0 /* level */, 0 /* layer */,
> -                           0, 0,
> -                           src->logical_width0, src->logical_height0,
> -                           0, 0,
> -                           dst->logical_width0, dst->logical_height0,
> -                           GL_NEAREST, false, false /*mirror x, y*/);
> +   if (src->format == MESA_FORMAT_S_UINT8) {
> +      brw_blorp_blit_miptrees(brw,
> +                              src, 0 /* level */, 0 /* layer */,
> +                              dst, 0 /* level */, 0 /* layer */,
> +                              0, 0,
> +                              src->logical_width0, src->logical_height0,
> +                              0, 0,
> +                              dst->logical_width0, dst->logical_height0,
> +                              GL_NEAREST, false, false /*mirror x, y*/);
> +   } else {
> +      intel_miptree_msaa_copy(brw, src, dst);
> +   }
>  
>     if (src->stencil_mt) {
>        brw_blorp_blit_miptrees(brw,
> 

I was concerned about switching away from BLORP on existing platforms without
any sort of indication that the performance doesn't regress.  Dave Airlie pointed
out that Xonotic uses window system multisampling, so I ran that and double
checked that this code was getting called.

On Iris Pro at 1400x1050 (windowed) and 4x MSAA,

x before
+ after
+--------------------------------------------------------------------------+
|                                          x     +                         |
|+               x       +    xx ++    *  +*+ x+ + x+xx  +      x +x    x x|
|                         |_____|_________A_M___MA_______|________|        |
+--------------------------------------------------------------------------+
    N           Min           Max        Median           Avg        Stddev
x  14     177.90398     180.94218     179.55409     179.59949    0.89145869
+  14     177.03936     180.52852     179.32322     179.20873    0.83237168
No difference proven at 95.0% confidence

So I think universally switching to the new code is reasonable.  Nice.

--Ken