[Mesa-dev,PATCHv2] i965: Avoid extraneous fast depth clears

Submitted by Chia-I Wu on Jan. 3, 2014, 6:28 a.m.

Details

Message ID 1388730525-5793-1-git-send-email-olvaffe@gmail.com
State New
Headers show

Not browsing as part of any series.

Commit Message

Chia-I Wu Jan. 3, 2014, 6:28 a.m.
When the depth buffer is already cleared, skip GEN6_HIZ_OP_DEPTH_CLEAR.  This
is made possible by tracking which slices have been cleared in
"struct intel_mipmap_level".  The hiz_cleared flag is unset when the depth
buffer is rendered to or when a HiZ resolve is needed.

For Unigine Tropics, the FPS improvement is 1.32134% +/- 0.161878% (n=13).

v2:
- unset hiz_cleared automatically in intel_miptree_slice_set_needs_hiz_resolve
- set/unset hiz_cleared with intel_renderbuffer_att_set_needs_depth_resolve

Signed-off-by: Chia-I Wu <olv@lunarg.com>
---
 src/mesa/drivers/dri/i965/brw_clear.c         | 54 +++++++++++++++++++--------
 src/mesa/drivers/dri/i965/brw_draw.c          |  2 +-
 src/mesa/drivers/dri/i965/intel_fbo.c         | 18 ++++++++-
 src/mesa/drivers/dri/i965/intel_fbo.h         |  4 +-
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 39 +++++++++++++++++++
 src/mesa/drivers/dri/i965/intel_mipmap_tree.h | 21 +++++++++++
 6 files changed, 118 insertions(+), 20 deletions(-)

Patch hide | download patch | download mbox

diff --git a/src/mesa/drivers/dri/i965/brw_clear.c b/src/mesa/drivers/dri/i965/brw_clear.c
index 1cac996..8622584 100644
--- a/src/mesa/drivers/dri/i965/brw_clear.c
+++ b/src/mesa/drivers/dri/i965/brw_clear.c
@@ -164,34 +164,58 @@  brw_fast_clear_depth(struct gl_context *ctx)
       break;
    }
 
+   unsigned num_layers_cleared = 0;
+   bool clear_all_layers = false;
+
    /* If we're clearing to a new clear value, then we need to resolve any clear
     * flags out of the HiZ buffer into the real depth buffer.
     */
    if (mt->depth_clear_value != depth_clear_value) {
       intel_miptree_all_slices_resolve_depth(brw, mt);
       mt->depth_clear_value = depth_clear_value;
-   }
 
-   /* From the Sandy Bridge PRM, volume 2 part 1, page 313:
-    *
-    *     "If other rendering operations have preceded this clear, a
-    *      PIPE_CONTROL with write cache flush enabled and Z-inhibit disabled
-    *      must be issued before the rectangle primitive used for the depth
-    *      buffer clear operation.
-    */
-   intel_batchbuffer_emit_mi_flush(brw);
+      clear_all_layers = true;
+   }
 
    if (fb->NumLayers > 0) {
       assert(fb->NumLayers == depth_irb->mt->level[depth_irb->mt_level].depth);
       for (unsigned layer = 0; layer < fb->NumLayers; layer++) {
-         intel_hiz_exec(brw, mt, depth_irb->mt_level, layer,
-                        GEN6_HIZ_OP_DEPTH_CLEAR);
+         if (clear_all_layers ||
+             !intel_miptree_slice_get_hiz_cleared(mt,
+                                                  depth_irb->mt_level,
+                                                  layer)) {
+            /* From the Sandy Bridge PRM, volume 2 part 1, page 313:
+             *
+             *     "If other rendering operations have preceded this clear, a
+             *      PIPE_CONTROL with write cache flush enabled and Z-inhibit
+             *      disabled must be issued before the rectangle primitive used
+             *      for the depth buffer clear operation.
+             */
+            if (num_layers_cleared == 0)
+               intel_batchbuffer_emit_mi_flush(brw);
+
+            intel_hiz_exec(brw, mt, depth_irb->mt_level, layer,
+                           GEN6_HIZ_OP_DEPTH_CLEAR);
+
+            num_layers_cleared++;
+         }
       }
    } else {
-      intel_hiz_exec(brw, mt, depth_irb->mt_level, depth_irb->mt_layer,
-                     GEN6_HIZ_OP_DEPTH_CLEAR);
+      if (clear_all_layers ||
+          !intel_miptree_slice_get_hiz_cleared(mt,
+                                               depth_irb->mt_level,
+                                               depth_irb->mt_layer)) {
+         intel_batchbuffer_emit_mi_flush(brw);
+         intel_hiz_exec(brw, mt, depth_irb->mt_level, depth_irb->mt_layer,
+                        GEN6_HIZ_OP_DEPTH_CLEAR);
+
+         num_layers_cleared = 1;
+      }
    }
 
+   if (num_layers_cleared == 0)
+      return true;
+
    if (brw->gen == 6) {
       /* From the Sandy Bridge PRM, volume 2 part 1, page 314:
        *
@@ -203,9 +227,9 @@  brw_fast_clear_depth(struct gl_context *ctx)
    }
 
    /* Now, the HiZ buffer contains data that needs to be resolved to the depth
-    * buffer.
+    * buffer.  And set its cleared state to avoid unnecessary clears.
     */
-   intel_renderbuffer_att_set_needs_depth_resolve(depth_att);
+   intel_renderbuffer_att_set_needs_depth_resolve(depth_att, true);
 
    return true;
 }
diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c
index b898cd3..2138174 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -364,7 +364,7 @@  static void brw_postdraw_set_buffers_need_resolve(struct brw_context *brw)
    if (back_irb)
       intel_renderbuffer_set_needs_downsample(back_irb);
    if (depth_irb && ctx->Depth.Mask)
-      intel_renderbuffer_att_set_needs_depth_resolve(depth_att);
+      intel_renderbuffer_att_set_needs_depth_resolve(depth_att, false);
 }
 
 /* May fail if out of video memory for texture or vbo upload, or on
diff --git a/src/mesa/drivers/dri/i965/intel_fbo.c b/src/mesa/drivers/dri/i965/intel_fbo.c
index d3b9dcf..a54ecbb 100644
--- a/src/mesa/drivers/dri/i965/intel_fbo.c
+++ b/src/mesa/drivers/dri/i965/intel_fbo.c
@@ -829,17 +829,31 @@  intel_renderbuffer_resolve_hiz(struct brw_context *brw,
    return false;
 }
 
+/**
+ * Set that the depth buffer needs to be resolved.  This should be called when
+ * the renderbuffer is rendered or cleared, and we want to distinguish them so
+ * that we know if the HiZ is in a "cleared" state.
+ */
 void
-intel_renderbuffer_att_set_needs_depth_resolve(struct gl_renderbuffer_attachment *att)
+intel_renderbuffer_att_set_needs_depth_resolve(struct gl_renderbuffer_attachment *att,
+                                               bool cleared)
 {
    struct intel_renderbuffer *irb = intel_renderbuffer(att->Renderbuffer);
    if (irb->mt) {
       if (att->Layered) {
-         intel_miptree_set_all_slices_need_depth_resolve(irb->mt, irb->mt_level);
+         intel_miptree_set_all_slices_need_depth_resolve(irb->mt,
+                                                         irb->mt_level);
+         intel_miptree_set_all_slices_hiz_cleared(irb->mt,
+                                                  irb->mt_level,
+                                                  cleared);
       } else {
          intel_miptree_slice_set_needs_depth_resolve(irb->mt,
                                                      irb->mt_level,
                                                      irb->mt_layer);
+         intel_miptree_slice_set_hiz_cleared(irb->mt,
+                                             irb->mt_level,
+                                             irb->mt_layer,
+                                             cleared);
       }
    }
 }
diff --git a/src/mesa/drivers/dri/i965/intel_fbo.h b/src/mesa/drivers/dri/i965/intel_fbo.h
index fb50f86..d67c702 100644
--- a/src/mesa/drivers/dri/i965/intel_fbo.h
+++ b/src/mesa/drivers/dri/i965/intel_fbo.h
@@ -164,8 +164,8 @@  bool
 intel_renderbuffer_has_hiz(struct intel_renderbuffer *irb);
 
 void
-intel_renderbuffer_att_set_needs_depth_resolve(struct gl_renderbuffer_attachment *att);
-
+intel_renderbuffer_att_set_needs_depth_resolve(struct gl_renderbuffer_attachment *att,
+                                               bool cleared);
 
 /**
  * \brief Perform a HiZ resolve on the renderbuffer.
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index de47143..5debbae 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -1397,6 +1397,42 @@  intel_miptree_slice_has_hiz(struct intel_mipmap_tree *mt,
 }
 
 void
+intel_miptree_slice_set_hiz_cleared(struct intel_mipmap_tree *mt,
+                                    uint32_t level,
+                                    uint32_t layer,
+                                    bool cleared)
+{
+   if (!intel_miptree_slice_has_hiz(mt, level, layer))
+      return;
+
+   mt->level[level].slice[layer].hiz_cleared = cleared;
+}
+
+void
+intel_miptree_set_all_slices_hiz_cleared(struct intel_mipmap_tree *mt,
+                                         uint32_t level,
+                                         bool cleared)
+{
+   uint32_t layer;
+   uint32_t end_layer = mt->level[level].depth;
+
+   for (layer = 0; layer < end_layer; layer++) {
+      intel_miptree_slice_set_hiz_cleared(mt, level, layer, cleared);
+   }
+}
+
+bool
+intel_miptree_slice_get_hiz_cleared(struct intel_mipmap_tree *mt,
+                                    uint32_t level,
+                                    uint32_t layer)
+{
+   if (!intel_miptree_slice_has_hiz(mt, level, layer))
+      return false;
+
+   return mt->level[level].slice[layer].hiz_cleared;
+}
+
+void
 intel_miptree_slice_set_needs_hiz_resolve(struct intel_mipmap_tree *mt,
 					  uint32_t level,
 					  uint32_t layer)
@@ -1406,6 +1442,9 @@  intel_miptree_slice_set_needs_hiz_resolve(struct intel_mipmap_tree *mt,
 
    intel_resolve_map_set(&mt->hiz_map,
 			 level, layer, GEN6_HIZ_OP_HIZ_RESOLVE);
+
+   /* HiZ should no longer be considered cleared when it needs a resolve */
+   intel_miptree_slice_set_hiz_cleared(mt, level, layer, false);
 }
 
 
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
index 329eeb0..960a0df 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
@@ -162,6 +162,11 @@  struct intel_mipmap_level
        * resides at \c mt->hiz_mt->level[l].slice[s].
        */
       bool has_hiz;
+
+      /**
+       * Is HiZ cleared for this slice?
+       */
+      bool hiz_cleared;
    } *slice;
 };
 
@@ -633,6 +638,22 @@  intel_miptree_slice_has_hiz(struct intel_mipmap_tree *mt,
                             uint32_t layer);
 
 void
+intel_miptree_slice_set_hiz_cleared(struct intel_mipmap_tree *mt,
+                                    uint32_t level,
+                                    uint32_t layer,
+                                    bool cleared);
+
+void
+intel_miptree_set_all_slices_hiz_cleared(struct intel_mipmap_tree *mt,
+                                         uint32_t level,
+                                         bool cleared);
+
+bool
+intel_miptree_slice_get_hiz_cleared(struct intel_mipmap_tree *mt,
+                                    uint32_t level,
+                                    uint32_t layer);
+
+void
 intel_miptree_slice_set_needs_hiz_resolve(struct intel_mipmap_tree *mt,
                                           uint32_t level,
 					  uint32_t depth);

Comments

On Fri, Jan 03, 2014 at 02:28:45PM +0800, Chia-I Wu wrote:
> When the depth buffer is already cleared, skip GEN6_HIZ_OP_DEPTH_CLEAR.  This
> is made possible by tracking which slices have been cleared in
> "struct intel_mipmap_level".  The hiz_cleared flag is unset when the depth
> buffer is rendered to or when a HiZ resolve is needed.
> 
> For Unigine Tropics, the FPS improvement is 1.32134% +/- 0.161878% (n=13).
> 
> v2:
> - unset hiz_cleared automatically in intel_miptree_slice_set_needs_hiz_resolve
> - set/unset hiz_cleared with intel_renderbuffer_att_set_needs_depth_resolve
> 
> Signed-off-by: Chia-I Wu <olv@lunarg.com>
> ---
>  src/mesa/drivers/dri/i965/brw_clear.c         | 54 +++++++++++++++++++--------
>  src/mesa/drivers/dri/i965/brw_draw.c          |  2 +-
>  src/mesa/drivers/dri/i965/intel_fbo.c         | 18 ++++++++-
>  src/mesa/drivers/dri/i965/intel_fbo.h         |  4 +-
>  src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 39 +++++++++++++++++++
>  src/mesa/drivers/dri/i965/intel_mipmap_tree.h | 21 +++++++++++
>  6 files changed, 118 insertions(+), 20 deletions(-)

This patch looks good to me.
Reviewed-by: Chad Versace <chad.versace@linux.intel.com>

By the way, the patch no longer cleanly applies to master, but the
conflicts look trivial.