[2/3] i965/gen6-7/sol: Keep independent counters for the current and previous begin/end block.

Submitted by Francisco Jerez on Nov. 17, 2017, 10:28 p.m.

Details

Message ID 20171117222849.19850-2-currojerez@riseup.net
State New
Headers show
Series "Series without cover letter" ( rev: 1 ) in Mesa

Not browsing as part of any series.

Commit Message

Francisco Jerez Nov. 17, 2017, 10:28 p.m.
This allows us to aggregate the primitive counts of a completed
transform feedback begin/end block lazily, which in the most typical
case (where glDrawTransformFeedback is not used) will allow us to
avoid aggregating the primitive counters on the CPU altogether,
preventing a stall on previous rendering during
glBeginTransformFeedback(), which dramatically improves performance of
applications that rely heavily on transform feedback.

Improves performance of SynMark2 OglGSCloth by 65.52% ±0.25% (data
gathered on VLV).
---
 src/mesa/drivers/dri/i965/brw_context.h    |  9 ++++---
 src/mesa/drivers/dri/i965/gen6_sol.c       | 39 +++++++++++++++++-------------
 src/mesa/drivers/dri/i965/gen7_sol_state.c | 15 ++++++------
 3 files changed, 36 insertions(+), 27 deletions(-)

Patch hide | download patch | download mbox

diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 6a36cb12c84..c0b1c9f9075 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -545,6 +545,12 @@  struct brw_transform_feedback_object {
     */
    struct brw_transform_feedback_counter counter;
 
+   /**
+    * Count of primitives generated during the previous transform feedback
+    * operation.  Used to implement DrawTransformFeedback().
+    */
+   struct brw_transform_feedback_counter previous_counter;
+
    /**
     * Number of vertices written between last Begin/EndTransformFeedback().
     *
@@ -1486,9 +1492,6 @@  brw_resume_transform_feedback(struct gl_context *ctx,
 void
 brw_save_primitives_written_counters(struct brw_context *brw,
                                      struct brw_transform_feedback_object *obj);
-void
-brw_compute_xfb_vertices_written(struct brw_context *brw,
-                                 struct brw_transform_feedback_object *obj);
 GLsizei
 brw_get_transform_feedback_vertex_count(struct gl_context *ctx,
                                         struct gl_transform_feedback_object *obj,
diff --git a/src/mesa/drivers/dri/i965/gen6_sol.c b/src/mesa/drivers/dri/i965/gen6_sol.c
index a909339e166..b1baf01bcd9 100644
--- a/src/mesa/drivers/dri/i965/gen6_sol.c
+++ b/src/mesa/drivers/dri/i965/gen6_sol.c
@@ -288,6 +288,8 @@  brw_save_primitives_written_counters(struct brw_context *brw,
 
    /* Check if there's enough space for a new pair of four values. */
    if ((obj->counter.bo_end + 2) * streams * sizeof(uint64_t) >= 4096) {
+      aggregate_transform_feedback_counter(brw, obj->prim_count_bo,
+                                           &obj->previous_counter);
       aggregate_transform_feedback_counter(brw, obj->prim_count_bo,
                                            &obj->counter);
    }
@@ -316,6 +318,7 @@  brw_save_primitives_written_counters(struct brw_context *brw,
 static void
 compute_vertices_written_so_far(struct brw_context *brw,
                                 struct brw_transform_feedback_object *obj,
+                                struct brw_transform_feedback_counter *counter,
                                 uint64_t *vertices_written)
 {
    const struct gl_context *ctx = &brw->ctx;
@@ -336,25 +339,26 @@  compute_vertices_written_so_far(struct brw_context *brw,
    }
 
    /* Get the number of primitives generated. */
-   aggregate_transform_feedback_counter(brw, obj->prim_count_bo, &obj->counter);
+   aggregate_transform_feedback_counter(brw, obj->prim_count_bo, counter);
 
    for (int i = 0; i < ctx->Const.MaxVertexStreams; i++) {
-      vertices_written[i] = vertices_per_prim * obj->counter.accum[i];
+      vertices_written[i] = vertices_per_prim * counter->accum[i];
    }
 }
 
 /**
- * Compute the number of vertices written by this transform feedback operation.
+ * Compute the number of vertices written by the last transform feedback
+ * begin/end block.
  */
-void
-brw_compute_xfb_vertices_written(struct brw_context *brw,
-                                 struct brw_transform_feedback_object *obj)
+static void
+compute_xfb_vertices_written(struct brw_context *brw,
+                             struct brw_transform_feedback_object *obj)
 {
    if (obj->vertices_written_valid || !obj->base.EndedAnytime)
       return;
 
-   compute_vertices_written_so_far(brw, obj, obj->vertices_written);
-
+   compute_vertices_written_so_far(brw, obj, &obj->previous_counter,
+                                   obj->vertices_written);
    obj->vertices_written_valid = true;
 }
 
@@ -376,7 +380,7 @@  brw_get_transform_feedback_vertex_count(struct gl_context *ctx,
    assert(obj->EndedAnytime);
    assert(stream < ctx->Const.MaxVertexStreams);
 
-   brw_compute_xfb_vertices_written(brw, brw_obj);
+   compute_xfb_vertices_written(brw, brw_obj);
    return brw_obj->vertices_written[stream];
 }
 
@@ -431,13 +435,6 @@  brw_begin_transform_feedback(struct gl_context *ctx, GLenum mode,
       ADVANCE_BATCH();
    }
 
-   /* We're about to lose the information needed to compute the number of
-    * vertices written during the last Begin/EndTransformFeedback section,
-    * so we can't delay it any further.
-    */
-   brw_compute_xfb_vertices_written(brw, brw_obj);
-   brw_reset_transform_feedback_counter(&brw_obj->counter);
-
    /* Store the starting value of the SO_NUM_PRIMS_WRITTEN counters. */
    brw_save_primitives_written_counters(brw, brw_obj);
 
@@ -456,6 +453,14 @@  brw_end_transform_feedback(struct gl_context *ctx,
    if (!obj->Paused)
       brw_save_primitives_written_counters(brw, brw_obj);
 
+   /* We've reached the end of a transform feedback begin/end block.  This
+    * means that future DrawTransformFeedback() calls will need to pick up the
+    * results of the current counter, and that it's time to roll back the
+    * current primitive counter to zero.
+    */
+   brw_obj->previous_counter = brw_obj->counter;
+   brw_reset_transform_feedback_counter(&brw_obj->counter);
+
    /* EndTransformFeedback() means that we need to update the number of
     * vertices written.  Since it's only necessary if DrawTransformFeedback()
     * is called and it means mapping a buffer object, we delay computing it
@@ -490,7 +495,7 @@  brw_resume_transform_feedback(struct gl_context *ctx,
 
    /* Reload SVBI 0 with the count of vertices written so far. */
    uint64_t svbi;
-   compute_vertices_written_so_far(brw, brw_obj, &svbi);
+   compute_vertices_written_so_far(brw, brw_obj, &brw_obj->counter, &svbi);
 
    BEGIN_BATCH(4);
    OUT_BATCH(_3DSTATE_GS_SVB_INDEX << 16 | (4 - 2));
diff --git a/src/mesa/drivers/dri/i965/gen7_sol_state.c b/src/mesa/drivers/dri/i965/gen7_sol_state.c
index f097e2250be..3801c73f654 100644
--- a/src/mesa/drivers/dri/i965/gen7_sol_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_sol_state.c
@@ -45,13 +45,6 @@  gen7_begin_transform_feedback(struct gl_context *ctx, GLenum mode,
 
    assert(brw->screen->devinfo.gen == 7);
 
-   /* We're about to lose the information needed to compute the number of
-    * vertices written during the last Begin/EndTransformFeedback section,
-    * so we can't delay it any further.
-    */
-   brw_compute_xfb_vertices_written(brw, brw_obj);
-   brw_reset_transform_feedback_counter(&brw_obj->counter);
-
    /* Store the starting value of the SO_NUM_PRIMS_WRITTEN counters. */
    brw_save_primitives_written_counters(brw, brw_obj);
 
@@ -86,6 +79,14 @@  gen7_end_transform_feedback(struct gl_context *ctx,
    if (!obj->Paused)
       brw_save_primitives_written_counters(brw, brw_obj);
 
+   /* We've reached the end of a transform feedback begin/end block.  This
+    * means that future DrawTransformFeedback() calls will need to pick up the
+    * results of the current counter, and that it's time to roll back the
+    * current primitive counter to zero.
+    */
+   brw_obj->previous_counter = brw_obj->counter;
+   brw_reset_transform_feedback_counter(&brw_obj->counter);
+
    /* EndTransformFeedback() means that we need to update the number of
     * vertices written.  Since it's only necessary if DrawTransformFeedback()
     * is called and it means mapping a buffer object, we delay computing it