[Mesa-dev,v3] i965: Implement GL_PRIMITIVES_GENERATED with non-zero streams.

Submitted by Iago Toral Quiroga on June 26, 2014, 6:24 a.m.

Details

Message ID 1403763849-333-1-git-send-email-itoral@igalia.com
State Accepted
Commit 3178d2474ae5bdd1102fb3d76a60d1d63c961ff5
Headers show

Not browsing as part of any series.

Commit Message

Iago Toral Quiroga June 26, 2014, 6:24 a.m.
So far we have been using CL_INVOCATION_COUNT to resolve this query but this
is no good with streams, as only stream 0 reaches the clipping stage. Instead
we will use SO_PRIM_STORAGE_NEEDED which can keep track of the primitives sent
to each individual stream.

Since SO_PRIM_STORAGE_NEEDED is related to the SOL stage, and according to
ARB_transform_feedback3 we need to be able to query primitives generated in
each stream whether transform feedback is active or not, what we do is
enable the SOL unit even if transform feedback is not active, but disable all
output buffers in that case. This effectively disables transform feedback
but still allows statistics to be enabled, so SO_PRIM_STORAGE_NEEDED keeps
counting even when transform feedback is not active.
---
 src/mesa/drivers/dri/i965/gen6_queryobj.c  | 13 +++++++++----
 src/mesa/drivers/dri/i965/gen7_sol_state.c | 20 +++++++++++++++++---
 2 files changed, 26 insertions(+), 7 deletions(-)

Patch hide | download patch | download mbox

diff --git a/src/mesa/drivers/dri/i965/gen6_queryobj.c b/src/mesa/drivers/dri/i965/gen6_queryobj.c
index 0cb64ca..b4b1509 100644
--- a/src/mesa/drivers/dri/i965/gen6_queryobj.c
+++ b/src/mesa/drivers/dri/i965/gen6_queryobj.c
@@ -84,11 +84,16 @@  brw_store_register_mem64(struct brw_context *brw,
 
 static void
 write_primitives_generated(struct brw_context *brw,
-                           drm_intel_bo *query_bo, int idx)
+                           drm_intel_bo *query_bo, int stream, int idx)
 {
    intel_batchbuffer_emit_mi_flush(brw);
 
-   brw_store_register_mem64(brw, query_bo, CL_INVOCATION_COUNT, idx);
+   if (brw->gen >= 7) {
+      brw_store_register_mem64(brw, query_bo,
+                               GEN7_SO_PRIM_STORAGE_NEEDED(stream), idx);
+   } else {
+      brw_store_register_mem64(brw, query_bo, CL_INVOCATION_COUNT, idx);
+   }
 }
 
 static void
@@ -240,7 +245,7 @@  gen6_begin_query(struct gl_context *ctx, struct gl_query_object *q)
       break;
 
    case GL_PRIMITIVES_GENERATED:
-      write_primitives_generated(brw, query->bo, 0);
+      write_primitives_generated(brw, query->bo, query->Base.Stream, 0);
       break;
 
    case GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN:
@@ -279,7 +284,7 @@  gen6_end_query(struct gl_context *ctx, struct gl_query_object *q)
       break;
 
    case GL_PRIMITIVES_GENERATED:
-      write_primitives_generated(brw, query->bo, 1);
+      write_primitives_generated(brw, query->bo, query->Base.Stream, 1);
       break;
 
    case GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN:
diff --git a/src/mesa/drivers/dri/i965/gen7_sol_state.c b/src/mesa/drivers/dri/i965/gen7_sol_state.c
index 11b2e2e..d2c3ae3 100644
--- a/src/mesa/drivers/dri/i965/gen7_sol_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_sol_state.c
@@ -223,14 +223,28 @@  upload_3dstate_streamout(struct brw_context *brw, bool active,
    uint32_t dw1 = 0, dw2 = 0;
    int i;
 
+   /*
+    * From ARB_transform_feedback3:
+    *
+    *   "When a generated primitive query for a vertex stream is active, the
+    *   primitives-generated count is incremented every time a primitive
+    *   emitted to that stream reaches the Discarding Rasterization stage
+    *   (see Section 3.x) right before rasterization. This counter is
+    *   incremented whether or not transform feedback is active."
+    *
+    * Since we can only keep track of generated primitives for each stream
+    * in the SOL stage we need to make sure it is always active even if
+    * transform feedback is not. This way we can track primitives generated
+    * in each stream via SO_PRIM_STORAGE_NEEDED.
+    */
+   dw1 |= SO_FUNCTION_ENABLE;
+   dw1 |= SO_STATISTICS_ENABLE;
+
    if (active) {
       int urb_entry_read_offset = 0;
       int urb_entry_read_length = (vue_map->num_slots + 1) / 2 -
 	 urb_entry_read_offset;
 
-      dw1 |= SO_FUNCTION_ENABLE;
-      dw1 |= SO_STATISTICS_ENABLE;
-
       /* _NEW_LIGHT */
       if (ctx->Light.ProvokingVertex != GL_FIRST_VERTEX_CONVENTION)
 	 dw1 |= SO_REORDER_TRAILING;

Comments

Reviewed-by: Chris Forbes <chrisf@ijw.co.nz>

On Thu, Jun 26, 2014 at 6:24 PM, Iago Toral Quiroga <itoral@igalia.com> wrote:
> So far we have been using CL_INVOCATION_COUNT to resolve this query but this
> is no good with streams, as only stream 0 reaches the clipping stage. Instead
> we will use SO_PRIM_STORAGE_NEEDED which can keep track of the primitives sent
> to each individual stream.
>
> Since SO_PRIM_STORAGE_NEEDED is related to the SOL stage and according to
> ARB_transform_feedback3 we need to be able to query primitives generated in
> each stream whether transform feedback is active or not what we do is to
> enable the SOL unit even if transform feedback is not active but disable all
> output buffers in that case. This effectively disables transform feedback
> but permits activation of statistics enabling SO_PRIM_STORAGE_NEEDED even
> when transform feedback is not active.
> ---
>  src/mesa/drivers/dri/i965/gen6_queryobj.c  | 13 +++++++++----
>  src/mesa/drivers/dri/i965/gen7_sol_state.c | 20 +++++++++++++++++---
>  2 files changed, 26 insertions(+), 7 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/gen6_queryobj.c b/src/mesa/drivers/dri/i965/gen6_queryobj.c
> index 0cb64ca..b4b1509 100644
> --- a/src/mesa/drivers/dri/i965/gen6_queryobj.c
> +++ b/src/mesa/drivers/dri/i965/gen6_queryobj.c
> @@ -84,11 +84,16 @@ brw_store_register_mem64(struct brw_context *brw,
>
>  static void
>  write_primitives_generated(struct brw_context *brw,
> -                           drm_intel_bo *query_bo, int idx)
> +                           drm_intel_bo *query_bo, int stream, int idx)
>  {
>     intel_batchbuffer_emit_mi_flush(brw);
>
> -   brw_store_register_mem64(brw, query_bo, CL_INVOCATION_COUNT, idx);
> +   if (brw->gen >= 7) {
> +      brw_store_register_mem64(brw, query_bo,
> +                               GEN7_SO_PRIM_STORAGE_NEEDED(stream), idx);
> +   } else {
> +      brw_store_register_mem64(brw, query_bo, CL_INVOCATION_COUNT, idx);
> +   }
>  }
>
>  static void
> @@ -240,7 +245,7 @@ gen6_begin_query(struct gl_context *ctx, struct gl_query_object *q)
>        break;
>
>     case GL_PRIMITIVES_GENERATED:
> -      write_primitives_generated(brw, query->bo, 0);
> +      write_primitives_generated(brw, query->bo, query->Base.Stream, 0);
>        break;
>
>     case GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN:
> @@ -279,7 +284,7 @@ gen6_end_query(struct gl_context *ctx, struct gl_query_object *q)
>        break;
>
>     case GL_PRIMITIVES_GENERATED:
> -      write_primitives_generated(brw, query->bo, 1);
> +      write_primitives_generated(brw, query->bo, query->Base.Stream, 1);
>        break;
>
>     case GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN:
> diff --git a/src/mesa/drivers/dri/i965/gen7_sol_state.c b/src/mesa/drivers/dri/i965/gen7_sol_state.c
> index 11b2e2e..d2c3ae3 100644
> --- a/src/mesa/drivers/dri/i965/gen7_sol_state.c
> +++ b/src/mesa/drivers/dri/i965/gen7_sol_state.c
> @@ -223,14 +223,28 @@ upload_3dstate_streamout(struct brw_context *brw, bool active,
>     uint32_t dw1 = 0, dw2 = 0;
>     int i;
>
> +   /*
> +    * From ARB_transform_feedback3:
> +    *
> +    *   "When a generated primitive query for a vertex stream is active, the
> +    *   primitives-generated count is incremented every time a primitive
> +    *   emitted to that stream reaches the Discarding Rasterization stage
> +    *   (see Section 3.x) right before rasterization. This counter is
> +    *   incremented whether or not transform feedback is active."
> +    *
> +    * Since we can only keep track of generated primitives for each stream
> +    * in the SOL stage we need to make sure it is always active even if
> +    * transform beedback is not. This way we can track primitives generated
> +    * in each stream via SO_PRIMITIVE_STORAGE_NEEDED.
> +    */
> +   dw1 |= SO_FUNCTION_ENABLE;
> +   dw1 |= SO_STATISTICS_ENABLE;
> +
>     if (active) {
>        int urb_entry_read_offset = 0;
>        int urb_entry_read_length = (vue_map->num_slots + 1) / 2 -
>          urb_entry_read_offset;
>
> -      dw1 |= SO_FUNCTION_ENABLE;
> -      dw1 |= SO_STATISTICS_ENABLE;
> -
>        /* _NEW_LIGHT */
>        if (ctx->Light.ProvokingVertex != GL_FIRST_VERTEX_CONVENTION)
>          dw1 |= SO_REORDER_TRAILING;
> --
> 1.9.1
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
On Thu, 2014-06-26 at 08:24 +0200, Iago Toral Quiroga wrote:
> So far we have been using CL_INVOCATION_COUNT to resolve this query 
> but this
> is no good with streams, as only stream 0 reaches the clipping 
> stage. Instead
> we will use SO_PRIM_STORAGE_NEEDED which can keep track of the 
> primitives sent
> to each individual stream.
>  
> Since SO_PRIM_STORAGE_NEEDED is related to the SOL stage and 
> according to
> ARB_transform_feedback3 we need to be able to query primitives 
> generated in
> each stream whether transform feedback is active or not what we do 
> is to
> enable the SOL unit even if transform feedback is not active but 
> disable all
> output buffers in that case. This effectively disables transform 
> feedback
> but permits activation of statistics enabling SO_PRIM_STORAGE_NEEDED 
> even
> when transform feedback is not active.
> ---
>  src/mesa/drivers/dri/i965/gen6_queryobj.c  | 13 +++++++++----
>  src/mesa/drivers/dri/i965/gen7_sol_state.c | 20 +++++++++++++++++---
>  2 files changed, 26 insertions(+), 7 deletions(-)
>  
> diff --git a/src/mesa/drivers/dri/i965/gen6_queryobj.c 
> b/src/mesa/drivers/dri/i965/gen6_queryobj.c
> index 0cb64ca..b4b1509 100644
> --- a/src/mesa/drivers/dri/i965/gen6_queryobj.c
> +++ b/src/mesa/drivers/dri/i965/gen6_queryobj.c
> @@ -84,11 +84,16 @@ brw_store_register_mem64(struct brw_context *brw,
>  static void
>  write_primitives_generated(struct brw_context *brw,
> -                           drm_intel_bo *query_bo, int idx)
> +                           drm_intel_bo *query_bo, int stream, int 
> idx)
>  {
>     intel_batchbuffer_emit_mi_flush(brw);
> -   brw_store_register_mem64(brw, query_bo, CL_INVOCATION_COUNT, 
> idx);
> +   if (brw->gen >= 7) {
> +      brw_store_register_mem64(brw, query_bo,
> +                               GEN7_SO_PRIM_STORAGE_NEEDED(stream), 
> idx);
> +   } else {
> +      brw_store_register_mem64(brw, query_bo, CL_INVOCATION_COUNT, 
> idx);
> +   }
>  }
>  static void
> @@ -240,7 +245,7 @@ gen6_begin_query(struct gl_context *ctx, struct 
> gl_query_object *q)
>        break;
>     case GL_PRIMITIVES_GENERATED:
> -      write_primitives_generated(brw, query->bo, 0);
> +      write_primitives_generated(brw, query->bo, 
> query->Base.Stream, 0);
>        break;
>     case GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN:
> @@ -279,7 +284,7 @@ gen6_end_query(struct gl_context *ctx, struct 
> gl_query_object *q)
>        break;
>     case GL_PRIMITIVES_GENERATED:
> -      write_primitives_generated(brw, query->bo, 1);
> +      write_primitives_generated(brw, query->bo, 
> query->Base.Stream, 1);
>        break;
>     case GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN:
> diff --git a/src/mesa/drivers/dri/i965/gen7_sol_state.c 
> b/src/mesa/drivers/dri/i965/gen7_sol_state.c
> index 11b2e2e..d2c3ae3 100644
> --- a/src/mesa/drivers/dri/i965/gen7_sol_state.c
> +++ b/src/mesa/drivers/dri/i965/gen7_sol_state.c
> @@ -223,14 +223,28 @@ upload_3dstate_streamout(struct brw_context 
> *brw, bool active,
>     uint32_t dw1 = 0, dw2 = 0;
>     int i;
> +   /*
> +    * From ARB_transform_feedback3:
> +    *
> +    *   "When a generated primitive query for a vertex stream is 
> active, the
> +    *   primitives-generated count is incremented every time a 
> primitive
> +    *   emitted to that stream reaches the Discarding Rasterization 
> stage
> +    *   (see Section 3.x) right before rasterization. This counter 
> is
> +    *   incremented whether or not transform feedback is active."
> +    *
> +    * Since we can only keep track of generated primitives for each 
> stream
> +    * in the SOL stage we need to make sure it is always active 
> even if
> +    * transform beedback is not. This way we can track primitives 
> generated
> +    * in each stream via SO_PRIMITIVE_STORAGE_NEEDED.
> +    */
> +   dw1 |= SO_FUNCTION_ENABLE;
> +   dw1 |= SO_STATISTICS_ENABLE;
> +
>     if (active) {
>        int urb_entry_read_offset = 0;
>        int urb_entry_read_length = (vue_map->num_slots + 1) / 2 -
>           urb_entry_read_offset;
> -      dw1 |= SO_FUNCTION_ENABLE;
> -      dw1 |= SO_STATISTICS_ENABLE;
> -
>        /* _NEW_LIGHT */
>        if (ctx->Light.ProvokingVertex != GL_FIRST_VERTEX_CONVENTION)
>           dw1 |= SO_REORDER_TRAILING;
I'm getting an instant GPU lockup on Ivy Bridge (3840QM) which I've 
bisected to this commit in mesa/mesa git master.