st/mesa: Pass index to pipe->create_query() for statistics queries.

Submitted by Kenneth Graunke on Oct. 15, 2018, 6:29 a.m.

Details

Message ID 20181015062939.16863-1-kenneth@whitecape.org
State New
Headers show
Series "st/mesa: Pass index to pipe->create_query() for statistics queries." ( rev: 1 ) in Mesa

Not browsing as part of any series.

Commit Message

Kenneth Graunke Oct. 15, 2018, 6:29 a.m.
GL exposes separate queries for each pipeline statistics counter.
For some reason, Gallium chose to map them all to a single target,
PIPE_QUERY_PIPELINE_STATISTICS.  Radeon hardware appears to query
them all as a group.  pipe->get_query_result_resource() takes an
index, indicating which to write to the buffer.  The CPU-side hook,
pipe->get_query_result(), simply writes them all, and st/mesa returns
the one that was actually desired.

On Intel hardware, each individual pipeline statistics value is handled
as a separate counter and query.  We can query each individually, and
that is more efficient than querying all 11 counters each time.  But,
we need pipe->get_query_result() to know which one to return.

To handle this, we pass the index into pipe->create_query(), which
was previously always 0 for these queries.  Drivers which return all
of the counters as a group can simply ignore it; drivers querying one
at a time can use it to distinguish between the counters.

This is the least invasive fix, but it is kind of ugly, and I wonder
whether we'd be better off just adding PIPE_QUERY_IA_VERTICES (etc.)
targets...
---
 src/mesa/state_tracker/st_cb_queryobj.c | 76 ++++++++++++-------------
 1 file changed, 36 insertions(+), 40 deletions(-)

Patch hide | download patch | download mbox

diff --git a/src/mesa/state_tracker/st_cb_queryobj.c b/src/mesa/state_tracker/st_cb_queryobj.c
index 69e6004c3f1..0dc06ceb574 100644
--- a/src/mesa/state_tracker/st_cb_queryobj.c
+++ b/src/mesa/state_tracker/st_cb_queryobj.c
@@ -88,6 +88,40 @@  st_DeleteQuery(struct gl_context *ctx, struct gl_query_object *q)
    free(stq);
 }
 
+static int
+target_to_index(const struct gl_query_object *q)
+{
+   switch (q->Target) {
+   case GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN:
+   case GL_TRANSFORM_FEEDBACK_STREAM_OVERFLOW_ARB:
+   case GL_TRANSFORM_FEEDBACK_OVERFLOW_ARB:
+      return q->Stream;
+   case GL_VERTICES_SUBMITTED_ARB:
+      return 0;
+   case GL_PRIMITIVES_SUBMITTED_ARB:
+      return 1;
+   case GL_VERTEX_SHADER_INVOCATIONS_ARB:
+      return 2;
+   case GL_GEOMETRY_SHADER_INVOCATIONS:
+      return 3;
+   case GL_GEOMETRY_SHADER_PRIMITIVES_EMITTED_ARB:
+      return 4;
+   case GL_CLIPPING_INPUT_PRIMITIVES_ARB:
+      return 5;
+   case GL_CLIPPING_OUTPUT_PRIMITIVES_ARB:
+      return 6;
+   case GL_FRAGMENT_SHADER_INVOCATIONS_ARB:
+      return 7;
+   case GL_TESS_CONTROL_SHADER_PATCHES_ARB:
+      return 8;
+   case GL_TESS_EVALUATION_SHADER_INVOCATIONS_ARB:
+      return 9;
+   case GL_COMPUTE_SHADER_INVOCATIONS_ARB:
+      return 10;
+   default:
+      return 0;
+   }
+}
 
 static void
 st_BeginQuery(struct gl_context *ctx, struct gl_query_object *q)
@@ -164,7 +198,7 @@  st_BeginQuery(struct gl_context *ctx, struct gl_query_object *q)
          ret = pipe->end_query(pipe, stq->pq_begin);
    } else {
       if (!stq->pq) {
-         stq->pq = pipe->create_query(pipe, type, q->Stream);
+         stq->pq = pipe->create_query(pipe, type, target_to_index(q));
          stq->type = type;
       }
       if (stq->pq)
@@ -383,46 +417,8 @@  st_StoreQueryResult(struct gl_context *ctx, struct gl_query_object *q,
 
    if (pname == GL_QUERY_RESULT_AVAILABLE) {
       index = -1;
-   } else if (stq->type == PIPE_QUERY_PIPELINE_STATISTICS) {
-      switch (q->Target) {
-      case GL_VERTICES_SUBMITTED_ARB:
-         index = 0;
-         break;
-      case GL_PRIMITIVES_SUBMITTED_ARB:
-         index = 1;
-         break;
-      case GL_VERTEX_SHADER_INVOCATIONS_ARB:
-         index = 2;
-         break;
-      case GL_GEOMETRY_SHADER_INVOCATIONS:
-         index = 3;
-         break;
-      case GL_GEOMETRY_SHADER_PRIMITIVES_EMITTED_ARB:
-         index = 4;
-         break;
-      case GL_CLIPPING_INPUT_PRIMITIVES_ARB:
-         index = 5;
-         break;
-      case GL_CLIPPING_OUTPUT_PRIMITIVES_ARB:
-         index = 6;
-         break;
-      case GL_FRAGMENT_SHADER_INVOCATIONS_ARB:
-         index = 7;
-         break;
-      case GL_TESS_CONTROL_SHADER_PATCHES_ARB:
-         index = 8;
-         break;
-      case GL_TESS_EVALUATION_SHADER_INVOCATIONS_ARB:
-         index = 9;
-         break;
-      case GL_COMPUTE_SHADER_INVOCATIONS_ARB:
-         index = 10;
-         break;
-      default:
-         unreachable("Unexpected target");
-      }
    } else {
-      index = 0;
+      index = target_to_index(q);
    }
 
    pipe->get_query_result_resource(pipe, stq->pq, wait, result_type, index,

Comments

FWIW the gallium pipeline stats query exists for way longer than the GL
ARB spec for it, and at least llvmpipe implemented it for ages.
So the reason for it being like that is due to dx10 (which always
queries these together), when gl couldn't do it at all.

To make it a bit nicer you could use new defines instead of just numbers
for indices, without making the change more intrusive.
I'm not sure it's really worth the trouble of splitting it up (well it
would mean we'd have to emit a boatload of queries for dx10, unless you
just add additional ones, but then the drivers would need to support
both...), since I don't think it's really something which gets used a lot.

Some comment inline.


Am 15.10.18 um 08:29 schrieb Kenneth Graunke:
> GL exposes separate queries for each pipeline statistics counter.

> For some reason, Gallium chose to map them all to a single target,

> PIPE_QUERY_PIPELINE_STATISTICS.  Radeon hardware appears to query

> them all as a group.  pipe->get_query_result_resource() takes an

> index, indicating which to write to the buffer.  The CPU-side hook,

> pipe->get_query_result(), simply writes them all, and st/mesa returns

> the one that was actually desired.

> 

> On Intel hardware, each individual pipeline statistics value is handled

> as a separate counter and query.  We can query each individually, and

> that is more efficient than querying all 11 counters each time.  But,

> we need pipe->get_query_result() to know which one to return.

> 

> To handle this, we pass the index into pipe->create_query(), which

> was previously always 0 for these queries.  Drivers which return all

> of the counters as a group can simply ignore it; drivers querying one

> at a time can use it to distinguish between the counters.

> 

> This is the least invasive fix, but it is kind of ugly, and I wonder

> whether we'd be better off just adding PIPE_QUERY_IA_VERTICES (etc.)

> targets...

> ---

>  src/mesa/state_tracker/st_cb_queryobj.c | 76 ++++++++++++-------------

>  1 file changed, 36 insertions(+), 40 deletions(-)

> 

> diff --git a/src/mesa/state_tracker/st_cb_queryobj.c b/src/mesa/state_tracker/st_cb_queryobj.c

> index 69e6004c3f1..0dc06ceb574 100644

> --- a/src/mesa/state_tracker/st_cb_queryobj.c

> +++ b/src/mesa/state_tracker/st_cb_queryobj.c

> @@ -88,6 +88,40 @@ st_DeleteQuery(struct gl_context *ctx, struct gl_query_object *q)

>     free(stq);

>  }

>  

> +static int

> +target_to_index(const struct gl_query_object *q)

> +{

> +   switch (q->Target) {

> +   case GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN:

> +   case GL_TRANSFORM_FEEDBACK_STREAM_OVERFLOW_ARB:

> +   case GL_TRANSFORM_FEEDBACK_OVERFLOW_ARB:

The last one here doesn't actually have an index (as it is used for
querying all streams) - albeit I suppose q->Stream should be 0 anyway.
GL_PRIMITIVES_GENERATED though can have an index.

Otherwise looks reasonable to me.

Roland


> +      return q->Stream;

> +   case GL_VERTICES_SUBMITTED_ARB:

> +      return 0;

> +   case GL_PRIMITIVES_SUBMITTED_ARB:

> +      return 1;

> +   case GL_VERTEX_SHADER_INVOCATIONS_ARB:

> +      return 2;

> +   case GL_GEOMETRY_SHADER_INVOCATIONS:

> +      return 3;

> +   case GL_GEOMETRY_SHADER_PRIMITIVES_EMITTED_ARB:

> +      return 4;

> +   case GL_CLIPPING_INPUT_PRIMITIVES_ARB:

> +      return 5;

> +   case GL_CLIPPING_OUTPUT_PRIMITIVES_ARB:

> +      return 6;

> +   case GL_FRAGMENT_SHADER_INVOCATIONS_ARB:

> +      return 7;

> +   case GL_TESS_CONTROL_SHADER_PATCHES_ARB:

> +      return 8;

> +   case GL_TESS_EVALUATION_SHADER_INVOCATIONS_ARB:

> +      return 9;

> +   case GL_COMPUTE_SHADER_INVOCATIONS_ARB:

> +      return 10;

> +   default:

> +      return 0;

> +   }

> +}

>  

>  static void

>  st_BeginQuery(struct gl_context *ctx, struct gl_query_object *q)

> @@ -164,7 +198,7 @@ st_BeginQuery(struct gl_context *ctx, struct gl_query_object *q)

>           ret = pipe->end_query(pipe, stq->pq_begin);

>     } else {

>        if (!stq->pq) {

> -         stq->pq = pipe->create_query(pipe, type, q->Stream);

> +         stq->pq = pipe->create_query(pipe, type, target_to_index(q));

>           stq->type = type;

>        }

>        if (stq->pq)

> @@ -383,46 +417,8 @@ st_StoreQueryResult(struct gl_context *ctx, struct gl_query_object *q,

>  

>     if (pname == GL_QUERY_RESULT_AVAILABLE) {

>        index = -1;

> -   } else if (stq->type == PIPE_QUERY_PIPELINE_STATISTICS) {

> -      switch (q->Target) {

> -      case GL_VERTICES_SUBMITTED_ARB:

> -         index = 0;

> -         break;

> -      case GL_PRIMITIVES_SUBMITTED_ARB:

> -         index = 1;

> -         break;

> -      case GL_VERTEX_SHADER_INVOCATIONS_ARB:

> -         index = 2;

> -         break;

> -      case GL_GEOMETRY_SHADER_INVOCATIONS:

> -         index = 3;

> -         break;

> -      case GL_GEOMETRY_SHADER_PRIMITIVES_EMITTED_ARB:

> -         index = 4;

> -         break;

> -      case GL_CLIPPING_INPUT_PRIMITIVES_ARB:

> -         index = 5;

> -         break;

> -      case GL_CLIPPING_OUTPUT_PRIMITIVES_ARB:

> -         index = 6;

> -         break;

> -      case GL_FRAGMENT_SHADER_INVOCATIONS_ARB:

> -         index = 7;

> -         break;

> -      case GL_TESS_CONTROL_SHADER_PATCHES_ARB:

> -         index = 8;

> -         break;

> -      case GL_TESS_EVALUATION_SHADER_INVOCATIONS_ARB:

> -         index = 9;

> -         break;

> -      case GL_COMPUTE_SHADER_INVOCATIONS_ARB:

> -         index = 10;

> -         break;

> -      default:

> -         unreachable("Unexpected target");

> -      }

>     } else {

> -      index = 0;

> +      index = target_to_index(q);

>     }

>  

>     pipe->get_query_result_resource(pipe, stq->pq, wait, result_type, index,

>