[Mesa-dev,10/37] i965/gen6/gs: Compute URB entry size for user-provided geometry shaders.

Submitted by Iago Toral Quiroga on Aug. 14, 2014, 11:11 a.m.

Details

Message ID 1408014729-12708-11-git-send-email-itoral@igalia.com
State Accepted
Headers show

Not browsing as part of any series.

Commit Message

Iago Toral Quiroga Aug. 14, 2014, 11:11 a.m.
---
 src/mesa/drivers/dri/i965/brw_defines.h |  8 ++-
 src/mesa/drivers/dri/i965/brw_vec4_gs.c | 87 +++++++++++++++++++++------------
 2 files changed, 62 insertions(+), 33 deletions(-)

Patch hide | download patch | download mbox

diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index 60b3846..a2b40fb 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -1576,10 +1576,14 @@  enum brw_message_target {
 # define GEN7_URB_ENTRY_SIZE_SHIFT                      16
 # define GEN7_URB_STARTING_ADDRESS_SHIFT                25
 
-/* "GS URB Entry Allocation Size" is a U9-1 field, so the maximum gs_size
+/* Gen7 "GS URB Entry Allocation Size" is a U9-1 field, so the maximum gs_size
  * is 2^9, or 512.  It's counted in multiples of 64 bytes.
  */
-#define GEN7_MAX_GS_URB_ENTRY_SIZE_BYTES		(512*64)
+#define GEN7_MAX_GS_URB_ENTRY_SIZE_BYTES                (512*64)
+/* Gen6 "GS URB Entry Allocation Size" is defined as a number of 1024-bit
+ * (128 bytes) URB rows and the maximum allowed value is 5 rows.
+ */
+#define GEN6_MAX_GS_URB_ENTRY_SIZE_BYTES                (5*128)
 
 #define _3DSTATE_PUSH_CONSTANT_ALLOC_VS         0x7912 /* GEN7+ */
 #define _3DSTATE_PUSH_CONSTANT_ALLOC_GS         0x7915 /* GEN7+ */
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs.c b/src/mesa/drivers/dri/i965/brw_vec4_gs.c
index 2d9e8c2..a445174 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_gs.c
+++ b/src/mesa/drivers/dri/i965/brw_vec4_gs.c
@@ -75,31 +75,36 @@  do_gs_prog(struct brw_context *brw,
     */
    c.prog_data.base.base.nr_params = ALIGN(param_count, 4) / 4 + gs->num_samplers;
 
-   if (gp->program.OutputType == GL_POINTS) {
-      /* When the output type is points, the geometry shader may output data
-       * to multiple streams, and EndPrimitive() has no effect.  So we
-       * configure the hardware to interpret the control data as stream ID.
-       */
-      c.prog_data.control_data_format = GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID;
-
-      /* We only have to emit control bits if we are using streams */
-      if (prog->Geom.UsesStreams)
-         c.control_data_bits_per_vertex = 2;
-      else
-         c.control_data_bits_per_vertex = 0;
+   if (brw->gen >= 7) {
+      if (gp->program.OutputType == GL_POINTS) {
+         /* When the output type is points, the geometry shader may output data
+          * to multiple streams, and EndPrimitive() has no effect.  So we
+          * configure the hardware to interpret the control data as stream ID.
+          */
+         c.prog_data.control_data_format = GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID;
+
+         /* We only have to emit control bits if we are using streams */
+         if (prog->Geom.UsesStreams)
+            c.control_data_bits_per_vertex = 2;
+         else
+            c.control_data_bits_per_vertex = 0;
+      } else {
+         /* When the output type is triangle_strip or line_strip, EndPrimitive()
+          * may be used to terminate the current strip and start a new one
+          * (similar to primitive restart), and outputting data to multiple
+          * streams is not supported.  So we configure the hardware to interpret
+          * the control data as EndPrimitive information (a.k.a. "cut bits").
+          */
+         c.prog_data.control_data_format = GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_CUT;
+
+         /* We only need to output control data if the shader actually calls
+          * EndPrimitive().
+          */
+         c.control_data_bits_per_vertex = gp->program.UsesEndPrimitive ? 1 : 0;
+      }
    } else {
-      /* When the output type is triangle_strip or line_strip, EndPrimitive()
-       * may be used to terminate the current strip and start a new one
-       * (similar to primitive restart), and outputting data to multiple
-       * streams is not supported.  So we configure the hardware to interpret
-       * the control data as EndPrimitive information (a.k.a. "cut bits").
-       */
-      c.prog_data.control_data_format = GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_CUT;
-
-      /* We only need to output control data if the shader actually calls
-       * EndPrimitive().
-       */
-      c.control_data_bits_per_vertex = gp->program.UsesEndPrimitive ? 1 : 0;
+      /* There are no control data bits in gen6. */
+      c.control_data_bits_per_vertex = 0;
    }
    c.control_data_header_size_bits =
       gp->program.VerticesOut * c.control_data_bits_per_vertex;
@@ -170,7 +175,8 @@  do_gs_prog(struct brw_context *brw,
     *
     */
    unsigned output_vertex_size_bytes = c.prog_data.base.vue_map.num_slots * 16;
-   assert(output_vertex_size_bytes <= GEN7_MAX_GS_OUTPUT_VERTEX_SIZE_BYTES);
+   assert(brw->gen == 6 ||
+          output_vertex_size_bytes <= GEN7_MAX_GS_OUTPUT_VERTEX_SIZE_BYTES);
    c.prog_data.output_vertex_size_hwords =
       ALIGN(output_vertex_size_bytes, 32) / 32;
 
@@ -200,10 +206,20 @@  do_gs_prog(struct brw_context *brw,
     * the above figures are all worst-case, and most of them scale with the
     * number of output vertices.  So we'll just calculate the amount of space
     * we need, and if it's too large, fail to compile.
+    *
+    * The above is for gen7+ where we have a single URB entry that will hold
+    * all the output. In gen6, we will have to allocate URB entries for every
+    * vertex we emit, so our URB entries only need to be large enough to hold
+    * a single vertex. Also, gen6 does not have a control data header.
     */
-   unsigned output_size_bytes =
-      c.prog_data.output_vertex_size_hwords * 32 * gp->program.VerticesOut;
-   output_size_bytes += 32 * c.prog_data.control_data_header_size_hwords;
+   unsigned output_size_bytes;
+   if (brw->gen >= 7) {
+      output_size_bytes =
+         c.prog_data.output_vertex_size_hwords * 32 * gp->program.VerticesOut;
+      output_size_bytes += 32 * c.prog_data.control_data_header_size_hwords;
+   } else {
+      output_size_bytes = c.prog_data.output_vertex_size_hwords * 32;
+   }
 
    /* Broadwell stores "Vertex Count" as a full 8 DWord (32 byte) URB output,
     * which comes before the control header.
@@ -212,11 +228,20 @@  do_gs_prog(struct brw_context *brw,
       output_size_bytes += 32;
 
    assert(output_size_bytes >= 1);
-   if (output_size_bytes > GEN7_MAX_GS_URB_ENTRY_SIZE_BYTES)
+   int max_output_size_bytes = GEN7_MAX_GS_URB_ENTRY_SIZE_BYTES;
+   if (brw->gen == 6)
+      max_output_size_bytes = GEN6_MAX_GS_URB_ENTRY_SIZE_BYTES;
+   if (output_size_bytes > max_output_size_bytes)
       return false;
 
-   /* URB entry sizes are stored as a multiple of 64 bytes. */
-   c.prog_data.base.urb_entry_size = ALIGN(output_size_bytes, 64) / 64;
+
+   /* URB entry sizes are stored as a multiple of 64 bytes in gen7+ and
+    * a multiple of 128 bytes in gen6.
+    */
+   if (brw->gen >= 7)
+      c.prog_data.base.urb_entry_size = ALIGN(output_size_bytes, 64) / 64;
+   else
+      c.prog_data.base.urb_entry_size = ALIGN(output_size_bytes, 128) / 128;
 
    c.prog_data.output_topology = prim_to_hw_prim[gp->program.OutputType];