[Mesa-dev,02/37] i965/gen6/gs: refactor gen6_gs_state

Submitted by Iago Toral Quiroga on Aug. 14, 2014, 11:11 a.m.

Details

Message ID 1408014729-12708-3-git-send-email-itoral@igalia.com
State Accepted
Headers show

Not browsing as part of any series.

Commit Message

Iago Toral Quiroga Aug. 14, 2014, 11:11 a.m.
From: Samuel Iglesias Gonsalvez <siglesias@igalia.com>

Currently, gen6 only uses geometry shaders for transform feedback so the state
we emit is not suitable to accomodate general purpose, user-provided geometry
shaders. This patch paves the way to add these support and the needed
3DSTATE_GS packet modifications for it.

Previous code that emitted state to implement transform feedback in gen6 goes
to upload_gs_state_adhoc_tf().

Signed-off-by: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
---
 src/mesa/drivers/dri/i965/gen6_gs_state.c | 105 ++++++++++++++++++++++++++----
 1 file changed, 94 insertions(+), 11 deletions(-)

Patch hide | download patch | download mbox

diff --git a/src/mesa/drivers/dri/i965/gen6_gs_state.c b/src/mesa/drivers/dri/i965/gen6_gs_state.c
index 9648fb7..e132959 100644
--- a/src/mesa/drivers/dri/i965/gen6_gs_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_gs_state.c
@@ -31,7 +31,7 @@ 
 #include "intel_batchbuffer.h"
 
 static void
-upload_gs_state(struct brw_context *brw)
+upload_gs_state_for_tf(struct brw_context *brw)
 {
    /* Disable all the constant buffers. */
    BEGIN_BATCH(5);
@@ -49,11 +49,11 @@  upload_gs_state(struct brw_context *brw)
       OUT_BATCH(GEN6_GS_SPF_MODE | GEN6_GS_VECTOR_MASK_ENABLE);
       OUT_BATCH(0); /* no scratch space */
       OUT_BATCH((2 << GEN6_GS_DISPATCH_START_GRF_SHIFT) |
-	        (brw->ff_gs.prog_data->urb_read_length << GEN6_GS_URB_READ_LENGTH_SHIFT));
+                (brw->ff_gs.prog_data->urb_read_length << GEN6_GS_URB_READ_LENGTH_SHIFT));
       OUT_BATCH(((brw->max_gs_threads - 1) << GEN6_GS_MAX_THREADS_SHIFT) |
-	        GEN6_GS_STATISTICS_ENABLE |
-		GEN6_GS_SO_STATISTICS_ENABLE |
-		GEN6_GS_RENDERING_ENABLE);
+                GEN6_GS_STATISTICS_ENABLE |
+                GEN6_GS_SO_STATISTICS_ENABLE |
+                GEN6_GS_RENDERING_ENABLE);
       OUT_BATCH(GEN6_GS_SVBI_PAYLOAD_ENABLE |
                 GEN6_GS_SVBI_POSTINCREMENT_ENABLE |
                 (brw->ff_gs.prog_data->svbi_postincrement_value <<
@@ -65,24 +65,107 @@  upload_gs_state(struct brw_context *brw)
       OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2));
       OUT_BATCH(0); /* prog_bo */
       OUT_BATCH((0 << GEN6_GS_SAMPLER_COUNT_SHIFT) |
-		(0 << GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
+                (0 << GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
       OUT_BATCH(0); /* scratch space base offset */
       OUT_BATCH((1 << GEN6_GS_DISPATCH_START_GRF_SHIFT) |
-		(0 << GEN6_GS_URB_READ_LENGTH_SHIFT) |
-		(0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT));
+                (0 << GEN6_GS_URB_READ_LENGTH_SHIFT) |
+                (0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT));
       OUT_BATCH((0 << GEN6_GS_MAX_THREADS_SHIFT) |
-		GEN6_GS_STATISTICS_ENABLE |
-		GEN6_GS_RENDERING_ENABLE);
+                GEN6_GS_STATISTICS_ENABLE |
+                GEN6_GS_RENDERING_ENABLE);
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+   }
+}
+
+static void
+upload_gs_state(struct brw_context *brw)
+{
+   /* BRW_NEW_GEOMETRY_PROGRAM */
+   bool active = brw->geometry_program;
+   /* CACHE_NEW_GS_PROG */
+   const struct brw_vec4_prog_data *prog_data = &brw->gs.prog_data->base;
+   const struct brw_stage_state *stage_state = &brw->gs.base;
+
+   if (active) {
+      /* FIXME: enable constant buffers */
+      BEGIN_BATCH(5);
+      OUT_BATCH(_3DSTATE_CONSTANT_GS << 16 | (5 - 2));
+      OUT_BATCH(0);
+      OUT_BATCH(0);
       OUT_BATCH(0);
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+
+      BEGIN_BATCH(7);
+      OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2));
+      OUT_BATCH(stage_state->prog_offset);
+
+      /* GEN6_GS_SPF_MODE and GEN6_GS_VECTOR_MASK_ENABLE are enabled as it
+       * was previously done for gen6.
+       *
+       * TODO: test with both disabled to see if the HW is behaving
+       * as expected, like in gen7.
+       */
+      OUT_BATCH(GEN6_GS_SPF_MODE | GEN6_GS_VECTOR_MASK_ENABLE |
+                ((ALIGN(stage_state->sampler_count, 4)/4) <<
+                 GEN6_GS_SAMPLER_COUNT_SHIFT) |
+                ((prog_data->base.binding_table.size_bytes / 4) <<
+                 GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
+
+      if (prog_data->total_scratch) {
+         OUT_RELOC(stage_state->scratch_bo,
+                   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+                   ffs(prog_data->total_scratch) - 11);
+      } else {
+         OUT_BATCH(0); /* no scratch space */
+      }
+
+      OUT_BATCH((prog_data->urb_read_length <<
+                 GEN6_GS_URB_READ_LENGTH_SHIFT) |
+                (0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT) |
+                (prog_data->base.dispatch_grf_start_reg <<
+                 GEN6_GS_DISPATCH_START_GRF_SHIFT));
+
+      OUT_BATCH(((brw->max_gs_threads - 1) << GEN6_GS_MAX_THREADS_SHIFT) |
+                GEN6_GS_STATISTICS_ENABLE |
+                GEN6_GS_SO_STATISTICS_ENABLE |
+                GEN6_GS_RENDERING_ENABLE);
+
+      /* FIXME: Enable SVBI payload only when TF is enable in SNB for
+       * user-provided GS.
+       */
+      if (0) {
+         /* GEN6_GS_REORDER is equivalent to GEN7_GS_REORDER_TRAILING
+          * in gen7. SNB and IVB specs are the same regarding the reordering of
+          * TRISTRIP/TRISTRIP_REV vertices and triangle orientation, so we do
+          * the same thing in both generations. For more details, see the
+          * comment in gen7_gs_state.c
+          */
+         OUT_BATCH(GEN6_GS_REORDER |
+                   GEN6_GS_SVBI_PAYLOAD_ENABLE |
+                   GEN6_GS_SVBI_POSTINCREMENT_ENABLE |
+                   /* FIXME: prog_data->svbi_postincrement_value instead of 0 */
+                   (0 << GEN6_GS_SVBI_POSTINCREMENT_VALUE_SHIFT) |
+                   GEN6_GS_ENABLE);
+      } else {
+         OUT_BATCH(GEN6_GS_REORDER | GEN6_GS_ENABLE);
+      }
       ADVANCE_BATCH();
+   } else {
+      /* In gen6, transform feedback support is done with an ad-hoc GS
+       * program. This function provides the needed 3DSTATE_GS.
+       */
+      upload_gs_state_for_tf(brw);
    }
+   brw->gs.enabled = active;
 }
 
 const struct brw_tracked_state gen6_gs_state = {
    .dirty = {
       .mesa  = _NEW_TRANSFORM,
       .brw   = BRW_NEW_CONTEXT | BRW_NEW_PUSH_CONSTANT_ALLOCATION,
-      .cache = CACHE_NEW_FF_GS_PROG
+      .cache = (CACHE_NEW_GS_PROG | CACHE_NEW_FF_GS_PROG)
    },
    .emit = upload_gs_state,
 };

Comments

Rather than:
i965/gen6/gs: refactor gen6_gs_state

How about something like:
i965/gen6/gs: Skeleton for user GS program support

(more below)

On Thu, Aug 14, 2014 at 4:11 AM, Iago Toral Quiroga <itoral@igalia.com> wrote:
> From: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
>
> Currently, gen6 only uses geometry shaders for transform feedback so the state
> we emit is not suitable to accomodate general purpose, user-provided geometry
> shaders. This patch paves the way to add these support and the needed
> 3DSTATE_GS packet modifications for it.
>
> Previous code that emitted state to implement transform feedback in gen6 goes
> to upload_gs_state_adhoc_tf().
>
> Signed-off-by: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
> ---
>  src/mesa/drivers/dri/i965/gen6_gs_state.c | 105 ++++++++++++++++++++++++++----
>  1 file changed, 94 insertions(+), 11 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/gen6_gs_state.c b/src/mesa/drivers/dri/i965/gen6_gs_state.c
> index 9648fb7..e132959 100644
> --- a/src/mesa/drivers/dri/i965/gen6_gs_state.c
> +++ b/src/mesa/drivers/dri/i965/gen6_gs_state.c
> @@ -31,7 +31,7 @@
>  #include "intel_batchbuffer.h"
>
>  static void
> -upload_gs_state(struct brw_context *brw)
> +upload_gs_state_for_tf(struct brw_context *brw)
>  {
>     /* Disable all the constant buffers. */
>     BEGIN_BATCH(5);
> @@ -49,11 +49,11 @@ upload_gs_state(struct brw_context *brw)
>        OUT_BATCH(GEN6_GS_SPF_MODE | GEN6_GS_VECTOR_MASK_ENABLE);
>        OUT_BATCH(0); /* no scratch space */
>        OUT_BATCH((2 << GEN6_GS_DISPATCH_START_GRF_SHIFT) |
> -               (brw->ff_gs.prog_data->urb_read_length << GEN6_GS_URB_READ_LENGTH_SHIFT));
> +                (brw->ff_gs.prog_data->urb_read_length << GEN6_GS_URB_READ_LENGTH_SHIFT));
>        OUT_BATCH(((brw->max_gs_threads - 1) << GEN6_GS_MAX_THREADS_SHIFT) |
> -               GEN6_GS_STATISTICS_ENABLE |
> -               GEN6_GS_SO_STATISTICS_ENABLE |
> -               GEN6_GS_RENDERING_ENABLE);
> +                GEN6_GS_STATISTICS_ENABLE |
> +                GEN6_GS_SO_STATISTICS_ENABLE |
> +                GEN6_GS_RENDERING_ENABLE);

This looks like tabs to spaces conversion. For lines that need to be
changed, it is good to do that conversion. But, in this case, the only
change is the name of the function.

>        OUT_BATCH(GEN6_GS_SVBI_PAYLOAD_ENABLE |
>                  GEN6_GS_SVBI_POSTINCREMENT_ENABLE |
>                  (brw->ff_gs.prog_data->svbi_postincrement_value <<
> @@ -65,24 +65,107 @@ upload_gs_state(struct brw_context *brw)
>        OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2));
>        OUT_BATCH(0); /* prog_bo */
>        OUT_BATCH((0 << GEN6_GS_SAMPLER_COUNT_SHIFT) |
> -               (0 << GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
> +                (0 << GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
>        OUT_BATCH(0); /* scratch space base offset */
>        OUT_BATCH((1 << GEN6_GS_DISPATCH_START_GRF_SHIFT) |
> -               (0 << GEN6_GS_URB_READ_LENGTH_SHIFT) |
> -               (0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT));
> +                (0 << GEN6_GS_URB_READ_LENGTH_SHIFT) |
> +                (0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT));
>        OUT_BATCH((0 << GEN6_GS_MAX_THREADS_SHIFT) |
> -               GEN6_GS_STATISTICS_ENABLE |
> -               GEN6_GS_RENDERING_ENABLE);
> +                GEN6_GS_STATISTICS_ENABLE |
> +                GEN6_GS_RENDERING_ENABLE);
> +      OUT_BATCH(0);
> +      ADVANCE_BATCH();
> +   }
> +}
> +
> +static void
> +upload_gs_state(struct brw_context *brw)
> +{
> +   /* BRW_NEW_GEOMETRY_PROGRAM */
> +   bool active = brw->geometry_program;
> +   /* CACHE_NEW_GS_PROG */
> +   const struct brw_vec4_prog_data *prog_data = &brw->gs.prog_data->base;
> +   const struct brw_stage_state *stage_state = &brw->gs.base;
> +
> +   if (active) {
> +      /* FIXME: enable constant buffers */
> +      BEGIN_BATCH(5);
> +      OUT_BATCH(_3DSTATE_CONSTANT_GS << 16 | (5 - 2));
> +      OUT_BATCH(0);
> +      OUT_BATCH(0);
>        OUT_BATCH(0);
> +      OUT_BATCH(0);
> +      ADVANCE_BATCH();
> +
> +      BEGIN_BATCH(7);
> +      OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2));
> +      OUT_BATCH(stage_state->prog_offset);
> +
> +      /* GEN6_GS_SPF_MODE and GEN6_GS_VECTOR_MASK_ENABLE are enabled as it
> +       * was previously done for gen6.
> +       *
> +       * TODO: test with both disabled to see if the HW is behaving
> +       * as expected, like in gen7.
> +       */
> +      OUT_BATCH(GEN6_GS_SPF_MODE | GEN6_GS_VECTOR_MASK_ENABLE |
> +                ((ALIGN(stage_state->sampler_count, 4)/4) <<
> +                 GEN6_GS_SAMPLER_COUNT_SHIFT) |
> +                ((prog_data->base.binding_table.size_bytes / 4) <<
> +                 GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
> +
> +      if (prog_data->total_scratch) {
> +         OUT_RELOC(stage_state->scratch_bo,
> +                   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
> +                   ffs(prog_data->total_scratch) - 11);
> +      } else {
> +         OUT_BATCH(0); /* no scratch space */
> +      }
> +
> +      OUT_BATCH((prog_data->urb_read_length <<
> +                 GEN6_GS_URB_READ_LENGTH_SHIFT) |
> +                (0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT) |
> +                (prog_data->base.dispatch_grf_start_reg <<
> +                 GEN6_GS_DISPATCH_START_GRF_SHIFT));
> +
> +      OUT_BATCH(((brw->max_gs_threads - 1) << GEN6_GS_MAX_THREADS_SHIFT) |
> +                GEN6_GS_STATISTICS_ENABLE |
> +                GEN6_GS_SO_STATISTICS_ENABLE |
> +                GEN6_GS_RENDERING_ENABLE);
> +
> +      /* FIXME: Enable SVBI payload only when TF is enable in SNB for
> +       * user-provided GS.
> +       */
> +      if (0) {
> +         /* GEN6_GS_REORDER is equivalent to GEN7_GS_REORDER_TRAILING
> +          * in gen7. SNB and IVB specs are the same regarding the reordering of
> +          * TRISTRIP/TRISTRIP_REV vertices and triangle orientation, so we do
> +          * the same thing in both generations. For more details, see the
> +          * comment in gen7_gs_state.c
> +          */
> +         OUT_BATCH(GEN6_GS_REORDER |
> +                   GEN6_GS_SVBI_PAYLOAD_ENABLE |
> +                   GEN6_GS_SVBI_POSTINCREMENT_ENABLE |
> +                   /* FIXME: prog_data->svbi_postincrement_value instead of 0 */
> +                   (0 << GEN6_GS_SVBI_POSTINCREMENT_VALUE_SHIFT) |
> +                   GEN6_GS_ENABLE);
> +      } else {
> +         OUT_BATCH(GEN6_GS_REORDER | GEN6_GS_ENABLE);
> +      }
>        ADVANCE_BATCH();
> +   } else {
> +      /* In gen6, transform feedback support is done with an ad-hoc GS
> +       * program. This function provides the needed 3DSTATE_GS.
> +       */
> +      upload_gs_state_for_tf(brw);

Also handles the 'fully disable GS' case, which is a little weird.
Seems fine though, but I think the comment could note that.

Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>

>     }
> +   brw->gs.enabled = active;
>  }
>
>  const struct brw_tracked_state gen6_gs_state = {
>     .dirty = {
>        .mesa  = _NEW_TRANSFORM,
>        .brw   = BRW_NEW_CONTEXT | BRW_NEW_PUSH_CONSTANT_ALLOCATION,
> -      .cache = CACHE_NEW_FF_GS_PROG
> +      .cache = (CACHE_NEW_GS_PROG | CACHE_NEW_FF_GS_PROG)
>     },
>     .emit = upload_gs_state,
>  };
> --
> 1.9.1
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
On mié, 2014-09-03 at 18:51 -0700, Jordan Justen wrote:
> Rather than:
> i965/gen6/gs: refactor gen6_gs_state
> 
> How about something like:
> i965/gen6/gs: Skeleton for user GS program support

Sure, that sounds good to me.

> (more below)
> 
> On Thu, Aug 14, 2014 at 4:11 AM, Iago Toral Quiroga <itoral@igalia.com> wrote:
> > From: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
> >
> > Currently, gen6 only uses geometry shaders for transform feedback so the state
> > we emit is not suitable to accomodate general purpose, user-provided geometry
> > shaders. This patch paves the way to add these support and the needed
> > 3DSTATE_GS packet modifications for it.
> >
> > Previous code that emitted state to implement transform feedback in gen6 goes
> > to upload_gs_state_adhoc_tf().
> >
> > Signed-off-by: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
> > ---
> >  src/mesa/drivers/dri/i965/gen6_gs_state.c | 105 ++++++++++++++++++++++++++----
> >  1 file changed, 94 insertions(+), 11 deletions(-)
> >
> > diff --git a/src/mesa/drivers/dri/i965/gen6_gs_state.c b/src/mesa/drivers/dri/i965/gen6_gs_state.c
> > index 9648fb7..e132959 100644
> > --- a/src/mesa/drivers/dri/i965/gen6_gs_state.c
> > +++ b/src/mesa/drivers/dri/i965/gen6_gs_state.c
> > @@ -31,7 +31,7 @@
> >  #include "intel_batchbuffer.h"
> >
> >  static void
> > -upload_gs_state(struct brw_context *brw)
> > +upload_gs_state_for_tf(struct brw_context *brw)
> >  {
> >     /* Disable all the constant buffers. */
> >     BEGIN_BATCH(5);
> > @@ -49,11 +49,11 @@ upload_gs_state(struct brw_context *brw)
> >        OUT_BATCH(GEN6_GS_SPF_MODE | GEN6_GS_VECTOR_MASK_ENABLE);
> >        OUT_BATCH(0); /* no scratch space */
> >        OUT_BATCH((2 << GEN6_GS_DISPATCH_START_GRF_SHIFT) |
> > -               (brw->ff_gs.prog_data->urb_read_length << GEN6_GS_URB_READ_LENGTH_SHIFT));
> > +                (brw->ff_gs.prog_data->urb_read_length << GEN6_GS_URB_READ_LENGTH_SHIFT));
> >        OUT_BATCH(((brw->max_gs_threads - 1) << GEN6_GS_MAX_THREADS_SHIFT) |
> > -               GEN6_GS_STATISTICS_ENABLE |
> > -               GEN6_GS_SO_STATISTICS_ENABLE |
> > -               GEN6_GS_RENDERING_ENABLE);
> > +                GEN6_GS_STATISTICS_ENABLE |
> > +                GEN6_GS_SO_STATISTICS_ENABLE |
> > +                GEN6_GS_RENDERING_ENABLE);
> 
> This looks like tabs to spaces conversion. For lines that need to be
> changed, it is good to do that conversion. But, in this case, the only
> change is the name of the function.

Okay.

You granted the reviewed-by anyway though, so I understand that this
comment is only for reference. Or would you prefer that we remove these
changes from the patch?

> >        OUT_BATCH(GEN6_GS_SVBI_PAYLOAD_ENABLE |
> >                  GEN6_GS_SVBI_POSTINCREMENT_ENABLE |
> >                  (brw->ff_gs.prog_data->svbi_postincrement_value <<
> > @@ -65,24 +65,107 @@ upload_gs_state(struct brw_context *brw)
> >        OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2));
> >        OUT_BATCH(0); /* prog_bo */
> >        OUT_BATCH((0 << GEN6_GS_SAMPLER_COUNT_SHIFT) |
> > -               (0 << GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
> > +                (0 << GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
> >        OUT_BATCH(0); /* scratch space base offset */
> >        OUT_BATCH((1 << GEN6_GS_DISPATCH_START_GRF_SHIFT) |
> > -               (0 << GEN6_GS_URB_READ_LENGTH_SHIFT) |
> > -               (0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT));
> > +                (0 << GEN6_GS_URB_READ_LENGTH_SHIFT) |
> > +                (0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT));
> >        OUT_BATCH((0 << GEN6_GS_MAX_THREADS_SHIFT) |
> > -               GEN6_GS_STATISTICS_ENABLE |
> > -               GEN6_GS_RENDERING_ENABLE);
> > +                GEN6_GS_STATISTICS_ENABLE |
> > +                GEN6_GS_RENDERING_ENABLE);
> > +      OUT_BATCH(0);
> > +      ADVANCE_BATCH();
> > +   }
> > +}
> > +
> > +static void
> > +upload_gs_state(struct brw_context *brw)
> > +{
> > +   /* BRW_NEW_GEOMETRY_PROGRAM */
> > +   bool active = brw->geometry_program;
> > +   /* CACHE_NEW_GS_PROG */
> > +   const struct brw_vec4_prog_data *prog_data = &brw->gs.prog_data->base;
> > +   const struct brw_stage_state *stage_state = &brw->gs.base;
> > +
> > +   if (active) {
> > +      /* FIXME: enable constant buffers */
> > +      BEGIN_BATCH(5);
> > +      OUT_BATCH(_3DSTATE_CONSTANT_GS << 16 | (5 - 2));
> > +      OUT_BATCH(0);
> > +      OUT_BATCH(0);
> >        OUT_BATCH(0);
> > +      OUT_BATCH(0);
> > +      ADVANCE_BATCH();
> > +
> > +      BEGIN_BATCH(7);
> > +      OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2));
> > +      OUT_BATCH(stage_state->prog_offset);
> > +
> > +      /* GEN6_GS_SPF_MODE and GEN6_GS_VECTOR_MASK_ENABLE are enabled as it
> > +       * was previously done for gen6.
> > +       *
> > +       * TODO: test with both disabled to see if the HW is behaving
> > +       * as expected, like in gen7.
> > +       */
> > +      OUT_BATCH(GEN6_GS_SPF_MODE | GEN6_GS_VECTOR_MASK_ENABLE |
> > +                ((ALIGN(stage_state->sampler_count, 4)/4) <<
> > +                 GEN6_GS_SAMPLER_COUNT_SHIFT) |
> > +                ((prog_data->base.binding_table.size_bytes / 4) <<
> > +                 GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
> > +
> > +      if (prog_data->total_scratch) {
> > +         OUT_RELOC(stage_state->scratch_bo,
> > +                   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
> > +                   ffs(prog_data->total_scratch) - 11);
> > +      } else {
> > +         OUT_BATCH(0); /* no scratch space */
> > +      }
> > +
> > +      OUT_BATCH((prog_data->urb_read_length <<
> > +                 GEN6_GS_URB_READ_LENGTH_SHIFT) |
> > +                (0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT) |
> > +                (prog_data->base.dispatch_grf_start_reg <<
> > +                 GEN6_GS_DISPATCH_START_GRF_SHIFT));
> > +
> > +      OUT_BATCH(((brw->max_gs_threads - 1) << GEN6_GS_MAX_THREADS_SHIFT) |
> > +                GEN6_GS_STATISTICS_ENABLE |
> > +                GEN6_GS_SO_STATISTICS_ENABLE |
> > +                GEN6_GS_RENDERING_ENABLE);
> > +
> > +      /* FIXME: Enable SVBI payload only when TF is enable in SNB for
> > +       * user-provided GS.
> > +       */
> > +      if (0) {
> > +         /* GEN6_GS_REORDER is equivalent to GEN7_GS_REORDER_TRAILING
> > +          * in gen7. SNB and IVB specs are the same regarding the reordering of
> > +          * TRISTRIP/TRISTRIP_REV vertices and triangle orientation, so we do
> > +          * the same thing in both generations. For more details, see the
> > +          * comment in gen7_gs_state.c
> > +          */
> > +         OUT_BATCH(GEN6_GS_REORDER |
> > +                   GEN6_GS_SVBI_PAYLOAD_ENABLE |
> > +                   GEN6_GS_SVBI_POSTINCREMENT_ENABLE |
> > +                   /* FIXME: prog_data->svbi_postincrement_value instead of 0 */
> > +                   (0 << GEN6_GS_SVBI_POSTINCREMENT_VALUE_SHIFT) |
> > +                   GEN6_GS_ENABLE);
> > +      } else {
> > +         OUT_BATCH(GEN6_GS_REORDER | GEN6_GS_ENABLE);
> > +      }
> >        ADVANCE_BATCH();
> > +   } else {
> > +      /* In gen6, transform feedback support is done with an ad-hoc GS
> > +       * program. This function provides the needed 3DSTATE_GS.
> > +       */
> > +      upload_gs_state_for_tf(brw);
> 
> Also handles the 'fully disable GS' case, which is a little weird.
> Seems fine though, but I think the comment could note that.
> 
> Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
> 
> >     }
> > +   brw->gs.enabled = active;
> >  }
> >
> >  const struct brw_tracked_state gen6_gs_state = {
> >     .dirty = {
> >        .mesa  = _NEW_TRANSFORM,
> >        .brw   = BRW_NEW_CONTEXT | BRW_NEW_PUSH_CONSTANT_ALLOCATION,
> > -      .cache = CACHE_NEW_FF_GS_PROG
> > +      .cache = (CACHE_NEW_GS_PROG | CACHE_NEW_FF_GS_PROG)
> >     },
> >     .emit = upload_gs_state,
> >  };
> > --
> > 1.9.1
> >
> > _______________________________________________
> > mesa-dev mailing list
> > mesa-dev@lists.freedesktop.org
> > http://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
On Wed, Sep 3, 2014 at 11:33 PM, Iago Toral Quiroga <itoral@igalia.com> wrote:
> On mié, 2014-09-03 at 18:51 -0700, Jordan Justen wrote:
>> Rather than:
>> i965/gen6/gs: refactor gen6_gs_state
>>
>> How about something like:
>> i965/gen6/gs: Skeleton for user GS program support
>
> Sure, that sounds good to me.
>
>> (more below)
>>
>> On Thu, Aug 14, 2014 at 4:11 AM, Iago Toral Quiroga <itoral@igalia.com> wrote:
>> > From: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
>> >
>> > Currently, gen6 only uses geometry shaders for transform feedback so the state
>> > we emit is not suitable to accomodate general purpose, user-provided geometry
>> > shaders. This patch paves the way to add these support and the needed
>> > 3DSTATE_GS packet modifications for it.
>> >
>> > Previous code that emitted state to implement transform feedback in gen6 goes
>> > to upload_gs_state_adhoc_tf().
>> >
>> > Signed-off-by: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
>> > ---
>> >  src/mesa/drivers/dri/i965/gen6_gs_state.c | 105 ++++++++++++++++++++++++++----
>> >  1 file changed, 94 insertions(+), 11 deletions(-)
>> >
>> > diff --git a/src/mesa/drivers/dri/i965/gen6_gs_state.c b/src/mesa/drivers/dri/i965/gen6_gs_state.c
>> > index 9648fb7..e132959 100644
>> > --- a/src/mesa/drivers/dri/i965/gen6_gs_state.c
>> > +++ b/src/mesa/drivers/dri/i965/gen6_gs_state.c
>> > @@ -31,7 +31,7 @@
>> >  #include "intel_batchbuffer.h"
>> >
>> >  static void
>> > -upload_gs_state(struct brw_context *brw)
>> > +upload_gs_state_for_tf(struct brw_context *brw)
>> >  {
>> >     /* Disable all the constant buffers. */
>> >     BEGIN_BATCH(5);
>> > @@ -49,11 +49,11 @@ upload_gs_state(struct brw_context *brw)
>> >        OUT_BATCH(GEN6_GS_SPF_MODE | GEN6_GS_VECTOR_MASK_ENABLE);
>> >        OUT_BATCH(0); /* no scratch space */
>> >        OUT_BATCH((2 << GEN6_GS_DISPATCH_START_GRF_SHIFT) |
>> > -               (brw->ff_gs.prog_data->urb_read_length << GEN6_GS_URB_READ_LENGTH_SHIFT));
>> > +                (brw->ff_gs.prog_data->urb_read_length << GEN6_GS_URB_READ_LENGTH_SHIFT));
>> >        OUT_BATCH(((brw->max_gs_threads - 1) << GEN6_GS_MAX_THREADS_SHIFT) |
>> > -               GEN6_GS_STATISTICS_ENABLE |
>> > -               GEN6_GS_SO_STATISTICS_ENABLE |
>> > -               GEN6_GS_RENDERING_ENABLE);
>> > +                GEN6_GS_STATISTICS_ENABLE |
>> > +                GEN6_GS_SO_STATISTICS_ENABLE |
>> > +                GEN6_GS_RENDERING_ENABLE);
>>
>> This looks like tabs to spaces conversion. For lines that need to be
>> changed, it is good to do that conversion. But, in this case, the only
>> change is the name of the function.
>
> Okay.
>
> You granted the reviewed-by anyway though, so I understand that this
> comment is only for reference. Or would you prefer that we remove these
> changes from the patch?

Could you clean it up? It makes review easier, both now, and in
looking back through the history. (also, git blame...)

Thanks,

-Jordan

>> >        OUT_BATCH(GEN6_GS_SVBI_PAYLOAD_ENABLE |
>> >                  GEN6_GS_SVBI_POSTINCREMENT_ENABLE |
>> >                  (brw->ff_gs.prog_data->svbi_postincrement_value <<
>> > @@ -65,24 +65,107 @@ upload_gs_state(struct brw_context *brw)
>> >        OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2));
>> >        OUT_BATCH(0); /* prog_bo */
>> >        OUT_BATCH((0 << GEN6_GS_SAMPLER_COUNT_SHIFT) |
>> > -               (0 << GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
>> > +                (0 << GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
>> >        OUT_BATCH(0); /* scratch space base offset */
>> >        OUT_BATCH((1 << GEN6_GS_DISPATCH_START_GRF_SHIFT) |
>> > -               (0 << GEN6_GS_URB_READ_LENGTH_SHIFT) |
>> > -               (0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT));
>> > +                (0 << GEN6_GS_URB_READ_LENGTH_SHIFT) |
>> > +                (0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT));
>> >        OUT_BATCH((0 << GEN6_GS_MAX_THREADS_SHIFT) |
>> > -               GEN6_GS_STATISTICS_ENABLE |
>> > -               GEN6_GS_RENDERING_ENABLE);
>> > +                GEN6_GS_STATISTICS_ENABLE |
>> > +                GEN6_GS_RENDERING_ENABLE);
>> > +      OUT_BATCH(0);
>> > +      ADVANCE_BATCH();
>> > +   }
>> > +}
>> > +
>> > +static void
>> > +upload_gs_state(struct brw_context *brw)
>> > +{
>> > +   /* BRW_NEW_GEOMETRY_PROGRAM */
>> > +   bool active = brw->geometry_program;
>> > +   /* CACHE_NEW_GS_PROG */
>> > +   const struct brw_vec4_prog_data *prog_data = &brw->gs.prog_data->base;
>> > +   const struct brw_stage_state *stage_state = &brw->gs.base;
>> > +
>> > +   if (active) {
>> > +      /* FIXME: enable constant buffers */
>> > +      BEGIN_BATCH(5);
>> > +      OUT_BATCH(_3DSTATE_CONSTANT_GS << 16 | (5 - 2));
>> > +      OUT_BATCH(0);
>> > +      OUT_BATCH(0);
>> >        OUT_BATCH(0);
>> > +      OUT_BATCH(0);
>> > +      ADVANCE_BATCH();
>> > +
>> > +      BEGIN_BATCH(7);
>> > +      OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2));
>> > +      OUT_BATCH(stage_state->prog_offset);
>> > +
>> > +      /* GEN6_GS_SPF_MODE and GEN6_GS_VECTOR_MASK_ENABLE are enabled as it
>> > +       * was previously done for gen6.
>> > +       *
>> > +       * TODO: test with both disabled to see if the HW is behaving
>> > +       * as expected, like in gen7.
>> > +       */
>> > +      OUT_BATCH(GEN6_GS_SPF_MODE | GEN6_GS_VECTOR_MASK_ENABLE |
>> > +                ((ALIGN(stage_state->sampler_count, 4)/4) <<
>> > +                 GEN6_GS_SAMPLER_COUNT_SHIFT) |
>> > +                ((prog_data->base.binding_table.size_bytes / 4) <<
>> > +                 GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
>> > +
>> > +      if (prog_data->total_scratch) {
>> > +         OUT_RELOC(stage_state->scratch_bo,
>> > +                   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
>> > +                   ffs(prog_data->total_scratch) - 11);
>> > +      } else {
>> > +         OUT_BATCH(0); /* no scratch space */
>> > +      }
>> > +
>> > +      OUT_BATCH((prog_data->urb_read_length <<
>> > +                 GEN6_GS_URB_READ_LENGTH_SHIFT) |
>> > +                (0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT) |
>> > +                (prog_data->base.dispatch_grf_start_reg <<
>> > +                 GEN6_GS_DISPATCH_START_GRF_SHIFT));
>> > +
>> > +      OUT_BATCH(((brw->max_gs_threads - 1) << GEN6_GS_MAX_THREADS_SHIFT) |
>> > +                GEN6_GS_STATISTICS_ENABLE |
>> > +                GEN6_GS_SO_STATISTICS_ENABLE |
>> > +                GEN6_GS_RENDERING_ENABLE);
>> > +
>> > +      /* FIXME: Enable SVBI payload only when TF is enable in SNB for
>> > +       * user-provided GS.
>> > +       */
>> > +      if (0) {
>> > +         /* GEN6_GS_REORDER is equivalent to GEN7_GS_REORDER_TRAILING
>> > +          * in gen7. SNB and IVB specs are the same regarding the reordering of
>> > +          * TRISTRIP/TRISTRIP_REV vertices and triangle orientation, so we do
>> > +          * the same thing in both generations. For more details, see the
>> > +          * comment in gen7_gs_state.c
>> > +          */
>> > +         OUT_BATCH(GEN6_GS_REORDER |
>> > +                   GEN6_GS_SVBI_PAYLOAD_ENABLE |
>> > +                   GEN6_GS_SVBI_POSTINCREMENT_ENABLE |
>> > +                   /* FIXME: prog_data->svbi_postincrement_value instead of 0 */
>> > +                   (0 << GEN6_GS_SVBI_POSTINCREMENT_VALUE_SHIFT) |
>> > +                   GEN6_GS_ENABLE);
>> > +      } else {
>> > +         OUT_BATCH(GEN6_GS_REORDER | GEN6_GS_ENABLE);
>> > +      }
>> >        ADVANCE_BATCH();
>> > +   } else {
>> > +      /* In gen6, transform feedback support is done with an ad-hoc GS
>> > +       * program. This function provides the needed 3DSTATE_GS.
>> > +       */
>> > +      upload_gs_state_for_tf(brw);
>>
>> Also handles the 'fully disable GS' case, which is a little weird.
>> Seems fine though, but I think the comment could note that.
>>
>> Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
>>
>> >     }
>> > +   brw->gs.enabled = active;
>> >  }
>> >
>> >  const struct brw_tracked_state gen6_gs_state = {
>> >     .dirty = {
>> >        .mesa  = _NEW_TRANSFORM,
>> >        .brw   = BRW_NEW_CONTEXT | BRW_NEW_PUSH_CONSTANT_ALLOCATION,
>> > -      .cache = CACHE_NEW_FF_GS_PROG
>> > +      .cache = (CACHE_NEW_GS_PROG | CACHE_NEW_FF_GS_PROG)
>> >     },
>> >     .emit = upload_gs_state,
>> >  };
>> > --
>> > 1.9.1
>> >
>> > _______________________________________________
>> > mesa-dev mailing list
>> > mesa-dev@lists.freedesktop.org
>> > http://lists.freedesktop.org/mailman/listinfo/mesa-dev
>>
>
>
On jue, 2014-09-04 at 00:44 -0700, Jordan Justen wrote:
> On Wed, Sep 3, 2014 at 11:33 PM, Iago Toral Quiroga <itoral@igalia.com> wrote:
> > On mié, 2014-09-03 at 18:51 -0700, Jordan Justen wrote:
> >> Rather than:
> >> i965/gen6/gs: refactor gen6_gs_state
> >>
> >> How about something like:
> >> i965/gen6/gs: Skeleton for user GS program support
> >
> > Sure, that sounds good to me.
> >
> >> (more below)
> >>
> >> On Thu, Aug 14, 2014 at 4:11 AM, Iago Toral Quiroga <itoral@igalia.com> wrote:
> >> > From: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
> >> >
> >> > Currently, gen6 only uses geometry shaders for transform feedback so the state
> >> > we emit is not suitable to accomodate general purpose, user-provided geometry
> >> > shaders. This patch paves the way to add these support and the needed
> >> > 3DSTATE_GS packet modifications for it.
> >> >
> >> > Previous code that emitted state to implement transform feedback in gen6 goes
> >> > to upload_gs_state_adhoc_tf().
> >> >
> >> > Signed-off-by: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
> >> > ---
> >> >  src/mesa/drivers/dri/i965/gen6_gs_state.c | 105 ++++++++++++++++++++++++++----
> >> >  1 file changed, 94 insertions(+), 11 deletions(-)
> >> >
> >> > diff --git a/src/mesa/drivers/dri/i965/gen6_gs_state.c b/src/mesa/drivers/dri/i965/gen6_gs_state.c
> >> > index 9648fb7..e132959 100644
> >> > --- a/src/mesa/drivers/dri/i965/gen6_gs_state.c
> >> > +++ b/src/mesa/drivers/dri/i965/gen6_gs_state.c
> >> > @@ -31,7 +31,7 @@
> >> >  #include "intel_batchbuffer.h"
> >> >
> >> >  static void
> >> > -upload_gs_state(struct brw_context *brw)
> >> > +upload_gs_state_for_tf(struct brw_context *brw)
> >> >  {
> >> >     /* Disable all the constant buffers. */
> >> >     BEGIN_BATCH(5);
> >> > @@ -49,11 +49,11 @@ upload_gs_state(struct brw_context *brw)
> >> >        OUT_BATCH(GEN6_GS_SPF_MODE | GEN6_GS_VECTOR_MASK_ENABLE);
> >> >        OUT_BATCH(0); /* no scratch space */
> >> >        OUT_BATCH((2 << GEN6_GS_DISPATCH_START_GRF_SHIFT) |
> >> > -               (brw->ff_gs.prog_data->urb_read_length << GEN6_GS_URB_READ_LENGTH_SHIFT));
> >> > +                (brw->ff_gs.prog_data->urb_read_length << GEN6_GS_URB_READ_LENGTH_SHIFT));
> >> >        OUT_BATCH(((brw->max_gs_threads - 1) << GEN6_GS_MAX_THREADS_SHIFT) |
> >> > -               GEN6_GS_STATISTICS_ENABLE |
> >> > -               GEN6_GS_SO_STATISTICS_ENABLE |
> >> > -               GEN6_GS_RENDERING_ENABLE);
> >> > +                GEN6_GS_STATISTICS_ENABLE |
> >> > +                GEN6_GS_SO_STATISTICS_ENABLE |
> >> > +                GEN6_GS_RENDERING_ENABLE);
> >>
> >> This looks like tabs to spaces conversion. For lines that need to be
> >> changed, it is good to do that conversion. But, in this case, the only
> >> change is the name of the function.
> >
> > Okay.
> >
> > You granted the reviewed-by anyway though, so I understand that this
> > comment is only for reference. Or would you prefer that we remove these
> > changes from the patch?
> 
> Could you clean it up? It makes review easier, both now, and in
> looking back through the history. (also, git blame...)
> 
> Thanks,

Sure, no problem.

Iago

> -Jordan
> 
> >> >        OUT_BATCH(GEN6_GS_SVBI_PAYLOAD_ENABLE |
> >> >                  GEN6_GS_SVBI_POSTINCREMENT_ENABLE |
> >> >                  (brw->ff_gs.prog_data->svbi_postincrement_value <<
> >> > @@ -65,24 +65,107 @@ upload_gs_state(struct brw_context *brw)
> >> >        OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2));
> >> >        OUT_BATCH(0); /* prog_bo */
> >> >        OUT_BATCH((0 << GEN6_GS_SAMPLER_COUNT_SHIFT) |
> >> > -               (0 << GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
> >> > +                (0 << GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
> >> >        OUT_BATCH(0); /* scratch space base offset */
> >> >        OUT_BATCH((1 << GEN6_GS_DISPATCH_START_GRF_SHIFT) |
> >> > -               (0 << GEN6_GS_URB_READ_LENGTH_SHIFT) |
> >> > -               (0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT));
> >> > +                (0 << GEN6_GS_URB_READ_LENGTH_SHIFT) |
> >> > +                (0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT));
> >> >        OUT_BATCH((0 << GEN6_GS_MAX_THREADS_SHIFT) |
> >> > -               GEN6_GS_STATISTICS_ENABLE |
> >> > -               GEN6_GS_RENDERING_ENABLE);
> >> > +                GEN6_GS_STATISTICS_ENABLE |
> >> > +                GEN6_GS_RENDERING_ENABLE);
> >> > +      OUT_BATCH(0);
> >> > +      ADVANCE_BATCH();
> >> > +   }
> >> > +}
> >> > +
> >> > +static void
> >> > +upload_gs_state(struct brw_context *brw)
> >> > +{
> >> > +   /* BRW_NEW_GEOMETRY_PROGRAM */
> >> > +   bool active = brw->geometry_program;
> >> > +   /* CACHE_NEW_GS_PROG */
> >> > +   const struct brw_vec4_prog_data *prog_data = &brw->gs.prog_data->base;
> >> > +   const struct brw_stage_state *stage_state = &brw->gs.base;
> >> > +
> >> > +   if (active) {
> >> > +      /* FIXME: enable constant buffers */
> >> > +      BEGIN_BATCH(5);
> >> > +      OUT_BATCH(_3DSTATE_CONSTANT_GS << 16 | (5 - 2));
> >> > +      OUT_BATCH(0);
> >> > +      OUT_BATCH(0);
> >> >        OUT_BATCH(0);
> >> > +      OUT_BATCH(0);
> >> > +      ADVANCE_BATCH();
> >> > +
> >> > +      BEGIN_BATCH(7);
> >> > +      OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2));
> >> > +      OUT_BATCH(stage_state->prog_offset);
> >> > +
> >> > +      /* GEN6_GS_SPF_MODE and GEN6_GS_VECTOR_MASK_ENABLE are enabled as it
> >> > +       * was previously done for gen6.
> >> > +       *
> >> > +       * TODO: test with both disabled to see if the HW is behaving
> >> > +       * as expected, like in gen7.
> >> > +       */
> >> > +      OUT_BATCH(GEN6_GS_SPF_MODE | GEN6_GS_VECTOR_MASK_ENABLE |
> >> > +                ((ALIGN(stage_state->sampler_count, 4)/4) <<
> >> > +                 GEN6_GS_SAMPLER_COUNT_SHIFT) |
> >> > +                ((prog_data->base.binding_table.size_bytes / 4) <<
> >> > +                 GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
> >> > +
> >> > +      if (prog_data->total_scratch) {
> >> > +         OUT_RELOC(stage_state->scratch_bo,
> >> > +                   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
> >> > +                   ffs(prog_data->total_scratch) - 11);
> >> > +      } else {
> >> > +         OUT_BATCH(0); /* no scratch space */
> >> > +      }
> >> > +
> >> > +      OUT_BATCH((prog_data->urb_read_length <<
> >> > +                 GEN6_GS_URB_READ_LENGTH_SHIFT) |
> >> > +                (0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT) |
> >> > +                (prog_data->base.dispatch_grf_start_reg <<
> >> > +                 GEN6_GS_DISPATCH_START_GRF_SHIFT));
> >> > +
> >> > +      OUT_BATCH(((brw->max_gs_threads - 1) << GEN6_GS_MAX_THREADS_SHIFT) |
> >> > +                GEN6_GS_STATISTICS_ENABLE |
> >> > +                GEN6_GS_SO_STATISTICS_ENABLE |
> >> > +                GEN6_GS_RENDERING_ENABLE);
> >> > +
> >> > +      /* FIXME: Enable SVBI payload only when TF is enable in SNB for
> >> > +       * user-provided GS.
> >> > +       */
> >> > +      if (0) {
> >> > +         /* GEN6_GS_REORDER is equivalent to GEN7_GS_REORDER_TRAILING
> >> > +          * in gen7. SNB and IVB specs are the same regarding the reordering of
> >> > +          * TRISTRIP/TRISTRIP_REV vertices and triangle orientation, so we do
> >> > +          * the same thing in both generations. For more details, see the
> >> > +          * comment in gen7_gs_state.c
> >> > +          */
> >> > +         OUT_BATCH(GEN6_GS_REORDER |
> >> > +                   GEN6_GS_SVBI_PAYLOAD_ENABLE |
> >> > +                   GEN6_GS_SVBI_POSTINCREMENT_ENABLE |
> >> > +                   /* FIXME: prog_data->svbi_postincrement_value instead of 0 */
> >> > +                   (0 << GEN6_GS_SVBI_POSTINCREMENT_VALUE_SHIFT) |
> >> > +                   GEN6_GS_ENABLE);
> >> > +      } else {
> >> > +         OUT_BATCH(GEN6_GS_REORDER | GEN6_GS_ENABLE);
> >> > +      }
> >> >        ADVANCE_BATCH();
> >> > +   } else {
> >> > +      /* In gen6, transform feedback support is done with an ad-hoc GS
> >> > +       * program. This function provides the needed 3DSTATE_GS.
> >> > +       */
> >> > +      upload_gs_state_for_tf(brw);
> >>
> >> Also handles the 'fully disable GS' case, which is a little weird.
> >> Seems fine though, but I think the comment could note that.
> >>
> >> Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
> >>
> >> >     }
> >> > +   brw->gs.enabled = active;
> >> >  }
> >> >
> >> >  const struct brw_tracked_state gen6_gs_state = {
> >> >     .dirty = {
> >> >        .mesa  = _NEW_TRANSFORM,
> >> >        .brw   = BRW_NEW_CONTEXT | BRW_NEW_PUSH_CONSTANT_ALLOCATION,
> >> > -      .cache = CACHE_NEW_FF_GS_PROG
> >> > +      .cache = (CACHE_NEW_GS_PROG | CACHE_NEW_FF_GS_PROG)
> >> >     },
> >> >     .emit = upload_gs_state,
> >> >  };
> >> > --
> >> > 1.9.1
> >> >
> >> > _______________________________________________
> >> > mesa-dev mailing list
> >> > mesa-dev@lists.freedesktop.org
> >> > http://lists.freedesktop.org/mailman/listinfo/mesa-dev
> >>
> >
> >
>
On mié, 2014-09-03 at 18:51 -0700, Jordan Justen wrote:
> Rather than:
> i965/gen6/gs: refactor gen6_gs_state
> 
> How about something like:
> i965/gen6/gs: Skeleton for user GS program support
> 
> (more below)
> 
> On Thu, Aug 14, 2014 at 4:11 AM, Iago Toral Quiroga <itoral@igalia.com> wrote:
> > From: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
> >
> > Currently, gen6 only uses geometry shaders for transform feedback so the state
> > we emit is not suitable to accomodate general purpose, user-provided geometry
> > shaders. This patch paves the way to add these support and the needed
> > 3DSTATE_GS packet modifications for it.
> >
> > Previous code that emitted state to implement transform feedback in gen6 goes
> > to upload_gs_state_adhoc_tf().
> >
> > Signed-off-by: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
> > ---
> >  src/mesa/drivers/dri/i965/gen6_gs_state.c | 105 ++++++++++++++++++++++++++----
> >  1 file changed, 94 insertions(+), 11 deletions(-)
> >
> > diff --git a/src/mesa/drivers/dri/i965/gen6_gs_state.c b/src/mesa/drivers/dri/i965/gen6_gs_state.c
> > index 9648fb7..e132959 100644
> > --- a/src/mesa/drivers/dri/i965/gen6_gs_state.c
> > +++ b/src/mesa/drivers/dri/i965/gen6_gs_state.c
> > @@ -31,7 +31,7 @@
> >  #include "intel_batchbuffer.h"
> >
> >  static void
> > -upload_gs_state(struct brw_context *brw)
> > +upload_gs_state_for_tf(struct brw_context *brw)
> >  {
> >     /* Disable all the constant buffers. */
> >     BEGIN_BATCH(5);
> > @@ -49,11 +49,11 @@ upload_gs_state(struct brw_context *brw)
> >        OUT_BATCH(GEN6_GS_SPF_MODE | GEN6_GS_VECTOR_MASK_ENABLE);
> >        OUT_BATCH(0); /* no scratch space */
> >        OUT_BATCH((2 << GEN6_GS_DISPATCH_START_GRF_SHIFT) |
> > -               (brw->ff_gs.prog_data->urb_read_length << GEN6_GS_URB_READ_LENGTH_SHIFT));
> > +                (brw->ff_gs.prog_data->urb_read_length << GEN6_GS_URB_READ_LENGTH_SHIFT));
> >        OUT_BATCH(((brw->max_gs_threads - 1) << GEN6_GS_MAX_THREADS_SHIFT) |
> > -               GEN6_GS_STATISTICS_ENABLE |
> > -               GEN6_GS_SO_STATISTICS_ENABLE |
> > -               GEN6_GS_RENDERING_ENABLE);
> > +                GEN6_GS_STATISTICS_ENABLE |
> > +                GEN6_GS_SO_STATISTICS_ENABLE |
> > +                GEN6_GS_RENDERING_ENABLE);
> 
> This looks like tabs to spaces conversion. For lines that need to be
> changed, it is good to do that conversion. But, in this case, the only
> change is the name of the function.
> 
> >        OUT_BATCH(GEN6_GS_SVBI_PAYLOAD_ENABLE |
> >                  GEN6_GS_SVBI_POSTINCREMENT_ENABLE |
> >                  (brw->ff_gs.prog_data->svbi_postincrement_value <<
> > @@ -65,24 +65,107 @@ upload_gs_state(struct brw_context *brw)
> >        OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2));
> >        OUT_BATCH(0); /* prog_bo */
> >        OUT_BATCH((0 << GEN6_GS_SAMPLER_COUNT_SHIFT) |
> > -               (0 << GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
> > +                (0 << GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
> >        OUT_BATCH(0); /* scratch space base offset */
> >        OUT_BATCH((1 << GEN6_GS_DISPATCH_START_GRF_SHIFT) |
> > -               (0 << GEN6_GS_URB_READ_LENGTH_SHIFT) |
> > -               (0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT));
> > +                (0 << GEN6_GS_URB_READ_LENGTH_SHIFT) |
> > +                (0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT));
> >        OUT_BATCH((0 << GEN6_GS_MAX_THREADS_SHIFT) |
> > -               GEN6_GS_STATISTICS_ENABLE |
> > -               GEN6_GS_RENDERING_ENABLE);
> > +                GEN6_GS_STATISTICS_ENABLE |
> > +                GEN6_GS_RENDERING_ENABLE);
> > +      OUT_BATCH(0);
> > +      ADVANCE_BATCH();
> > +   }
> > +}
> > +
> > +static void
> > +upload_gs_state(struct brw_context *brw)
> > +{
> > +   /* BRW_NEW_GEOMETRY_PROGRAM */
> > +   bool active = brw->geometry_program;
> > +   /* CACHE_NEW_GS_PROG */
> > +   const struct brw_vec4_prog_data *prog_data = &brw->gs.prog_data->base;
> > +   const struct brw_stage_state *stage_state = &brw->gs.base;
> > +
> > +   if (active) {
> > +      /* FIXME: enable constant buffers */
> > +      BEGIN_BATCH(5);
> > +      OUT_BATCH(_3DSTATE_CONSTANT_GS << 16 | (5 - 2));
> > +      OUT_BATCH(0);
> > +      OUT_BATCH(0);
> >        OUT_BATCH(0);
> > +      OUT_BATCH(0);
> > +      ADVANCE_BATCH();
> > +
> > +      BEGIN_BATCH(7);
> > +      OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2));
> > +      OUT_BATCH(stage_state->prog_offset);
> > +
> > +      /* GEN6_GS_SPF_MODE and GEN6_GS_VECTOR_MASK_ENABLE are enabled as it
> > +       * was previously done for gen6.
> > +       *
> > +       * TODO: test with both disabled to see if the HW is behaving
> > +       * as expected, like in gen7.
> > +       */
> > +      OUT_BATCH(GEN6_GS_SPF_MODE | GEN6_GS_VECTOR_MASK_ENABLE |
> > +                ((ALIGN(stage_state->sampler_count, 4)/4) <<
> > +                 GEN6_GS_SAMPLER_COUNT_SHIFT) |
> > +                ((prog_data->base.binding_table.size_bytes / 4) <<
> > +                 GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
> > +
> > +      if (prog_data->total_scratch) {
> > +         OUT_RELOC(stage_state->scratch_bo,
> > +                   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
> > +                   ffs(prog_data->total_scratch) - 11);
> > +      } else {
> > +         OUT_BATCH(0); /* no scratch space */
> > +      }
> > +
> > +      OUT_BATCH((prog_data->urb_read_length <<
> > +                 GEN6_GS_URB_READ_LENGTH_SHIFT) |
> > +                (0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT) |
> > +                (prog_data->base.dispatch_grf_start_reg <<
> > +                 GEN6_GS_DISPATCH_START_GRF_SHIFT));
> > +
> > +      OUT_BATCH(((brw->max_gs_threads - 1) << GEN6_GS_MAX_THREADS_SHIFT) |
> > +                GEN6_GS_STATISTICS_ENABLE |
> > +                GEN6_GS_SO_STATISTICS_ENABLE |
> > +                GEN6_GS_RENDERING_ENABLE);
> > +
> > +      /* FIXME: Enable SVBI payload only when TF is enable in SNB for
> > +       * user-provided GS.
> > +       */
> > +      if (0) {
> > +         /* GEN6_GS_REORDER is equivalent to GEN7_GS_REORDER_TRAILING
> > +          * in gen7. SNB and IVB specs are the same regarding the reordering of
> > +          * TRISTRIP/TRISTRIP_REV vertices and triangle orientation, so we do
> > +          * the same thing in both generations. For more details, see the
> > +          * comment in gen7_gs_state.c
> > +          */
> > +         OUT_BATCH(GEN6_GS_REORDER |
> > +                   GEN6_GS_SVBI_PAYLOAD_ENABLE |
> > +                   GEN6_GS_SVBI_POSTINCREMENT_ENABLE |
> > +                   /* FIXME: prog_data->svbi_postincrement_value instead of 0 */
> > +                   (0 << GEN6_GS_SVBI_POSTINCREMENT_VALUE_SHIFT) |
> > +                   GEN6_GS_ENABLE);
> > +      } else {
> > +         OUT_BATCH(GEN6_GS_REORDER | GEN6_GS_ENABLE);
> > +      }
> >        ADVANCE_BATCH();
> > +   } else {
> > +      /* In gen6, transform feedback support is done with an ad-hoc GS
> > +       * program. This function provides the needed 3DSTATE_GS.
> > +       */
> > +      upload_gs_state_for_tf(brw);
> 
> Also handles the 'fully disable GS' case, which is a little weird.
> Seems fine though, but I think the comment could note that.

I think this is a bit weird and is easy to fix, so I'll move the else
part of upload_gs_state_for_tf to upload_gs_state.

Iago

> Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
> 
> >     }
> > +   brw->gs.enabled = active;
> >  }
> >
> >  const struct brw_tracked_state gen6_gs_state = {
> >     .dirty = {
> >        .mesa  = _NEW_TRANSFORM,
> >        .brw   = BRW_NEW_CONTEXT | BRW_NEW_PUSH_CONSTANT_ALLOCATION,
> > -      .cache = CACHE_NEW_FF_GS_PROG
> > +      .cache = (CACHE_NEW_GS_PROG | CACHE_NEW_FF_GS_PROG)
> >     },
> >     .emit = upload_gs_state,
> >  };
> > --
> > 1.9.1
> >
> > _______________________________________________
> > mesa-dev mailing list
> > mesa-dev@lists.freedesktop.org
> > http://lists.freedesktop.org/mailman/listinfo/mesa-dev
>