[Mesa-dev,08/37] i965/gen6/gs: Implement GS_OPCODE_URB_WRITE_ALLOCATE.

Submitted by Iago Toral Quiroga on Aug. 14, 2014, 11:11 a.m.

Details

Message ID 1408014729-12708-9-git-send-email-itoral@igalia.com
State Accepted
Headers show

Not browsing as part of any series.

Commit Message

Iago Toral Quiroga Aug. 14, 2014, 11:11 a.m.
Gen6 geometry shaders need to allocate URB handles for each new vertex they
emit after the first (the URB handle for the first vertex is obtained via the
FF_SYNC message).

This opcode adds the URB allocation mechanism to regular URB writes.
---
 src/mesa/drivers/dri/i965/brw_defines.h          |  8 +++++++
 src/mesa/drivers/dri/i965/brw_shader.cpp         |  2 ++
 src/mesa/drivers/dri/i965/brw_vec4.cpp           |  1 +
 src/mesa/drivers/dri/i965/brw_vec4.h             |  1 +
 src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 30 ++++++++++++++++++++++++
 5 files changed, 42 insertions(+)

Patch hide | download patch | download mbox

diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index 125d728..60b3846 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -929,6 +929,14 @@  enum opcode {
    GS_OPCODE_URB_WRITE,
 
    /**
+    * Write geometry shader output data to the URB and request a new URB
+    * handle (gen6).
+    *
+    * This opcode doesn't do an implied move from R0 to the first MRF.
+    */
+   GS_OPCODE_URB_WRITE_ALLOCATE,
+
+   /**
     * Terminate the geometry shader thread by doing an empty URB write.
     *
     * This opcode doesn't do an implied move from R0 to the first MRF.  This
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index 5749061..69d16a7 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -514,6 +514,8 @@  brw_instruction_name(enum opcode op)
 
    case GS_OPCODE_URB_WRITE:
       return "gs_urb_write";
+   case GS_OPCODE_URB_WRITE_ALLOCATE:
+      return "gs_urb_write_allocate";
    case GS_OPCODE_THREAD_END:
       return "gs_thread_end";
    case GS_OPCODE_SET_WRITE_OFFSET:
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index b572b61..e413a05 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -274,6 +274,7 @@  vec4_visitor::implied_mrf_writes(vec4_instruction *inst)
    case SHADER_OPCODE_GEN4_SCRATCH_WRITE:
       return 3;
    case GS_OPCODE_URB_WRITE:
+   case GS_OPCODE_URB_WRITE_ALLOCATE:
    case GS_OPCODE_THREAD_END:
       return 0;
    case SHADER_OPCODE_SHADER_TIME_ADD:
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index 72fabdd..c1daf54 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -649,6 +649,7 @@  private:
 
    void generate_vs_urb_write(vec4_instruction *inst);
    void generate_gs_urb_write(vec4_instruction *inst);
+   void generate_gs_urb_write_allocate(vec4_instruction *inst);
    void generate_gs_thread_end(vec4_instruction *inst);
    void generate_gs_set_write_offset(struct brw_reg dst,
                                      struct brw_reg src0,
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
index 05f4892..8ef0c34 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
@@ -429,6 +429,32 @@  vec4_generator::generate_gs_urb_write(vec4_instruction *inst)
 }
 
 void
+vec4_generator::generate_gs_urb_write_allocate(vec4_instruction *inst)
+{
+   struct brw_reg src = brw_message_reg(inst->base_mrf);

+   /* The temporary in src0 serves as the URB message writeback register */
+   brw_urb_WRITE(p,
+                 inst->get_src(this->prog_data, 0), /* dest */
+                 inst->base_mrf, /* starting mrf reg nr */
+                 src,
+                 BRW_URB_WRITE_ALLOCATE_COMPLETE,
+                 inst->mlen,
+                 1, /* response len */
+                 inst->offset,  /* urb destination offset */
+                 BRW_URB_SWIZZLE_INTERLEAVE);
+
+   /* Copy the allocated URB handle (element 0 of src0) into dst.0 */
+   brw_push_insn_state(p);
+   brw_set_default_access_mode(p, BRW_ALIGN_1);
+   brw_set_default_mask_control(p, BRW_MASK_DISABLE);
+   brw_MOV(p, get_element_ud(inst->get_dst(), 0),
+           get_element_ud(inst->get_src(this->prog_data, 0), 0));
+   brw_set_default_access_mode(p, BRW_ALIGN_16);
+   brw_pop_insn_state(p);
+}
+
+void
 vec4_generator::generate_gs_thread_end(vec4_instruction *inst)
 {
    struct brw_reg src = brw_message_reg(inst->base_mrf);
@@ -1206,6 +1232,10 @@  vec4_generator::generate_vec4_instruction(vec4_instruction *instruction,
       generate_gs_urb_write(inst);
       break;
 
+   case GS_OPCODE_URB_WRITE_ALLOCATE:
+      generate_gs_urb_write_allocate(inst);
+      break;
+
    case GS_OPCODE_THREAD_END:
       generate_gs_thread_end(inst);
       break;