intel/gen9+: Enable object level preemption.

Submitted by Rafael Antognolli on Feb. 16, 2018, 9:44 p.m.

Details

Message ID 20180216214400.15099-1-rafael.antognolli@intel.com
State New
Headers show
Series "intel/gen9+: Enable object level preemption." ( rev: 1 ) in Mesa

Not browsing as part of any series.

Commit Message

Rafael Antognolli Feb. 16, 2018, 9:44 p.m.
"This field controls the granularity of the replay mechanism when
 coming back into a previously preempted context."

The kernel disables this bit but whitelists the register, and it's a
context register. So enable it and take advantage of finer granularity
when preemption is available.

Signed-off-by: Rafael Antognolli <rafael.antognolli@intel.com>
Cc: Ben Widawsky <ben@bwidawsk.net>
---

This patch still needs more testing (only ran it through CI and also did
some basic tests on my machine to make sure it's not breaking anything).

 src/intel/genxml/gen10.xml                   |  8 ++++++++
 src/intel/genxml/gen11.xml                   |  8 ++++++++
 src/intel/genxml/gen9.xml                    |  8 ++++++++
 src/intel/vulkan/genX_state.c                | 18 ++++++++++++++++++
 src/mesa/drivers/dri/i965/brw_defines.h      |  5 +++++
 src/mesa/drivers/dri/i965/brw_state_upload.c | 10 ++++++++++
 6 files changed, 57 insertions(+)

Patch hide | download patch | download mbox

diff --git a/src/intel/genxml/gen10.xml b/src/intel/genxml/gen10.xml
index 47c679a3fa9..42ac6e82696 100644
--- a/src/intel/genxml/gen10.xml
+++ b/src/intel/genxml/gen10.xml
@@ -3692,6 +3692,14 @@ 
     <field name="All Allocation" start="25" end="31" type="uint"/>
   </register>
 
+  <register name="CS_CHICKEN1" length="1" num="0x2580">
+    <field name="Replay Mode" start="0" end="0" type="uint">
+      <value name="Mid-cmdbuffer Preemption" value="0"/>
+      <value name="Object Level Preemption" value="1"/>
+    </field>
+    <field name="Replay Mode Mask" start="16" end="16" type="bool"/>
+  </register>
+
   <register name="SO_WRITE_OFFSET0" length="1" num="0x5280">
     <field name="Write Offset" start="2" end="31" type="offset"/>
   </register>
diff --git a/src/intel/genxml/gen11.xml b/src/intel/genxml/gen11.xml
index 9a8a2fe21e3..e6ce42b2bfb 100644
--- a/src/intel/genxml/gen11.xml
+++ b/src/intel/genxml/gen11.xml
@@ -3688,6 +3688,14 @@ 
     <field name="All Allocation" start="25" end="31" type="uint"/>
   </register>
 
+  <register name="CS_CHICKEN1" length="1" num="0x2580">
+    <field name="Replay Mode" start="0" end="0" type="uint">
+      <value name="Mid-cmdbuffer Preemption" value="0"/>
+      <value name="Object Level Preemption" value="1"/>
+    </field>
+    <field name="Replay Mode Mask" start="16" end="16" type="bool"/>
+  </register>
+
   <register name="SO_WRITE_OFFSET0" length="1" num="0x5280">
     <field name="Write Offset" start="2" end="31" type="offset"/>
   </register>
diff --git a/src/intel/genxml/gen9.xml b/src/intel/genxml/gen9.xml
index 7eef4bee013..45e1fddeb50 100644
--- a/src/intel/genxml/gen9.xml
+++ b/src/intel/genxml/gen9.xml
@@ -3638,6 +3638,14 @@ 
     <field name="All Allocation" start="25" end="31" type="uint"/>
   </register>
 
+  <register name="CS_CHICKEN1" length="1" num="0x2580">
+    <field name="Replay Mode" start="0" end="0" type="uint">
+      <value name="Mid-cmdbuffer Preemption" value="0"/>
+      <value name="Object Level Preemption" value="1"/>
+    </field>
+    <field name="Replay Mode Mask" start="16" end="16" type="bool"/>
+  </register>
+
   <register name="SO_WRITE_OFFSET0" length="1" num="0x5280">
     <field name="Write Offset" start="2" end="31" type="offset"/>
   </register>
diff --git a/src/intel/vulkan/genX_state.c b/src/intel/vulkan/genX_state.c
index 54fb8634fdc..83b6c6387f3 100644
--- a/src/intel/vulkan/genX_state.c
+++ b/src/intel/vulkan/genX_state.c
@@ -169,6 +169,24 @@  genX(init_device_state)(struct anv_device *device)
    gen10_emit_wa_lri_to_cache_mode_zero(&batch);
 #endif
 
+#if GEN_GEN >= 9
+   /* A fixed function pipe flush is required before modifying this field */
+   anv_batch_emit(&batch, GENX(PIPE_CONTROL), pipe) {
+      pipe.PipeControlFlushEnable = true;
+   }
+
+   /* enable object level preemption */
+   uint32_t csc1;
+
+   anv_pack_struct(&csc1, GENX(CS_CHICKEN1),
+                   .ReplayMode = ObjectLevelPreemption,
+                   .ReplayModeMask = 1);
+   anv_batch_emit(&batch, GENX(MI_LOAD_REGISTER_IMM), lri) {
+      lri.RegisterOffset   = GENX(CS_CHICKEN1_num);
+      lri.DataDWord        = csc1;
+   }
+#endif
+
    anv_batch_emit(&batch, GENX(MI_BATCH_BUFFER_END), bbe);
 
    assert(batch.next <= batch.end);
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index 8bf6f68b67c..f0994d3b139 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -1661,4 +1661,9 @@  enum brw_pixel_shader_coverage_mask_mode {
 # define GLK_SCEC_BARRIER_MODE_3D_HULL     (1 << 7)
 # define GLK_SCEC_BARRIER_MODE_MASK        REG_MASK(1 << 7)
 
+#define CS_CHICKEN1                        0x2580 /* Gen9+ */
+# define GEN9_REPLAY_MODE_MIDBUFFER             (0 << 0)
+# define GEN9_REPLAY_MODE_MIDOBJECT             (1 << 0)
+# define GEN9_REPLAY_MODE_MASK                  REG_MASK(1 << 0)
+
 #endif
diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c
index 86c12e4d357..a90dc01d87b 100644
--- a/src/mesa/drivers/dri/i965/brw_state_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
@@ -115,6 +115,16 @@  brw_upload_initial_gpu_state(struct brw_context *brw)
       OUT_BATCH(0);
       ADVANCE_BATCH();
    }
+
+   if (devinfo->gen >= 9) {
+      /* A fixed function pipe flush is required before modifying this field */
+      brw_emit_pipe_control_flush(brw, PIPE_CONTROL_FLUSH_ENABLE);
+
+      /* enable object level preemption */
+      brw_load_register_imm32(brw, CS_CHICKEN1,
+                              GEN9_REPLAY_MODE_MIDOBJECT |
+                              GEN9_REPLAY_MODE_MASK);
+   }
 }
 
 static inline const struct brw_tracked_state *

Comments

On 18-02-16 13:44:00, Antognolli, Rafael wrote:
>"This field controls the granularity of the replay mechanism when
> coming back into a previously preempted context."
>
>The kernel disables this bit but whitelists the register, and it's a
>context register. So enable it and take advantage of finer granularity
>when preemption is available.
>

Does the kernel actually disable it? I thought the kernel just doesn't touch it
(I don't think it's whitelisted by the kernel either, it's just writable).

>Signed-off-by: Rafael Antognolli <rafael.antognolli@intel.com>
>Cc: Ben Widawsky <ben@bwidawsk.net>
>---
>
>This patch still needs more testing (only ran it through CI and also did
>some basic tests on my machine to make sure it's not breaking anything).
>
> src/intel/genxml/gen10.xml                   |  8 ++++++++
> src/intel/genxml/gen11.xml                   |  8 ++++++++
> src/intel/genxml/gen9.xml                    |  8 ++++++++
> src/intel/vulkan/genX_state.c                | 18 ++++++++++++++++++
> src/mesa/drivers/dri/i965/brw_defines.h      |  5 +++++
> src/mesa/drivers/dri/i965/brw_state_upload.c | 10 ++++++++++
> 6 files changed, 57 insertions(+)
>
>diff --git a/src/intel/genxml/gen10.xml b/src/intel/genxml/gen10.xml
>index 47c679a3fa9..42ac6e82696 100644
>--- a/src/intel/genxml/gen10.xml
>+++ b/src/intel/genxml/gen10.xml
>@@ -3692,6 +3692,14 @@
>     <field name="All Allocation" start="25" end="31" type="uint"/>
>   </register>
>
>+  <register name="CS_CHICKEN1" length="1" num="0x2580">
>+    <field name="Replay Mode" start="0" end="0" type="uint">
>+      <value name="Mid-cmdbuffer Preemption" value="0"/>
>+      <value name="Object Level Preemption" value="1"/>
>+    </field>
>+    <field name="Replay Mode Mask" start="16" end="16" type="bool"/>
>+  </register>
>+
>   <register name="SO_WRITE_OFFSET0" length="1" num="0x5280">
>     <field name="Write Offset" start="2" end="31" type="offset"/>
>   </register>
>diff --git a/src/intel/genxml/gen11.xml b/src/intel/genxml/gen11.xml
>index 9a8a2fe21e3..e6ce42b2bfb 100644
>--- a/src/intel/genxml/gen11.xml
>+++ b/src/intel/genxml/gen11.xml
>@@ -3688,6 +3688,14 @@
>     <field name="All Allocation" start="25" end="31" type="uint"/>
>   </register>
>
>+  <register name="CS_CHICKEN1" length="1" num="0x2580">
>+    <field name="Replay Mode" start="0" end="0" type="uint">
>+      <value name="Mid-cmdbuffer Preemption" value="0"/>
>+      <value name="Object Level Preemption" value="1"/>
>+    </field>
>+    <field name="Replay Mode Mask" start="16" end="16" type="bool"/>
>+  </register>
>+
>   <register name="SO_WRITE_OFFSET0" length="1" num="0x5280">
>     <field name="Write Offset" start="2" end="31" type="offset"/>
>   </register>
>diff --git a/src/intel/genxml/gen9.xml b/src/intel/genxml/gen9.xml
>index 7eef4bee013..45e1fddeb50 100644
>--- a/src/intel/genxml/gen9.xml
>+++ b/src/intel/genxml/gen9.xml
>@@ -3638,6 +3638,14 @@
>     <field name="All Allocation" start="25" end="31" type="uint"/>
>   </register>
>
>+  <register name="CS_CHICKEN1" length="1" num="0x2580">
>+    <field name="Replay Mode" start="0" end="0" type="uint">
>+      <value name="Mid-cmdbuffer Preemption" value="0"/>
>+      <value name="Object Level Preemption" value="1"/>
>+    </field>
>+    <field name="Replay Mode Mask" start="16" end="16" type="bool"/>
>+  </register>
>+
>   <register name="SO_WRITE_OFFSET0" length="1" num="0x5280">
>     <field name="Write Offset" start="2" end="31" type="offset"/>
>   </register>
>diff --git a/src/intel/vulkan/genX_state.c b/src/intel/vulkan/genX_state.c
>index 54fb8634fdc..83b6c6387f3 100644
>--- a/src/intel/vulkan/genX_state.c
>+++ b/src/intel/vulkan/genX_state.c
>@@ -169,6 +169,24 @@ genX(init_device_state)(struct anv_device *device)
>    gen10_emit_wa_lri_to_cache_mode_zero(&batch);
> #endif
>
>+#if GEN_GEN >= 9
>+   /* A fixed function pipe flush is required before modifying this field */
>+   anv_batch_emit(&batch, GENX(PIPE_CONTROL), pipe) {
>+      pipe.PipeControlFlushEnable = true;
>+   }
>+
>+   /* enable object level preemption */
>+   uint32_t csc1;
>+
>+   anv_pack_struct(&csc1, GENX(CS_CHICKEN1),
>+                   .ReplayMode = ObjectLevelPreemption,
>+                   .ReplayModeMask = 1);
>+   anv_batch_emit(&batch, GENX(MI_LOAD_REGISTER_IMM), lri) {
>+      lri.RegisterOffset   = GENX(CS_CHICKEN1_num);
>+      lri.DataDWord        = csc1;
>+   }
>+#endif
>+
>    anv_batch_emit(&batch, GENX(MI_BATCH_BUFFER_END), bbe);
>
>    assert(batch.next <= batch.end);
>diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
>index 8bf6f68b67c..f0994d3b139 100644
>--- a/src/mesa/drivers/dri/i965/brw_defines.h
>+++ b/src/mesa/drivers/dri/i965/brw_defines.h
>@@ -1661,4 +1661,9 @@ enum brw_pixel_shader_coverage_mask_mode {
> # define GLK_SCEC_BARRIER_MODE_3D_HULL     (1 << 7)
> # define GLK_SCEC_BARRIER_MODE_MASK        REG_MASK(1 << 7)
>
>+#define CS_CHICKEN1                        0x2580 /* Gen9+ */
>+# define GEN9_REPLAY_MODE_MIDBUFFER             (0 << 0)
>+# define GEN9_REPLAY_MODE_MIDOBJECT             (1 << 0)
>+# define GEN9_REPLAY_MODE_MASK                  REG_MASK(1 << 0)
>+
> #endif
>diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c
>index 86c12e4d357..a90dc01d87b 100644
>--- a/src/mesa/drivers/dri/i965/brw_state_upload.c
>+++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
>@@ -115,6 +115,16 @@ brw_upload_initial_gpu_state(struct brw_context *brw)
>       OUT_BATCH(0);
>       ADVANCE_BATCH();
>    }
>+
>+   if (devinfo->gen >= 9) {
>+      /* A fixed function pipe flush is required before modifying this field */
>+      brw_emit_pipe_control_flush(brw, PIPE_CONTROL_FLUSH_ENABLE);
>+
>+      /* enable object level preemption */
>+      brw_load_register_imm32(brw, CS_CHICKEN1,
>+                              GEN9_REPLAY_MODE_MIDOBJECT |
>+                              GEN9_REPLAY_MODE_MASK);
>+   }

One other option BTW is a context parameter in i915 driver. Note sure if you
discussed this with the i915 folks, but it could make some sense.


> }
>
> static inline const struct brw_tracked_state *

Everything looks in order to me.

Reviewed-by: Ben Widawsky <ben@bwidawsk.net>

>-- 
>2.14.3
>
On Fri, Feb 16, 2018 at 06:37:55PM -0800, Ben Widawsky wrote:
> On 18-02-16 13:44:00, Antognolli, Rafael wrote:
> > "This field controls the granularity of the replay mechanism when
> > coming back into a previously preempted context."
> > 
> > The kernel disables this bit but whitelists the register, and it's a
> > context register. So enable it and take advantage of finer granularity
> > when preemption is available.
> > 
> 
> Does the kernel actually disable it? I thought the kernel just doesn't touch it
> (I don't think it's whitelisted by the kernel either, it's just writable).

I'm seeing it being disabled at WaDisable3DMidCmdPreemption, seems to be
in effect since commit 5152defe4a53ad15e6d96c422440152302c8abd7.

And it's whitelisted by WaEnablePreemptionGranularityControlByUMD.

> > Signed-off-by: Rafael Antognolli <rafael.antognolli@intel.com>
> > Cc: Ben Widawsky <ben@bwidawsk.net>
> > ---
> > 
> > This patch still needs more testing (only ran it through CI and also did
> > some basic tests on my machine to make sure it's not breaking anything).
> > 
> > src/intel/genxml/gen10.xml                   |  8 ++++++++
> > src/intel/genxml/gen11.xml                   |  8 ++++++++
> > src/intel/genxml/gen9.xml                    |  8 ++++++++
> > src/intel/vulkan/genX_state.c                | 18 ++++++++++++++++++
> > src/mesa/drivers/dri/i965/brw_defines.h      |  5 +++++
> > src/mesa/drivers/dri/i965/brw_state_upload.c | 10 ++++++++++
> > 6 files changed, 57 insertions(+)
> > 
> > diff --git a/src/intel/genxml/gen10.xml b/src/intel/genxml/gen10.xml
> > index 47c679a3fa9..42ac6e82696 100644
> > --- a/src/intel/genxml/gen10.xml
> > +++ b/src/intel/genxml/gen10.xml
> > @@ -3692,6 +3692,14 @@
> >     <field name="All Allocation" start="25" end="31" type="uint"/>
> >   </register>
> > 
> > +  <register name="CS_CHICKEN1" length="1" num="0x2580">
> > +    <field name="Replay Mode" start="0" end="0" type="uint">
> > +      <value name="Mid-cmdbuffer Preemption" value="0"/>
> > +      <value name="Object Level Preemption" value="1"/>
> > +    </field>
> > +    <field name="Replay Mode Mask" start="16" end="16" type="bool"/>
> > +  </register>
> > +
> >   <register name="SO_WRITE_OFFSET0" length="1" num="0x5280">
> >     <field name="Write Offset" start="2" end="31" type="offset"/>
> >   </register>
> > diff --git a/src/intel/genxml/gen11.xml b/src/intel/genxml/gen11.xml
> > index 9a8a2fe21e3..e6ce42b2bfb 100644
> > --- a/src/intel/genxml/gen11.xml
> > +++ b/src/intel/genxml/gen11.xml
> > @@ -3688,6 +3688,14 @@
> >     <field name="All Allocation" start="25" end="31" type="uint"/>
> >   </register>
> > 
> > +  <register name="CS_CHICKEN1" length="1" num="0x2580">
> > +    <field name="Replay Mode" start="0" end="0" type="uint">
> > +      <value name="Mid-cmdbuffer Preemption" value="0"/>
> > +      <value name="Object Level Preemption" value="1"/>
> > +    </field>
> > +    <field name="Replay Mode Mask" start="16" end="16" type="bool"/>
> > +  </register>
> > +
> >   <register name="SO_WRITE_OFFSET0" length="1" num="0x5280">
> >     <field name="Write Offset" start="2" end="31" type="offset"/>
> >   </register>
> > diff --git a/src/intel/genxml/gen9.xml b/src/intel/genxml/gen9.xml
> > index 7eef4bee013..45e1fddeb50 100644
> > --- a/src/intel/genxml/gen9.xml
> > +++ b/src/intel/genxml/gen9.xml
> > @@ -3638,6 +3638,14 @@
> >     <field name="All Allocation" start="25" end="31" type="uint"/>
> >   </register>
> > 
> > +  <register name="CS_CHICKEN1" length="1" num="0x2580">
> > +    <field name="Replay Mode" start="0" end="0" type="uint">
> > +      <value name="Mid-cmdbuffer Preemption" value="0"/>
> > +      <value name="Object Level Preemption" value="1"/>
> > +    </field>
> > +    <field name="Replay Mode Mask" start="16" end="16" type="bool"/>
> > +  </register>
> > +
> >   <register name="SO_WRITE_OFFSET0" length="1" num="0x5280">
> >     <field name="Write Offset" start="2" end="31" type="offset"/>
> >   </register>
> > diff --git a/src/intel/vulkan/genX_state.c b/src/intel/vulkan/genX_state.c
> > index 54fb8634fdc..83b6c6387f3 100644
> > --- a/src/intel/vulkan/genX_state.c
> > +++ b/src/intel/vulkan/genX_state.c
> > @@ -169,6 +169,24 @@ genX(init_device_state)(struct anv_device *device)
> >    gen10_emit_wa_lri_to_cache_mode_zero(&batch);
> > #endif
> > 
> > +#if GEN_GEN >= 9
> > +   /* A fixed function pipe flush is required before modifying this field */
> > +   anv_batch_emit(&batch, GENX(PIPE_CONTROL), pipe) {
> > +      pipe.PipeControlFlushEnable = true;
> > +   }
> > +
> > +   /* enable object level preemption */
> > +   uint32_t csc1;
> > +
> > +   anv_pack_struct(&csc1, GENX(CS_CHICKEN1),
> > +                   .ReplayMode = ObjectLevelPreemption,
> > +                   .ReplayModeMask = 1);
> > +   anv_batch_emit(&batch, GENX(MI_LOAD_REGISTER_IMM), lri) {
> > +      lri.RegisterOffset   = GENX(CS_CHICKEN1_num);
> > +      lri.DataDWord        = csc1;
> > +   }
> > +#endif
> > +
> >    anv_batch_emit(&batch, GENX(MI_BATCH_BUFFER_END), bbe);
> > 
> >    assert(batch.next <= batch.end);
> > diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
> > index 8bf6f68b67c..f0994d3b139 100644
> > --- a/src/mesa/drivers/dri/i965/brw_defines.h
> > +++ b/src/mesa/drivers/dri/i965/brw_defines.h
> > @@ -1661,4 +1661,9 @@ enum brw_pixel_shader_coverage_mask_mode {
> > # define GLK_SCEC_BARRIER_MODE_3D_HULL     (1 << 7)
> > # define GLK_SCEC_BARRIER_MODE_MASK        REG_MASK(1 << 7)
> > 
> > +#define CS_CHICKEN1                        0x2580 /* Gen9+ */
> > +# define GEN9_REPLAY_MODE_MIDBUFFER             (0 << 0)
> > +# define GEN9_REPLAY_MODE_MIDOBJECT             (1 << 0)
> > +# define GEN9_REPLAY_MODE_MASK                  REG_MASK(1 << 0)
> > +
> > #endif
> > diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c
> > index 86c12e4d357..a90dc01d87b 100644
> > --- a/src/mesa/drivers/dri/i965/brw_state_upload.c
> > +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
> > @@ -115,6 +115,16 @@ brw_upload_initial_gpu_state(struct brw_context *brw)
> >       OUT_BATCH(0);
> >       ADVANCE_BATCH();
> >    }
> > +
> > +   if (devinfo->gen >= 9) {
> > +      /* A fixed function pipe flush is required before modifying this field */
> > +      brw_emit_pipe_control_flush(brw, PIPE_CONTROL_FLUSH_ENABLE);
> > +
> > +      /* enable object level preemption */
> > +      brw_load_register_imm32(brw, CS_CHICKEN1,
> > +                              GEN9_REPLAY_MODE_MIDOBJECT |
> > +                              GEN9_REPLAY_MODE_MASK);
> > +   }
> 
> One other option BTW is a context parameter in i915 driver. Note sure if you
> discussed this with the i915 folks, but it could make some sense.

I haven't, but will do that. Thanks for the review.

> > }
> > 
> > static inline const struct brw_tracked_state *
> 
> Everything looks in order to me.
> 
> Reviewed-by: Ben Widawsky <ben@bwidawsk.net>
> 
> > -- 
> > 2.14.3
> >
On Tue, Feb 20, 2018 at 08:11:14AM -0800, Rafael Antognolli wrote:
> On Fri, Feb 16, 2018 at 06:37:55PM -0800, Ben Widawsky wrote:
> > On 18-02-16 13:44:00, Antognolli, Rafael wrote:
> > > "This field controls the granularity of the replay mechanism when
> > > coming back into a previously preempted context."
> > > 
> > > The kernel disables this bit but whitelists the register, and it's a
> > > context register. So enable it and take advantage of finer granularity
> > > when preemption is available.
> > > 
> > 
> > Does the kernel actually disable it? I thought the kernel just doesn't touch it
> > (I don't think it's whitelisted by the kernel either, it's just writable).
> 
> I'm seeing it being disabled at WaDisable3DMidCmdPreemption, seems to be
> in effect since commit 5152defe4a53ad15e6d96c422440152302c8abd7.
> 
> And it's whitelisted by WaEnablePreemptionGranularityControlByUMD.
> 
> > > Signed-off-by: Rafael Antognolli <rafael.antognolli@intel.com>
> > > Cc: Ben Widawsky <ben@bwidawsk.net>
> > > ---
> > > 
> > > This patch still needs more testing (only ran it through CI and also did
> > > some basic tests on my machine to make sure it's not breaking anything).
> > > 
> > > src/intel/genxml/gen10.xml                   |  8 ++++++++
> > > src/intel/genxml/gen11.xml                   |  8 ++++++++
> > > src/intel/genxml/gen9.xml                    |  8 ++++++++
> > > src/intel/vulkan/genX_state.c                | 18 ++++++++++++++++++
> > > src/mesa/drivers/dri/i965/brw_defines.h      |  5 +++++
> > > src/mesa/drivers/dri/i965/brw_state_upload.c | 10 ++++++++++
> > > 6 files changed, 57 insertions(+)
> > > 
> > > diff --git a/src/intel/genxml/gen10.xml b/src/intel/genxml/gen10.xml
> > > index 47c679a3fa9..42ac6e82696 100644
> > > --- a/src/intel/genxml/gen10.xml
> > > +++ b/src/intel/genxml/gen10.xml
> > > @@ -3692,6 +3692,14 @@
> > >     <field name="All Allocation" start="25" end="31" type="uint"/>
> > >   </register>
> > > 
> > > +  <register name="CS_CHICKEN1" length="1" num="0x2580">
> > > +    <field name="Replay Mode" start="0" end="0" type="uint">
> > > +      <value name="Mid-cmdbuffer Preemption" value="0"/>
> > > +      <value name="Object Level Preemption" value="1"/>
> > > +    </field>
> > > +    <field name="Replay Mode Mask" start="16" end="16" type="bool"/>
> > > +  </register>
> > > +
> > >   <register name="SO_WRITE_OFFSET0" length="1" num="0x5280">
> > >     <field name="Write Offset" start="2" end="31" type="offset"/>
> > >   </register>
> > > diff --git a/src/intel/genxml/gen11.xml b/src/intel/genxml/gen11.xml
> > > index 9a8a2fe21e3..e6ce42b2bfb 100644
> > > --- a/src/intel/genxml/gen11.xml
> > > +++ b/src/intel/genxml/gen11.xml
> > > @@ -3688,6 +3688,14 @@
> > >     <field name="All Allocation" start="25" end="31" type="uint"/>
> > >   </register>
> > > 
> > > +  <register name="CS_CHICKEN1" length="1" num="0x2580">
> > > +    <field name="Replay Mode" start="0" end="0" type="uint">
> > > +      <value name="Mid-cmdbuffer Preemption" value="0"/>
> > > +      <value name="Object Level Preemption" value="1"/>
> > > +    </field>
> > > +    <field name="Replay Mode Mask" start="16" end="16" type="bool"/>
> > > +  </register>
> > > +
> > >   <register name="SO_WRITE_OFFSET0" length="1" num="0x5280">
> > >     <field name="Write Offset" start="2" end="31" type="offset"/>
> > >   </register>
> > > diff --git a/src/intel/genxml/gen9.xml b/src/intel/genxml/gen9.xml
> > > index 7eef4bee013..45e1fddeb50 100644
> > > --- a/src/intel/genxml/gen9.xml
> > > +++ b/src/intel/genxml/gen9.xml
> > > @@ -3638,6 +3638,14 @@
> > >     <field name="All Allocation" start="25" end="31" type="uint"/>
> > >   </register>
> > > 
> > > +  <register name="CS_CHICKEN1" length="1" num="0x2580">
> > > +    <field name="Replay Mode" start="0" end="0" type="uint">
> > > +      <value name="Mid-cmdbuffer Preemption" value="0"/>
> > > +      <value name="Object Level Preemption" value="1"/>
> > > +    </field>
> > > +    <field name="Replay Mode Mask" start="16" end="16" type="bool"/>
> > > +  </register>
> > > +
> > >   <register name="SO_WRITE_OFFSET0" length="1" num="0x5280">
> > >     <field name="Write Offset" start="2" end="31" type="offset"/>
> > >   </register>
> > > diff --git a/src/intel/vulkan/genX_state.c b/src/intel/vulkan/genX_state.c
> > > index 54fb8634fdc..83b6c6387f3 100644
> > > --- a/src/intel/vulkan/genX_state.c
> > > +++ b/src/intel/vulkan/genX_state.c
> > > @@ -169,6 +169,24 @@ genX(init_device_state)(struct anv_device *device)
> > >    gen10_emit_wa_lri_to_cache_mode_zero(&batch);
> > > #endif
> > > 
> > > +#if GEN_GEN >= 9
> > > +   /* A fixed function pipe flush is required before modifying this field */
> > > +   anv_batch_emit(&batch, GENX(PIPE_CONTROL), pipe) {
> > > +      pipe.PipeControlFlushEnable = true;
> > > +   }
> > > +
> > > +   /* enable object level preemption */
> > > +   uint32_t csc1;
> > > +
> > > +   anv_pack_struct(&csc1, GENX(CS_CHICKEN1),
> > > +                   .ReplayMode = ObjectLevelPreemption,
> > > +                   .ReplayModeMask = 1);
> > > +   anv_batch_emit(&batch, GENX(MI_LOAD_REGISTER_IMM), lri) {
> > > +      lri.RegisterOffset   = GENX(CS_CHICKEN1_num);
> > > +      lri.DataDWord        = csc1;
> > > +   }
> > > +#endif
> > > +
> > >    anv_batch_emit(&batch, GENX(MI_BATCH_BUFFER_END), bbe);
> > > 
> > >    assert(batch.next <= batch.end);
> > > diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
> > > index 8bf6f68b67c..f0994d3b139 100644
> > > --- a/src/mesa/drivers/dri/i965/brw_defines.h
> > > +++ b/src/mesa/drivers/dri/i965/brw_defines.h
> > > @@ -1661,4 +1661,9 @@ enum brw_pixel_shader_coverage_mask_mode {
> > > # define GLK_SCEC_BARRIER_MODE_3D_HULL     (1 << 7)
> > > # define GLK_SCEC_BARRIER_MODE_MASK        REG_MASK(1 << 7)
> > > 
> > > +#define CS_CHICKEN1                        0x2580 /* Gen9+ */
> > > +# define GEN9_REPLAY_MODE_MIDBUFFER             (0 << 0)
> > > +# define GEN9_REPLAY_MODE_MIDOBJECT             (1 << 0)
> > > +# define GEN9_REPLAY_MODE_MASK                  REG_MASK(1 << 0)
> > > +
> > > #endif
> > > diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c
> > > index 86c12e4d357..a90dc01d87b 100644
> > > --- a/src/mesa/drivers/dri/i965/brw_state_upload.c
> > > +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
> > > @@ -115,6 +115,16 @@ brw_upload_initial_gpu_state(struct brw_context *brw)
> > >       OUT_BATCH(0);
> > >       ADVANCE_BATCH();
> > >    }
> > > +
> > > +   if (devinfo->gen >= 9) {
> > > +      /* A fixed function pipe flush is required before modifying this field */
> > > +      brw_emit_pipe_control_flush(brw, PIPE_CONTROL_FLUSH_ENABLE);
> > > +
> > > +      /* enable object level preemption */
> > > +      brw_load_register_imm32(brw, CS_CHICKEN1,
> > > +                              GEN9_REPLAY_MODE_MIDOBJECT |
> > > +                              GEN9_REPLAY_MODE_MASK);
> > > +   }
> > 
> > One other option BTW is a context parameter in i915 driver. Note sure if you
> > discussed this with the i915 folks, but it could make some sense.
> 
> I haven't, but will do that. Thanks for the review.

On a second thought, specially for gen9 where we have some workarounds
that involve disabling mid object preemption for some types of
primitives, it might be worth to enable/disable this bit on userspace.
If we just set a bit a context creation, there's no way for the kernel
to disable it when needed, and we would have to end up setting/resetting
it anyway.

> > > }
> > > 
> > > static inline const struct brw_tracked_state *
> > 
> > Everything looks in order to me.
> > 
> > Reviewed-by: Ben Widawsky <ben@bwidawsk.net>
> > 
> > > -- 
> > > 2.14.3
> > >
On 18-02-20 09:15:01, Antognolli, Rafael wrote:
>On Tue, Feb 20, 2018 at 08:11:14AM -0800, Rafael Antognolli wrote:
>> On Fri, Feb 16, 2018 at 06:37:55PM -0800, Ben Widawsky wrote:
>> > On 18-02-16 13:44:00, Antognolli, Rafael wrote:
>> > > "This field controls the granularity of the replay mechanism when
>> > > coming back into a previously preempted context."
>> > >
>> > > The kernel disables this bit but whitelists the register, and it's a
>> > > context register. So enable it and take advantage of finer granularity
>> > > when preemption is available.
>> > >
>> >
>> > Does the kernel actually disable it? I thought the kernel just doesn't touch it
>> > (I don't think it's whitelisted by the kernel either, it's just writable).
>>
>> I'm seeing it being disabled at WaDisable3DMidCmdPreemption, seems to be
>> in effect since commit 5152defe4a53ad15e6d96c422440152302c8abd7.
>>
>> And it's whitelisted by WaEnablePreemptionGranularityControlByUMD.
>>
>> > > Signed-off-by: Rafael Antognolli <rafael.antognolli@intel.com>
>> > > Cc: Ben Widawsky <ben@bwidawsk.net>
>> > > ---
>> > >
>> > > This patch still needs more testing (only ran it through CI and also did
>> > > some basic tests on my machine to make sure it's not breaking anything).
>> > >
>> > > src/intel/genxml/gen10.xml                   |  8 ++++++++
>> > > src/intel/genxml/gen11.xml                   |  8 ++++++++
>> > > src/intel/genxml/gen9.xml                    |  8 ++++++++
>> > > src/intel/vulkan/genX_state.c                | 18 ++++++++++++++++++
>> > > src/mesa/drivers/dri/i965/brw_defines.h      |  5 +++++
>> > > src/mesa/drivers/dri/i965/brw_state_upload.c | 10 ++++++++++
>> > > 6 files changed, 57 insertions(+)
>> > >
>> > > diff --git a/src/intel/genxml/gen10.xml b/src/intel/genxml/gen10.xml
>> > > index 47c679a3fa9..42ac6e82696 100644
>> > > --- a/src/intel/genxml/gen10.xml
>> > > +++ b/src/intel/genxml/gen10.xml
>> > > @@ -3692,6 +3692,14 @@
>> > >     <field name="All Allocation" start="25" end="31" type="uint"/>
>> > >   </register>
>> > >
>> > > +  <register name="CS_CHICKEN1" length="1" num="0x2580">
>> > > +    <field name="Replay Mode" start="0" end="0" type="uint">
>> > > +      <value name="Mid-cmdbuffer Preemption" value="0"/>
>> > > +      <value name="Object Level Preemption" value="1"/>
>> > > +    </field>
>> > > +    <field name="Replay Mode Mask" start="16" end="16" type="bool"/>
>> > > +  </register>
>> > > +
>> > >   <register name="SO_WRITE_OFFSET0" length="1" num="0x5280">
>> > >     <field name="Write Offset" start="2" end="31" type="offset"/>
>> > >   </register>
>> > > diff --git a/src/intel/genxml/gen11.xml b/src/intel/genxml/gen11.xml
>> > > index 9a8a2fe21e3..e6ce42b2bfb 100644
>> > > --- a/src/intel/genxml/gen11.xml
>> > > +++ b/src/intel/genxml/gen11.xml
>> > > @@ -3688,6 +3688,14 @@
>> > >     <field name="All Allocation" start="25" end="31" type="uint"/>
>> > >   </register>
>> > >
>> > > +  <register name="CS_CHICKEN1" length="1" num="0x2580">
>> > > +    <field name="Replay Mode" start="0" end="0" type="uint">
>> > > +      <value name="Mid-cmdbuffer Preemption" value="0"/>
>> > > +      <value name="Object Level Preemption" value="1"/>
>> > > +    </field>
>> > > +    <field name="Replay Mode Mask" start="16" end="16" type="bool"/>
>> > > +  </register>
>> > > +
>> > >   <register name="SO_WRITE_OFFSET0" length="1" num="0x5280">
>> > >     <field name="Write Offset" start="2" end="31" type="offset"/>
>> > >   </register>
>> > > diff --git a/src/intel/genxml/gen9.xml b/src/intel/genxml/gen9.xml
>> > > index 7eef4bee013..45e1fddeb50 100644
>> > > --- a/src/intel/genxml/gen9.xml
>> > > +++ b/src/intel/genxml/gen9.xml
>> > > @@ -3638,6 +3638,14 @@
>> > >     <field name="All Allocation" start="25" end="31" type="uint"/>
>> > >   </register>
>> > >
>> > > +  <register name="CS_CHICKEN1" length="1" num="0x2580">
>> > > +    <field name="Replay Mode" start="0" end="0" type="uint">
>> > > +      <value name="Mid-cmdbuffer Preemption" value="0"/>
>> > > +      <value name="Object Level Preemption" value="1"/>
>> > > +    </field>
>> > > +    <field name="Replay Mode Mask" start="16" end="16" type="bool"/>
>> > > +  </register>
>> > > +
>> > >   <register name="SO_WRITE_OFFSET0" length="1" num="0x5280">
>> > >     <field name="Write Offset" start="2" end="31" type="offset"/>
>> > >   </register>
>> > > diff --git a/src/intel/vulkan/genX_state.c b/src/intel/vulkan/genX_state.c
>> > > index 54fb8634fdc..83b6c6387f3 100644
>> > > --- a/src/intel/vulkan/genX_state.c
>> > > +++ b/src/intel/vulkan/genX_state.c
>> > > @@ -169,6 +169,24 @@ genX(init_device_state)(struct anv_device *device)
>> > >    gen10_emit_wa_lri_to_cache_mode_zero(&batch);
>> > > #endif
>> > >
>> > > +#if GEN_GEN >= 9
>> > > +   /* A fixed function pipe flush is required before modifying this field */
>> > > +   anv_batch_emit(&batch, GENX(PIPE_CONTROL), pipe) {
>> > > +      pipe.PipeControlFlushEnable = true;
>> > > +   }
>> > > +
>> > > +   /* enable object level preemption */
>> > > +   uint32_t csc1;
>> > > +
>> > > +   anv_pack_struct(&csc1, GENX(CS_CHICKEN1),
>> > > +                   .ReplayMode = ObjectLevelPreemption,
>> > > +                   .ReplayModeMask = 1);
>> > > +   anv_batch_emit(&batch, GENX(MI_LOAD_REGISTER_IMM), lri) {
>> > > +      lri.RegisterOffset   = GENX(CS_CHICKEN1_num);
>> > > +      lri.DataDWord        = csc1;
>> > > +   }
>> > > +#endif
>> > > +
>> > >    anv_batch_emit(&batch, GENX(MI_BATCH_BUFFER_END), bbe);
>> > >
>> > >    assert(batch.next <= batch.end);
>> > > diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
>> > > index 8bf6f68b67c..f0994d3b139 100644
>> > > --- a/src/mesa/drivers/dri/i965/brw_defines.h
>> > > +++ b/src/mesa/drivers/dri/i965/brw_defines.h
>> > > @@ -1661,4 +1661,9 @@ enum brw_pixel_shader_coverage_mask_mode {
>> > > # define GLK_SCEC_BARRIER_MODE_3D_HULL     (1 << 7)
>> > > # define GLK_SCEC_BARRIER_MODE_MASK        REG_MASK(1 << 7)
>> > >
>> > > +#define CS_CHICKEN1                        0x2580 /* Gen9+ */
>> > > +# define GEN9_REPLAY_MODE_MIDBUFFER             (0 << 0)
>> > > +# define GEN9_REPLAY_MODE_MIDOBJECT             (1 << 0)
>> > > +# define GEN9_REPLAY_MODE_MASK                  REG_MASK(1 << 0)
>> > > +
>> > > #endif
>> > > diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c
>> > > index 86c12e4d357..a90dc01d87b 100644
>> > > --- a/src/mesa/drivers/dri/i965/brw_state_upload.c
>> > > +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
>> > > @@ -115,6 +115,16 @@ brw_upload_initial_gpu_state(struct brw_context *brw)
>> > >       OUT_BATCH(0);
>> > >       ADVANCE_BATCH();
>> > >    }
>> > > +
>> > > +   if (devinfo->gen >= 9) {
>> > > +      /* A fixed function pipe flush is required before modifying this field */
>> > > +      brw_emit_pipe_control_flush(brw, PIPE_CONTROL_FLUSH_ENABLE);
>> > > +
>> > > +      /* enable object level preemption */
>> > > +      brw_load_register_imm32(brw, CS_CHICKEN1,
>> > > +                              GEN9_REPLAY_MODE_MIDOBJECT |
>> > > +                              GEN9_REPLAY_MODE_MASK);
>> > > +   }
>> >
>> > One other option BTW is a context parameter in i915 driver. Note sure if you
>> > discussed this with the i915 folks, but it could make some sense.
>>
>> I haven't, but will do that. Thanks for the review.
>
>On a second thought, specially for gen9 where we have some workarounds
>that involve disabling mid object preemption for some types of
>primitives, it might be worth to enable/disable this bit on userspace.
>If we just set a bit a context creation, there's no way for the kernel
>to disable it when needed, and we would have to end up setting/resetting
>it anyway.
>

Okay.

>> > > }
>> > >
>> > > static inline const struct brw_tracked_state *
>> >
>> > Everything looks in order to me.
>> >
>> > Reviewed-by: Ben Widawsky <ben@bwidawsk.net>
>> >
>> > > --
>> > > 2.14.3
>> > >