[Mesa-dev,PATCHv2,12/14] i965/gen7-8: Poke the 3DSTATE UAV access enable bits.

Submitted by Francisco Jerez on Feb. 9, 2015, 7:13 p.m.

Details

Message ID 1423509184-27511-1-git-send-email-currojerez@riseup.net
State New
Headers show

Not browsing as part of any series.

Commit Message

Francisco Jerez Feb. 9, 2015, 7:13 p.m.
v2: Set the PS UAV-only bit on HSW (Ken).
---
 src/mesa/drivers/dri/i965/brw_defines.h   |  4 ++++
 src/mesa/drivers/dri/i965/gen7_gs_state.c |  4 +++-
 src/mesa/drivers/dri/i965/gen7_vs_state.c | 13 ++++++++-----
 src/mesa/drivers/dri/i965/gen7_wm_state.c |  9 +++++++++
 src/mesa/drivers/dri/i965/gen8_gs_state.c |  4 +++-
 src/mesa/drivers/dri/i965/gen8_ps_state.c |  3 +++
 src/mesa/drivers/dri/i965/gen8_vs_state.c |  4 +++-
 7 files changed, 33 insertions(+), 8 deletions(-)

Patch hide | download patch | download mbox

diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index fe255cc..d4648ee 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -1691,6 +1691,7 @@  enum brw_message_target {
 # define GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT	18
 # define GEN6_VS_FLOATING_POINT_MODE_IEEE_754		(0 << 16)
 # define GEN6_VS_FLOATING_POINT_MODE_ALT		(1 << 16)
+# define HSW_VS_UAV_ACCESS_ENABLE                       (1 << 12)
 /* DW4 */
 # define GEN6_VS_DISPATCH_START_GRF_SHIFT		20
 # define GEN6_VS_URB_READ_LENGTH_SHIFT			11
@@ -1716,6 +1717,7 @@  enum brw_message_target {
 # define GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT	18
 # define GEN6_GS_FLOATING_POINT_MODE_IEEE_754		(0 << 16)
 # define GEN6_GS_FLOATING_POINT_MODE_ALT		(1 << 16)
+# define HSW_GS_UAV_ACCESS_ENABLE       		(1 << 12)
 /* DW4 */
 # define GEN7_GS_OUTPUT_VERTEX_SIZE_SHIFT		23
 # define GEN7_GS_OUTPUT_TOPOLOGY_SHIFT			17
@@ -2242,6 +2244,7 @@  enum brw_wm_barycentric_interp_mode {
 /* DW2 */
 # define GEN7_WM_MSDISPMODE_PERSAMPLE			(0 << 31)
 # define GEN7_WM_MSDISPMODE_PERPIXEL			(1 << 31)
+# define HSW_WM_UAV_ONLY                                (1 << 30)
 
 #define _3DSTATE_PS				0x7820 /* GEN7+ */
 /* DW1: kernel pointer */
@@ -2264,6 +2267,7 @@  enum brw_wm_barycentric_interp_mode {
 # define GEN7_PS_RENDER_TARGET_FAST_CLEAR_ENABLE	(1 << 8)
 # define GEN7_PS_DUAL_SOURCE_BLEND_ENABLE		(1 << 7)
 # define GEN7_PS_RENDER_TARGET_RESOLVE_ENABLE		(1 << 6)
+# define HSW_PS_UAV_ACCESS_ENABLE			(1 << 5)
 # define GEN7_PS_POSOFFSET_NONE				(0 << 3)
 # define GEN7_PS_POSOFFSET_CENTROID			(2 << 3)
 # define GEN7_PS_POSOFFSET_SAMPLE			(3 << 3)
diff --git a/src/mesa/drivers/dri/i965/gen7_gs_state.c b/src/mesa/drivers/dri/i965/gen7_gs_state.c
index e1c4f8b..39e5201 100644
--- a/src/mesa/drivers/dri/i965/gen7_gs_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_gs_state.c
@@ -59,7 +59,9 @@  upload_gs_state(struct brw_context *brw)
       OUT_BATCH(((ALIGN(stage_state->sampler_count, 4)/4) <<
                  GEN6_GS_SAMPLER_COUNT_SHIFT) |
                 ((brw->gs.prog_data->base.base.binding_table.size_bytes / 4) <<
-                 GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
+                 GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT) |
+                (brw->is_haswell && stage_state->nr_image_params ?
+                 HSW_GS_UAV_ACCESS_ENABLE : 0));
 
       if (brw->gs.prog_data->base.base.total_scratch) {
          OUT_RELOC(stage_state->scratch_bo,
diff --git a/src/mesa/drivers/dri/i965/gen7_vs_state.c b/src/mesa/drivers/dri/i965/gen7_vs_state.c
index 0e9b4fe..4741b78 100644
--- a/src/mesa/drivers/dri/i965/gen7_vs_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_vs_state.c
@@ -77,6 +77,7 @@  upload_vs_state(struct brw_context *brw)
    uint32_t floating_point_mode = 0;
    const int max_threads_shift = brw->is_haswell ?
       HSW_VS_MAX_THREADS_SHIFT : GEN6_VS_MAX_THREADS_SHIFT;
+   const struct brw_vue_prog_data *prog_data = &brw->vs.prog_data->base;
 
    if (!brw->is_haswell && !brw->is_baytrail)
       gen7_emit_vs_workaround_flush(brw);
@@ -91,19 +92,21 @@  upload_vs_state(struct brw_context *brw)
 	     ((ALIGN(stage_state->sampler_count, 4)/4) <<
               GEN6_VS_SAMPLER_COUNT_SHIFT) |
              ((brw->vs.prog_data->base.base.binding_table.size_bytes / 4) <<
-              GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
+              GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT) |
+             (brw->is_haswell && stage_state->nr_image_params ?
+              HSW_VS_UAV_ACCESS_ENABLE : 0));
 
-   if (brw->vs.prog_data->base.base.total_scratch) {
+   if (prog_data->base.total_scratch) {
       OUT_RELOC(stage_state->scratch_bo,
 		I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
-		ffs(brw->vs.prog_data->base.base.total_scratch) - 11);
+		ffs(prog_data->base.total_scratch) - 11);
    } else {
       OUT_BATCH(0);
    }
 
-   OUT_BATCH((brw->vs.prog_data->base.base.dispatch_grf_start_reg <<
+   OUT_BATCH((prog_data->base.dispatch_grf_start_reg <<
               GEN6_VS_DISPATCH_START_GRF_SHIFT) |
-	     (brw->vs.prog_data->base.urb_read_length << GEN6_VS_URB_READ_LENGTH_SHIFT) |
+	     (prog_data->urb_read_length << GEN6_VS_URB_READ_LENGTH_SHIFT) |
 	     (0 << GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT));
 
    OUT_BATCH(((brw->max_vs_threads - 1) << max_threads_shift) |
diff --git a/src/mesa/drivers/dri/i965/gen7_wm_state.c b/src/mesa/drivers/dri/i965/gen7_wm_state.c
index e426b30..60d58c5 100644
--- a/src/mesa/drivers/dri/i965/gen7_wm_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_wm_state.c
@@ -102,6 +102,12 @@  upload_wm_state(struct brw_context *brw)
       dw1 |= GEN7_WM_USES_INPUT_COVERAGE_MASK;
    }
 
+   /* _NEW_BUFFERS | _NEW_COLOR */
+   if (brw->is_haswell &&
+       !(brw_color_buffer_write_enabled(brw) || writes_depth) &&
+       brw->wm.base.nr_image_params)
+      dw2 |= HSW_WM_UAV_ONLY;
+
    BEGIN_BATCH(3);
    OUT_BATCH(_3DSTATE_WM << 16 | (3 - 2));
    OUT_BATCH(dw1);
@@ -206,6 +212,9 @@  upload_ps_state(struct brw_context *brw)
       _mesa_get_min_invocations_per_fragment(ctx, brw->fragment_program, false);
    assert(min_inv_per_frag >= 1);
 
+   if (brw->is_haswell && brw->wm.base.nr_image_params)
+      dw4 |= HSW_PS_UAV_ACCESS_ENABLE;
+
    if (prog_data->prog_offset_16 || prog_data->no_8) {
       dw4 |= GEN7_PS_16_DISPATCH_ENABLE;
       if (!prog_data->no_8 && min_inv_per_frag == 1) {
diff --git a/src/mesa/drivers/dri/i965/gen8_gs_state.c b/src/mesa/drivers/dri/i965/gen8_gs_state.c
index 95cc123..22997f0 100644
--- a/src/mesa/drivers/dri/i965/gen8_gs_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_gs_state.c
@@ -53,7 +53,9 @@  gen8_upload_gs_state(struct brw_context *brw)
                 ((ALIGN(stage_state->sampler_count, 4)/4) <<
                  GEN6_GS_SAMPLER_COUNT_SHIFT) |
                 ((prog_data->base.binding_table.size_bytes / 4) <<
-                 GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
+                 GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT) |
+                (brw->gs.base.nr_image_params ?
+                 HSW_GS_UAV_ACCESS_ENABLE : 0));
 
       if (brw->gs.prog_data->base.base.total_scratch) {
          OUT_RELOC64(stage_state->scratch_bo,
diff --git a/src/mesa/drivers/dri/i965/gen8_ps_state.c b/src/mesa/drivers/dri/i965/gen8_ps_state.c
index d4a58e4..b932437 100644
--- a/src/mesa/drivers/dri/i965/gen8_ps_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_ps_state.c
@@ -62,6 +62,9 @@  upload_ps_extra(struct brw_context *brw)
    if (prog_data->uses_omask)
       dw1 |= GEN8_PSX_OMASK_TO_RENDER_TARGET;
 
+   if (brw->wm.base.nr_image_params)
+      dw1 |= GEN8_PSX_SHADER_HAS_UAV;
+
    BEGIN_BATCH(2);
    OUT_BATCH(_3DSTATE_PS_EXTRA << 16 | (2 - 2));
    OUT_BATCH(dw1);
diff --git a/src/mesa/drivers/dri/i965/gen8_vs_state.c b/src/mesa/drivers/dri/i965/gen8_vs_state.c
index f92af55..35b381e 100644
--- a/src/mesa/drivers/dri/i965/gen8_vs_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_vs_state.c
@@ -50,7 +50,9 @@  upload_vs_state(struct brw_context *brw)
              ((ALIGN(stage_state->sampler_count, 4) / 4) <<
                GEN6_VS_SAMPLER_COUNT_SHIFT) |
              ((prog_data->base.binding_table.size_bytes / 4) <<
-               GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
+               GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT) |
+             (brw->vs.base.nr_image_params ?
+              HSW_VS_UAV_ACCESS_ENABLE : 0));
 
    if (prog_data->base.total_scratch) {
       OUT_RELOC64(stage_state->scratch_bo,

Comments

On Monday, February 09, 2015 09:13:04 PM Francisco Jerez wrote:
> v2: Set the PS UAV-only bit on HSW (Ken).
> ---
>  src/mesa/drivers/dri/i965/brw_defines.h   |  4 ++++
>  src/mesa/drivers/dri/i965/gen7_gs_state.c |  4 +++-
>  src/mesa/drivers/dri/i965/gen7_vs_state.c | 13 ++++++++-----
>  src/mesa/drivers/dri/i965/gen7_wm_state.c |  9 +++++++++
>  src/mesa/drivers/dri/i965/gen8_gs_state.c |  4 +++-
>  src/mesa/drivers/dri/i965/gen8_ps_state.c |  3 +++
>  src/mesa/drivers/dri/i965/gen8_vs_state.c |  4 +++-
>  7 files changed, 33 insertions(+), 8 deletions(-)

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>