intel/blorp: Emit VF cache invalidates for 48-bit bugs with softpin.

Submitted by Kenneth Graunke on June 1, 2018, 2:20 a.m.

Details

Message ID 20180601022013.3691-1-kenneth@whitecape.org
State New
Headers show
Series "intel/blorp: Emit VF cache invalidates for 48-bit bugs with softpin." ( rev: 1 ) in Mesa

Not browsing as part of any series.

Commit Message

Kenneth Graunke June 1, 2018, 2:20 a.m.
commit 92f01fc5f914fd500497d0c3aed75f3ac8dc054d made i965 start emitting
VF cache invalidates when the high bits of vertex buffers change.  But
we were not tracking vertex buffers emitted by BLORP.  This was papered
over by a mistake where I emitted VF cache invalidates all the time,
which Chris fixed in commit 3ac5fbadfd8644d30fce9ff267cb811ad157996a.

This patch adds a new hook which allows the driver to track addresses
and request a VF cache invalidate as appropriate.

Fixes: 92f01fc5f914 ("i965: Emit VF cache invalidates for 48-bit addressing bugs with softpin.")
---
 src/intel/blorp/blorp_genX_exec.h           | 20 +++++++++++++++++
 src/intel/vulkan/genX_blorp_exec.c          | 10 +++++++++
 src/mesa/drivers/dri/i965/genX_blorp_exec.c | 24 +++++++++++++++++++++
 3 files changed, 54 insertions(+)

Gross!

Patch hide | download patch | download mbox

diff --git a/src/intel/blorp/blorp_genX_exec.h b/src/intel/blorp/blorp_genX_exec.h
index 446743b5910..5f2f1dfccb0 100644
--- a/src/intel/blorp/blorp_genX_exec.h
+++ b/src/intel/blorp/blorp_genX_exec.h
@@ -59,6 +59,9 @@  blorp_alloc_dynamic_state(struct blorp_batch *batch,
 static void *
 blorp_alloc_vertex_buffer(struct blorp_batch *batch, uint32_t size,
                           struct blorp_address *addr);
+static bool
+blorp_need_vf_invalidate(struct blorp_batch *batch, unsigned vb_idx,
+                         struct blorp_address addr);
 
 #if GEN_GEN >= 8
 static struct blorp_address
@@ -297,6 +300,8 @@  static void
 blorp_emit_vertex_buffers(struct blorp_batch *batch,
                           const struct blorp_params *params)
 {
+   bool need_vf_invalidate = false;
+
    struct GENX(VERTEX_BUFFER_STATE) vb[3];
    memset(vb, 0, sizeof(vb));
 
@@ -305,9 +310,13 @@  blorp_emit_vertex_buffers(struct blorp_batch *batch,
    blorp_emit_vertex_data(batch, params, &addr, &size);
    blorp_fill_vertex_buffer_state(batch, vb, 0, addr, size, 3 * sizeof(float));
 
+   need_vf_invalidate |= blorp_need_vf_invalidate(batch, 0, addr);
+
    blorp_emit_input_varying_data(batch, params, &addr, &size);
    blorp_fill_vertex_buffer_state(batch, vb, 1, addr, size, 0);
 
+   need_vf_invalidate |= blorp_need_vf_invalidate(batch, 1, addr);
+
    uint32_t num_vbs = 2;
    if (params->dst_clear_color_as_input) {
       const unsigned clear_color_size =
@@ -315,6 +324,8 @@  blorp_emit_vertex_buffers(struct blorp_batch *batch,
       blorp_fill_vertex_buffer_state(batch, vb, num_vbs++,
                                      params->dst.clear_color_addr,
                                      clear_color_size, 0);
+      need_vf_invalidate |= /* num_vbs++ above, so this VB is num_vbs - 1 */
+         blorp_need_vf_invalidate(batch, num_vbs - 1, params->dst.clear_color_addr);
    }
 
    const unsigned num_dwords = 1 + num_vbs * GENX(VERTEX_BUFFER_STATE_length);
@@ -322,6 +333,15 @@  blorp_emit_vertex_buffers(struct blorp_batch *batch,
    if (!dw)
       return;
 
+#if GEN_GEN >= 8
+   if (need_vf_invalidate) {
+      /* See vf_invalidate_for_48bit_transitions in i965's genX_state_upload.c */
+      blorp_emit(batch, GENX(PIPE_CONTROL), pipe) {
+         pipe.VFCacheInvalidationEnable = true;
+      }
+   }
+#endif
+
    for (unsigned i = 0; i < num_vbs; i++) {
       GENX(VERTEX_BUFFER_STATE_pack)(batch, dw, &vb[i]);
       dw += GENX(VERTEX_BUFFER_STATE_length);
diff --git a/src/intel/vulkan/genX_blorp_exec.c b/src/intel/vulkan/genX_blorp_exec.c
index 9023269d61b..e8293934edd 100644
--- a/src/intel/vulkan/genX_blorp_exec.c
+++ b/src/intel/vulkan/genX_blorp_exec.c
@@ -152,6 +152,16 @@  blorp_alloc_vertex_buffer(struct blorp_batch *batch, uint32_t size,
    return vb_state.map;
 }
 
+static bool
+blorp_need_vf_invalidate(struct blorp_batch *batch, unsigned vb_idx,
+                         struct blorp_address addr)
+{
+   /* anv forces all vertex buffers into the low 4GB so there are never any
+    * transitions that require a VF invalidation; the arguments are ignored.
+    */
+   return false;
+}
+
 #if GEN_GEN >= 8
 static struct blorp_address
 blorp_get_workaround_page(struct blorp_batch *batch)
diff --git a/src/mesa/drivers/dri/i965/genX_blorp_exec.c b/src/mesa/drivers/dri/i965/genX_blorp_exec.c
index 808bff0db85..8ad1ff015e4 100644
--- a/src/mesa/drivers/dri/i965/genX_blorp_exec.c
+++ b/src/mesa/drivers/dri/i965/genX_blorp_exec.c
@@ -189,6 +189,30 @@  blorp_alloc_vertex_buffer(struct blorp_batch *batch, uint32_t size,
    return data;
 }
 
+/* Bits 47:32 of bo's GTT offset if it is EXEC_OBJECT_PINNED, otherwise 0. */
+static UNUSED uint16_t
+pinned_bo_high_bits(struct brw_bo *bo)
+{
+   return (bo->kflags & EXEC_OBJECT_PINNED) ? bo->gtt_offset >> 32ull : 0;
+}
+
+/* Record addr's high bits for VB vb_idx; true iff they changed (gen8+ only). */
+static bool
+blorp_need_vf_invalidate(struct blorp_batch *batch, unsigned vb_idx,
+                         struct blorp_address addr)
+{
+#if GEN_GEN >= 8
+   struct brw_context *brw = batch->driver_batch;
+   struct brw_bo *bo = addr.buffer;
+   uint16_t high_bits = bo ? pinned_bo_high_bits(bo) : 0; /* no BO counts as 0 */
+   if (high_bits != brw->vb.last_bo_high_bits[vb_idx]) {
+      brw->vb.last_bo_high_bits[vb_idx] = high_bits;
+      return true;
+   }
+#endif
+   return false;
+}
+
 #if GEN_GEN >= 8
 static struct blorp_address
 blorp_get_workaround_page(struct blorp_batch *batch)