[Mesa-dev,3/4] nir/load_combine: expand the pass to support load-after-store.

Submitted by Iago Toral Quiroga on Oct. 27, 2015, 9:29 a.m.

Details

Message ID: 1445938141-28845-4-git-send-email-itoral@igalia.com
State: New
Series "Nir: implement a load-combine pass" ( rev: 1 ) in Mesa


Commit Message

Iago Toral Quiroga, Oct. 27, 2015, 9:29 a.m.
---
 src/glsl/nir/nir_opt_load_combine.c | 107 +++++++++++++++++++++++++++++++++++-
 1 file changed, 105 insertions(+), 2 deletions(-)


diff --git a/src/glsl/nir/nir_opt_load_combine.c b/src/glsl/nir/nir_opt_load_combine.c
index 926b1ab..effba13 100644
--- a/src/glsl/nir/nir_opt_load_combine.c
+++ b/src/glsl/nir/nir_opt_load_combine.c
@@ -288,6 +288,102 @@ set_invalidate_for_store(struct nir_instr_set *instr_set,
    }
 }
 
+static unsigned
+get_store_writemask(nir_intrinsic_instr *instr)
+{
+   switch (instr->intrinsic) {
+   case nir_intrinsic_store_ssbo:
+   case nir_intrinsic_store_ssbo_indirect:
+      return instr->const_index[1];
+   case nir_intrinsic_ssbo_atomic_add:
+   case nir_intrinsic_ssbo_atomic_imin:
+   case nir_intrinsic_ssbo_atomic_umin:
+   case nir_intrinsic_ssbo_atomic_imax:
+   case nir_intrinsic_ssbo_atomic_umax:
+   case nir_intrinsic_ssbo_atomic_and:
+   case nir_intrinsic_ssbo_atomic_or:
+   case nir_intrinsic_ssbo_atomic_xor:
+   case nir_intrinsic_ssbo_atomic_exchange:
+   case nir_intrinsic_ssbo_atomic_comp_swap:
+      return 0x1;
+   default:
+      unreachable("not implemented");
+   }
+}
+
+/*
+ * Traverses the set of load/store intrinsics trying to find a previous store
+ * operation to the same block/offset which we can reuse to rewrite a load
+ * from the same block/offset.
+ */
+static bool
+rewrite_load_with_store(struct nir_instr_set *instr_set,
+                        nir_intrinsic_instr *load)
+{
+   nir_src *load_block = NULL;
+   nir_src *load_offset = NULL;
+   unsigned load_const_offset;
+   get_load_store_address(load, &load_block, &load_offset, &load_const_offset);
+
+   for (struct set_entry *entry = _mesa_set_next_entry(instr_set->set, NULL);
+        entry != NULL; entry = _mesa_set_next_entry(instr_set->set, entry)) {
+
+      nir_instr *instr = (nir_instr *) entry->key;
+      if (instr->type != nir_instr_type_intrinsic)
+         continue;
+
+      nir_intrinsic_instr *store = nir_instr_as_intrinsic(instr);
+      if (!is_store(store))
+         continue;
+
+      nir_src *store_block = NULL;
+      nir_src *store_offset = NULL;
+      unsigned store_const_offset;
+      get_load_store_address(store, &store_block, &store_offset,
+                             &store_const_offset);
+
+      /* The store must write to all the channels we are loading */
+      unsigned store_writemask = get_store_writemask(store);
+      bool writes_all_channels = true;
+      for (int i = 0; i < load->num_components; i++) {
+         if (!((1 << i) & store_writemask)) {
+            writes_all_channels = false;
+            break;
+         }
+      }
+      if (!writes_all_channels)
+         continue;
+
+      /* blocks must match */
+      if ((load_block->ssa->parent_instr->type !=
+           store_block->ssa->parent_instr->type) ||
+          !nir_srcs_equal(*store_block, *load_block))
+         continue;
+
+      /* address type (indirect/direct) must match */
+      if (!load_offset != !store_offset)
+         continue;
+
+      /* indirect address mismatch */
+      if (load_offset && store_offset &&
+          !nir_srcs_equal(*store_offset, *load_offset))
+         continue;
+
+      /* direct address mismatch */
+      if (!load_offset && !store_offset &&
+          store_const_offset != load_const_offset)
+         continue;
+
+      /* rewrite the load with this store */
+      nir_ssa_def *def = &load->dest.ssa;
+      nir_ssa_def *new_def = store->src[0].ssa;
+      nir_ssa_def_rewrite_uses(def, nir_src_for_ssa(new_def));
+      return true;
+   }
+
+   return false;
+}
+
 static bool
 load_combine_block(nir_block *block)
 {
@@ -304,14 +400,21 @@ load_combine_block(nir_block *block)
 
       nir_intrinsic_instr *intrinsic = nir_instr_as_intrinsic(instr);
       if (is_load(intrinsic)) {
-         /* Try to rewrite with a previous load */
+         /* Try to rewrite with a previous load; if that fails, try to
+          * rewrite with a previous store
+          */
          if (nir_instr_set_add_or_rewrite(instr_set, instr)) {
             progress = true;
             nir_instr_remove(instr);
+         } else if (rewrite_load_with_store(instr_set, intrinsic)) {
+            progress = true;
          }
       } else if (is_store(intrinsic)) {
-         /* Invalidate conflicting load/stores */
+         /* Invalidate conflicting loads/stores and add the store to the set
+          * so we can rewrite future loads with it
+          */
          set_invalidate_for_store(instr_set, intrinsic);
+         _mesa_set_add(instr_set->set, instr);
       } else if (is_memory_barrier(intrinsic)) {
          /* If we see a memory barrier we have to invalidate all cached
           * load/store operations
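
To illustrate what the new code path does, consider a minimal GLSL compute shader (a hypothetical example, not taken from the patch or its tests, assuming the usual lowering of SSBO accesses to load_ssbo/store_ssbo intrinsics):

   #version 430
   layout(local_size_x = 1) in;
   layout(std430, binding = 0) buffer Buf {
      float a;
      float b;
   };

   void main()
   {
      a = 3.0;  /* store_ssbo: offset 0, writemask 0x1 */
      b = a;    /* load_ssbo from offset 0, then store_ssbo to offset 4 */
   }

When load_combine_block() reaches the load_ssbo generated for "b = a", there is no previous load to reuse, so rewrite_load_with_store() scans the instruction set instead. The store_ssbo for "a = 3.0" qualifies: its writemask covers the single channel the load reads, the block and offset sources match, and no barrier or conflicting store has invalidated it in the meantime. All uses of the load's SSA destination are therefore rewritten to the stored value (store->src[0].ssa), so the shader effectively executes "b = 3.0"; the now-dead load is left in place for a later DCE pass to remove.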