[4/5] nir: Make nir_lower_clip_vs optionally work with variables.

Submitted by Kenneth Graunke on Nov. 10, 2018, 2:13 a.m.

Details

Message ID 20181110021331.3987-4-kenneth@whitecape.org
State New
Headers show
Series "Series without cover letter" ( rev: 1 ) in Mesa

Not browsing as part of any series.

Commit Message

Kenneth Graunke Nov. 10, 2018, 2:13 a.m.
The way nir_lower_clip_vs() works with store_output intrinsics makes a
ton of assumptions about the driver_location field.

In i965, I'd rather do this lowering early and work with variables.
ir3 and vc4 could probably do that as well, but I'm not sure exactly
what paths would need updating, so for now, handle both methods.
---
 src/compiler/nir/nir.h                      |  2 +-
 src/compiler/nir/nir_lower_clip.c           | 45 ++++++++++++++++-----
 src/gallium/drivers/freedreno/ir3/ir3_nir.c |  2 +-
 src/gallium/drivers/vc4/vc4_program.c       |  3 +-
 4 files changed, 38 insertions(+), 14 deletions(-)

Patch hide | download patch | download mbox

diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index f4f6b106505..9c12837ef01 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -2967,7 +2967,7 @@  bool nir_lower_tex(nir_shader *shader,
 
 bool nir_lower_idiv(nir_shader *shader);
 
-bool nir_lower_clip_vs(nir_shader *shader, unsigned ucp_enables);
+bool nir_lower_clip_vs(nir_shader *shader, unsigned ucp_enables, bool use_vars);
 bool nir_lower_clip_fs(nir_shader *shader, unsigned ucp_enables);
 bool nir_lower_clip_cull_distance_arrays(nir_shader *nir);
 
diff --git a/src/compiler/nir/nir_lower_clip.c b/src/compiler/nir/nir_lower_clip.c
index 2299f746305..4caaf20af8c 100644
--- a/src/compiler/nir/nir_lower_clip.c
+++ b/src/compiler/nir/nir_lower_clip.c
@@ -152,9 +152,12 @@  find_output(nir_shader *shader, unsigned drvloc)
 
 /* ucp_enables is bitmask of enabled ucps.  Actual ucp values are
  * passed in to shader via user_clip_plane system-values
+ *
+ * If use_vars is true, the pass will use variable loads and stores instead
+ * of working with store_output intrinsics.
  */
 bool
-nir_lower_clip_vs(nir_shader *shader, unsigned ucp_enables)
+nir_lower_clip_vs(nir_shader *shader, unsigned ucp_enables, bool use_vars)
 {
    nir_function_impl *impl = nir_shader_get_entrypoint(shader);
    nir_ssa_def *clipdist[MAX_CLIP_PLANES];
@@ -196,17 +199,30 @@  nir_lower_clip_vs(nir_shader *shader, unsigned ucp_enables)
          /* if shader is already writing CLIPDIST, then
           * there should be no user-clip-planes to deal
           * with.
+          *
+          * We assume nir_remove_dead_variables has removed the clipdist
+          * variables if they're not written.
           */
          return false;
       }
    }
 
-   if (clipvertex)
-      cv = find_output(shader, clipvertex->data.driver_location);
-   else if (position)
-      cv = find_output(shader, position->data.driver_location);
-   else
-      return false;
+   if (use_vars) {
+      cv = nir_load_var(&b, clipvertex ? clipvertex : position);
+
+      if (clipvertex) {
+         exec_node_remove(&clipvertex->node);
+         clipvertex->data.mode = nir_var_global;
+         exec_list_push_tail(&shader->globals, &clipvertex->node);
+      }
+   } else {
+      if (clipvertex)
+         cv = find_output(shader, clipvertex->data.driver_location);
+      else if (position)
+         cv = find_output(shader, position->data.driver_location);
+      else
+         return false;
+   }
 
    /* insert CLIPDIST outputs: */
    if (ucp_enables & 0x0f)
@@ -230,10 +246,17 @@  nir_lower_clip_vs(nir_shader *shader, unsigned ucp_enables)
       }
    }
 
-   if (ucp_enables & 0x0f)
-      store_clipdist_output(&b, out[0], &clipdist[0]);
-   if (ucp_enables & 0xf0)
-      store_clipdist_output(&b, out[1], &clipdist[4]);
+   if (use_vars) {
+      if (ucp_enables & 0x0f)
+         nir_store_var(&b, out[0], nir_vec(&b, clipdist, 4), 0xf);
+      if (ucp_enables & 0xf0)
+         nir_store_var(&b, out[1], nir_vec(&b, &clipdist[4], 4), 0xf);
+   } else {
+      if (ucp_enables & 0x0f)
+         store_clipdist_output(&b, out[0], &clipdist[0]);
+      if (ucp_enables & 0xf0)
+         store_clipdist_output(&b, out[1], &clipdist[4]);
+   }
 
    nir_metadata_preserve(impl, nir_metadata_dominance);
 
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_nir.c
index 63866ae4d01..7c2a8f83b62 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_nir.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_nir.c
@@ -172,7 +172,7 @@  ir3_optimize_nir(struct ir3_shader *shader, nir_shader *s,
 
 	if (key) {
 		if (s->info.stage == MESA_SHADER_VERTEX) {
-			OPT_V(s, nir_lower_clip_vs, key->ucp_enables);
+			OPT_V(s, nir_lower_clip_vs, key->ucp_enables, false);
 			if (key->vclamp_color)
 				OPT_V(s, nir_lower_clamp_color_outputs);
 		} else if (s->info.stage == MESA_SHADER_FRAGMENT) {
diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
index bc9bd76ae95..b98baca30cf 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -2363,7 +2363,8 @@  vc4_shader_ntq(struct vc4_context *vc4, enum qstage stage,
                 if (stage == QSTAGE_FRAG) {
                         NIR_PASS_V(c->s, nir_lower_clip_fs, c->key->ucp_enables);
                 } else {
-                        NIR_PASS_V(c->s, nir_lower_clip_vs, c->key->ucp_enables);
+                        NIR_PASS_V(c->s, nir_lower_clip_vs,
+				   c->key->ucp_enables, false);
                         NIR_PASS_V(c->s, nir_lower_io_to_scalar,
                                    nir_var_shader_out);
                 }

Comments

Kenneth Graunke <kenneth@whitecape.org> writes:

> The way nir_lower_clip_vs() works with store_output intrinsics makes a
> ton of assumptions about the driver_location field.
>
> In i965, I'd rather do this lowering early and work with variables.
> ir3 and vc4 could probably do that as well, but I'm not sure exactly
> what paths would need updating, so for now, handle both methods.
> ---
>  src/compiler/nir/nir.h                      |  2 +-
>  src/compiler/nir/nir_lower_clip.c           | 45 ++++++++++++++++-----
>  src/gallium/drivers/freedreno/ir3/ir3_nir.c |  2 +-
>  src/gallium/drivers/vc4/vc4_program.c       |  3 +-

If you hit src/broadcom/compiler/vir.c as well, the series is:

Reviewed-by: Eric Anholt <eric@anholt.net>

Switching vc4/v3d stuff over to variables has been going well and
showing wins -- I think we should probably finish that work and then
simplify these paths by moving this lowering before lower_io.

In the past, I thought we would want to lower_io at gallium state
creation time, since it was some compile work we could do at
compile/link instead of draw.