[RFC,2/2] panfrost/midgard: Ignore imov/fmov distinction

Submitted by Alyssa Rosenzweig on May 13, 2019, 3:48 a.m.

Details

Message ID 20190513034801.3770-2-alyssa@rosenzweig.io
State New
Headers show
Series "Series without cover letter" ( rev: 1 ) in Mesa

Commit Message

Alyssa Rosenzweig May 13, 2019, 3:48 a.m.
We use nir_gather_ssa_types, rather than the instruction name, to decide
how moves should be compiled. This is important since the imov/fmov
distinction never mapped to what Midgard needed to begin with. This
should allow Jason's imov/fmov merger to proceed without regressing
Panfrost, since it removes one more buggy behaviour we rely on.

A great deal of future work is required for handling registers, which
likely accounts for some regressions in dEQP.

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Cc: Jason Ekstrand <jason@jlekstrand.net>
---
 .../panfrost/midgard/midgard_compile.c        | 81 ++++++++++++-------
 1 file changed, 53 insertions(+), 28 deletions(-)

diff --git a/src/gallium/drivers/panfrost/midgard/midgard_compile.c b/src/gallium/drivers/panfrost/midgard/midgard_compile.c
index 4a26ba769b2..b0985f55635 100644
--- a/src/gallium/drivers/panfrost/midgard/midgard_compile.c
+++ b/src/gallium/drivers/panfrost/midgard/midgard_compile.c
@@ -511,6 +511,10 @@  typedef struct compiler_context {
         unsigned sysvals[MAX_SYSVAL_COUNT];
         unsigned sysval_count;
         struct hash_table_u64 *sysval_to_id;
+
+        /* Mapping for int/float words, filled out */
+        BITSET_WORD *float_types;
+        BITSET_WORD *int_types;
 } compiler_context;
 
 /* Append instruction to end of current block */
@@ -1153,10 +1157,41 @@  emit_indirect_offset(compiler_context *ctx, nir_src *src)
         emit_mir_instruction(ctx, ins);
 }
 
+static bool
+is_probably_int(compiler_context *ctx, unsigned ssa_index)
+{
+        /* TODO: Extend to registers XXX... assume float for now since that's
+         * slightly safer for reasons we don't totally get.. */
+
+        if (ssa_index >= ctx->func->impl->ssa_alloc)
+                return false;
+
+        bool is_float = BITSET_TEST(ctx->float_types, ssa_index);
+        bool is_int = BITSET_TEST(ctx->int_types, ssa_index);
+
+        if (is_float && !is_int)
+                return false;
+
+        if (is_int && !is_float)
+                return true;
+
+        /* TODO: Other cases.. if we're not sure but it is SSA, try int... this
+         * is all kinda arbitrary right now */
+        return true;
+}
+
 #define ALU_CASE(nir, _op) \
 	case nir_op_##nir: \
 		op = midgard_alu_op_##_op; \
 		break;
+
+#define ALU_CASE_IF(nir, _fop, _iop) \
+        case nir_op_##nir: \
+                op = is_probably_int(ctx, dest) \
+                        ? midgard_alu_op_##_iop : \
+                          midgard_alu_op_##_fop; \
+                break;
+
 static bool
 nir_is_fzero_constant(nir_src src)
 {
@@ -1198,7 +1233,6 @@  emit_alu(compiler_context *ctx, nir_alu_instr *instr)
                 ALU_CASE(imax, imax);
                 ALU_CASE(umin, umin);
                 ALU_CASE(umax, umax);
-                ALU_CASE(fmov, fmov);
                 ALU_CASE(ffloor, ffloor);
                 ALU_CASE(fround_even, froundeven);
                 ALU_CASE(ftrunc, ftrunc);
@@ -1209,7 +1243,10 @@  emit_alu(compiler_context *ctx, nir_alu_instr *instr)
                 ALU_CASE(isub, isub);
                 ALU_CASE(imul, imul);
                 ALU_CASE(iabs, iabs);
-                ALU_CASE(imov, imov);
+
+                /* NIR is typeless here */
+                ALU_CASE_IF(imov, fmov, imov);
+                ALU_CASE_IF(fmov, fmov, imov);
 
                 ALU_CASE(feq32, feq);
                 ALU_CASE(fne32, fne);
@@ -3361,31 +3398,6 @@  midgard_opt_copy_prop_tex(compiler_context *ctx, midgard_block *block)
         return progress;
 }
 
-/* We don't really understand the imov/fmov split, so always use fmov (but let
- * it be imov in the IR so we don't do unsafe floating point "optimizations"
- * and break things */
-
-static void
-midgard_imov_workaround(compiler_context *ctx, midgard_block *block)
-{
-        mir_foreach_instr_in_block_safe(block, ins) {
-                if (ins->type != TAG_ALU_4) continue;
-                if (ins->alu.op != midgard_alu_op_imov) continue;
-
-                ins->alu.op = midgard_alu_op_fmov;
-                ins->alu.outmod = midgard_outmod_none;
-
-                /* Remove flags that don't make sense */
-
-                midgard_vector_alu_src s =
-                        vector_alu_from_unsigned(ins->alu.src2);
-
-                s.mod = 0;
-
-                ins->alu.src2 = vector_alu_srco_unsigned(s);
-        }
-}
-
 /* The following passes reorder MIR instructions to enable better scheduling */
 
 static void
@@ -3637,7 +3649,6 @@  emit_block(compiler_context *ctx, nir_block *block)
 
         midgard_emit_store(ctx, this_block);
         midgard_pair_load_store(ctx, this_block);
-        midgard_imov_workaround(ctx, this_block);
 
         /* Append fragment shader epilogue (value writeout) */
         if (ctx->stage == MESA_SHADER_FRAGMENT) {
@@ -3919,9 +3930,23 @@  midgard_compile_shader_nir(nir_shader *nir, midgard_program *program, bool is_bl
                 ctx->block_count = 0;
                 ctx->func = func;
 
+                /* Certain instructions are typeless in NIR but int/float
+                 * typed in Midgard, so we need to gather types
+                 * per-function */
+
+                ctx->float_types = calloc(BITSET_WORDS(func->impl->ssa_alloc), sizeof(BITSET_WORD));
+                ctx->int_types = calloc(BITSET_WORDS(func->impl->ssa_alloc), sizeof(BITSET_WORD));
+                nir_gather_ssa_types(func->impl, ctx->float_types, ctx->int_types);
+
+                /* Start the block-by-block emit */
+
                 emit_cf_list(ctx, &func->impl->body);
                 emit_block(ctx, func->impl->end_block);
 
+                /* Types are per-function, so cleanup */
+                free(ctx->float_types);
+                free(ctx->int_types);
+
                 break; /* TODO: Multi-function shaders */
         }
 

Comments

Using nir_gather_ssa_types is wrong. In both Midgard and NIR, SSA
values are just a bunch of bits with no int/float distinction, and
therefore you shouldn't need to know how a register is used to compile
the instruction producing it. The only distinction between imov and
fmov, in both NIR and the Midgard ISA, is what modifiers they support.
What you want to do is probably what you originally did, i.e. use fmov
for NIR fmov as well as fabs and fneg, and imov for imov (and if we
delete fmov, just use it for fabs and fneg). If this fixes any bugs, it's
just papering over bugs in your backend, and you should fix them
instead. Note that later GLSL versions have intBitsToFloat() and
floatBitsToInt() which blow away any assumption that the types will be
consistent.
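
In ALU_CASE terms that would be roughly the following (an untested
sketch against the macros already in this file; it assumes fabs/fneg
reach this switch as plain moves, with the abs/neg carried by the
source modifiers the backend already encodes in midgard_vector_alu_src):

    ALU_CASE(fmov, fmov);
    ALU_CASE(imov, imov);

    /* fabs/fneg are moves too; the abs/neg lives in the source
     * modifier, so both take the fmov encoding -- sketch only */
    ALU_CASE(fabs, fmov);
    ALU_CASE(fneg, fmov);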

On Mon, May 13, 2019 at 5:48 AM Alyssa Rosenzweig <alyssa@rosenzweig.io> wrote:
> [original message and patch quoted in full above; trimmed]

> [review comment quoted above; trimmed]

That was my mental model too, but it doesn't look to be the case: the
blob is very consistent in emitting imov/fmov distinctly (and
icsel/fcsel), and a lot of bizarre bugs come up if you do it any other
way, even absent any modifiers. There's -some- difference, it's just not
obvious what. Although I'll admit that playing with intBitsToFloat/etc.
yields seemingly wrong shaders even with the blob..

On Mon, May 13, 2019 at 4:48 PM Alyssa Rosenzweig <alyssa@rosenzweig.io> wrote:
> [previous exchange quoted above; trimmed]

This means that you're probably missing something else, and this is
papering over another bug. What the blob happens to use is irrelevant,
and of course you can always force it to do the "wrong" move by
reinterpreting things using intBitsToFloat()/floatBitsToInt().
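
To make that concrete (a hedged C analogy, not driver or spec code):
intBitsToFloat() is a pure bit reinterpretation, so the exact same 32
bits legitimately flow through both "int" and "float" uses, and any
pass that infers a type per SSA value from its uses can end up marking
one value as both:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* GLSL's intBitsToFloat() in C terms: copy the bits, convert
     * nothing. A use-based type-gathering pass sees this value as
     * both int (the argument) and float (the result). */
    static float
    int_bits_to_float(int32_t i)
    {
            float f;
            memcpy(&f, &i, sizeof(f));
            return f;
    }

    int
    main(void)
    {
            /* 0x3f800000 is the IEEE-754 encoding of 1.0f */
            printf("%f\n", int_bits_to_float(0x3f800000));
            return 0;
    }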