intel/compiler: update validator to account for half-float exec type promotion

Submitted by Iago Toral Quiroga on Jan. 23, 2019, 12:18 p.m.

Details

Message ID 20190123121851.9652-1-itoral@igalia.com
State New
Headers show
Series "intel/compiler: update validator to account for half-float exec type promotion" ( rev: 1 ) in Mesa

Not browsing as part of any series.

Commit Message

Iago Toral Quiroga Jan. 23, 2019, 12:18 p.m.
Commit c84ec70b3a72 implemented execution type promotion to 32-bit for
conversions involving half-float registers, which empirical testing suggested
was required, but it did not incorporate this change into the assembly validator
logic. This commits adds that, preventing validation errors like this:

mov(16)  g9<4>B   g3<16,8,2>HF     { align1 1H };
ERROR: Destination stride must be equal to the ratio of the sizes of the
       execution data type to the destination type

Fixes: c84ec70b3a72 "intel/fs: Promote execution type to 32-bit when any half-float conversion is needed."
---
 src/intel/compiler/brw_eu_validate.c | 27 ++++++++++++++-------------
 1 file changed, 14 insertions(+), 13 deletions(-)

Patch hide | download patch | download mbox

diff --git a/src/intel/compiler/brw_eu_validate.c b/src/intel/compiler/brw_eu_validate.c
index a25010b225c..3bb37677672 100644
--- a/src/intel/compiler/brw_eu_validate.c
+++ b/src/intel/compiler/brw_eu_validate.c
@@ -325,17 +325,20 @@  execution_type(const struct gen_device_info *devinfo, const brw_inst *inst)
    unsigned num_sources = num_sources_from_inst(devinfo, inst);
    enum brw_reg_type src0_exec_type, src1_exec_type;
 
-   /* Execution data type is independent of destination data type, except in
-    * mixed F/HF instructions on CHV and SKL+.
+   /* Empirical testing suggests that type conversions involving half-float
+    * promote execution type to 32-bit. See get_exec_type() in brw_ir_fs.h.
     */
    enum brw_reg_type dst_exec_type = brw_inst_dst_type(devinfo, inst);
 
    src0_exec_type = execution_type_for_type(brw_inst_src0_type(devinfo, inst));
    if (num_sources == 1) {
-      if ((devinfo->gen >= 9 || devinfo->is_cherryview) &&
-          src0_exec_type == BRW_REGISTER_TYPE_HF) {
-         return dst_exec_type;
+      if (type_sz(src0_exec_type) == 2 && dst_exec_type != src0_exec_type) {
+         if (src0_exec_type == BRW_REGISTER_TYPE_HF)
+            return BRW_REGISTER_TYPE_F;
+         else if (dst_exec_type == BRW_REGISTER_TYPE_HF)
+            return BRW_REGISTER_TYPE_D;
       }
+
       return src0_exec_type;
    }
 
@@ -367,14 +370,12 @@  execution_type(const struct gen_device_info *devinfo, const brw_inst *inst)
        src1_exec_type == BRW_REGISTER_TYPE_DF)
       return BRW_REGISTER_TYPE_DF;
 
-   if (devinfo->gen >= 9 || devinfo->is_cherryview) {
-      if (dst_exec_type == BRW_REGISTER_TYPE_F ||
-          src0_exec_type == BRW_REGISTER_TYPE_F ||
-          src1_exec_type == BRW_REGISTER_TYPE_F) {
-         return BRW_REGISTER_TYPE_F;
-      } else {
-         return BRW_REGISTER_TYPE_HF;
-      }
+   if (dst_exec_type == BRW_REGISTER_TYPE_F ||
+       src0_exec_type == BRW_REGISTER_TYPE_F ||
+       src1_exec_type == BRW_REGISTER_TYPE_F) {
+      return BRW_REGISTER_TYPE_F;
+   } else {
+      return BRW_REGISTER_TYPE_HF;
    }
 
    assert(src0_exec_type == BRW_REGISTER_TYPE_F);

Comments

Iago Toral Quiroga <itoral@igalia.com> writes:

> Commit c84ec70b3a72 implemented execution type promotion to 32-bit for
> conversions involving half-float registers, which empirical testing suggested
> was required, but it did not incorporate this change into the assembly validator
> logic. This commits adds that, preventing validation errors like this:
>

I don't think we should be validating empirical assumptions in the EU
validator.

> mov(16)  g9<4>B   g3<16,8,2>HF     { align1 1H };
> ERROR: Destination stride must be equal to the ratio of the sizes of the
>        execution data type to the destination type
>
> Fixes: c84ec70b3a72 "intel/fs: Promote execution type to 32-bit when any half-float conversion is needed."

I don't think this "fixes" anything that ever worked.  The validator is
still missing an implementation of the quirky HF restrictions, and it
wasn't the purpose of c84ec70b3a72 to do such a thing.  You *should*
definitely implement those restrictions (as they're stated in the
hardware spec, without empirical assumptions) in the validator as part
of your VK_KHR_shader_float16_int8 series, if anything because currently
it will reject working code that uses HF types.

> ---
>  src/intel/compiler/brw_eu_validate.c | 27 ++++++++++++++-------------
>  1 file changed, 14 insertions(+), 13 deletions(-)
>
> diff --git a/src/intel/compiler/brw_eu_validate.c b/src/intel/compiler/brw_eu_validate.c
> index a25010b225c..3bb37677672 100644
> --- a/src/intel/compiler/brw_eu_validate.c
> +++ b/src/intel/compiler/brw_eu_validate.c
> @@ -325,17 +325,20 @@ execution_type(const struct gen_device_info *devinfo, const brw_inst *inst)
>     unsigned num_sources = num_sources_from_inst(devinfo, inst);
>     enum brw_reg_type src0_exec_type, src1_exec_type;
>  
> -   /* Execution data type is independent of destination data type, except in
> -    * mixed F/HF instructions on CHV and SKL+.
> +   /* Empirical testing suggests that type conversions involving half-float
> +    * promote execution type to 32-bit. See get_exec_type() in brw_ir_fs.h.
>      */
>     enum brw_reg_type dst_exec_type = brw_inst_dst_type(devinfo, inst);
>  
>     src0_exec_type = execution_type_for_type(brw_inst_src0_type(devinfo, inst));
>     if (num_sources == 1) {
> -      if ((devinfo->gen >= 9 || devinfo->is_cherryview) &&
> -          src0_exec_type == BRW_REGISTER_TYPE_HF) {
> -         return dst_exec_type;
> +      if (type_sz(src0_exec_type) == 2 && dst_exec_type != src0_exec_type) {
> +         if (src0_exec_type == BRW_REGISTER_TYPE_HF)
> +            return BRW_REGISTER_TYPE_F;
> +         else if (dst_exec_type == BRW_REGISTER_TYPE_HF)
> +            return BRW_REGISTER_TYPE_D;
>        }
> +
>        return src0_exec_type;
>     }
>  
> @@ -367,14 +370,12 @@ execution_type(const struct gen_device_info *devinfo, const brw_inst *inst)
>         src1_exec_type == BRW_REGISTER_TYPE_DF)
>        return BRW_REGISTER_TYPE_DF;
>  
> -   if (devinfo->gen >= 9 || devinfo->is_cherryview) {
> -      if (dst_exec_type == BRW_REGISTER_TYPE_F ||
> -          src0_exec_type == BRW_REGISTER_TYPE_F ||
> -          src1_exec_type == BRW_REGISTER_TYPE_F) {
> -         return BRW_REGISTER_TYPE_F;
> -      } else {
> -         return BRW_REGISTER_TYPE_HF;
> -      }
> +   if (dst_exec_type == BRW_REGISTER_TYPE_F ||
> +       src0_exec_type == BRW_REGISTER_TYPE_F ||
> +       src1_exec_type == BRW_REGISTER_TYPE_F) {
> +      return BRW_REGISTER_TYPE_F;
> +   } else {
> +      return BRW_REGISTER_TYPE_HF;
>     }
>  
>     assert(src0_exec_type == BRW_REGISTER_TYPE_F);
> -- 
> 2.17.1
On Wed, 2019-01-23 at 06:03 -0800, Francisco Jerez wrote:
> Iago Toral Quiroga <itoral@igalia.com> writes:
> 
> > Commit c84ec70b3a72 implemented execution type promotion to 32-bit
> > for
> > conversions involving half-float registers, which empirical testing
> > suggested
> > was required, but it did not incorporate this change into the
> > assembly validator
> > logic. This commits adds that, preventing validation errors like
> > this:
> > 
> 
> I don't think we should be validating empirical assumptions in the EU
> validator.

I am not sure I get your point, isn't c84ec70b3a72 also based on
empirical testing after all?


> > mov(16)  g9<4>B   g3<16,8,2>HF     { align1 1H };
> > ERROR: Destination stride must be equal to the ratio of the sizes
> > of the
> >        execution data type to the destination type
> > 
> > Fixes: c84ec70b3a72 "intel/fs: Promote execution type to 32-bit
> > when any half-float conversion is needed."
> 
> I don't think this "fixes" anything that ever worked.

It is true that the code in that trace above is not something we can
produce right now, because it is a conversion from HF to B and that
should only happen within the context of VK_KHR_shader_float16_int8,
however, this is a consequence of the fact that since c84ec70b3a72
there is an inconsistency between what we do at the IR level regarding
execution size of HF conversions and what the EU validator is doing,
and from that perspective this is really fixing an inconsistency that
didn't exist before, and I thought we would want to address that sooner
rather than later and track it down to the original change that
introduced that inconsistency so we know where this is coming from.

Anyway, that was my rationale for the Fixes tag, but if you think this
is not useful I am happy to drop this patch and just include it as part
of my series without the tag.

>   The validator is
> still missing an implementation of the quirky HF restrictions, and it
> wasn't the purpose of c84ec70b3a72 to do such a thing.

While this is true in general, the EU validator does consider the
execution type of the instruction to validate general rules such as the
one I mentioned in the commit message in this patch. And that part of
the validator is inconsistent with c84ec70b3a72. In fact, the EU
validator is accounting for execution size promotion of HF instructions
to 32-bit in SKL+ and CHV only, for conversions from HF->F and mixed
float mode instructions... which is part of what c84ec70b3a72 addresses
at the IR level, which it actually does for all hardware platforms and
in more cases.

>   You *should*
> definitely implement those restrictions (as they're stated in the
> hardware spec, without empirical assumptions) in the validator as
> part
> of your VK_KHR_shader_float16_int8 series,

Again, I am not sure what you mean by "without empirical assumptions".
According to the commit message in c84ec70b3a72 "the docs are fairly
imcomplete and inconsistent" and you explained here that you had to do
some experimentation of your own using the simulator (where you found
its results to also be inconsistent with the hardware docs) to try and
guess what is happening. That's why I was using the term "empirical"
here, since the hardware docs alone aren't clear or correct enough to
understand what is really happening, when and in what platforms.

Anyway, if you don't like the term "empirical" to refer to all this,
that's fine by me, but what I need to know is if we agree that we need
to implement the same type promotion rules in the validator that we are
implementing in the IR, indepedently of whether refer to them as "based
on empirical testing" or not.

>  if anything because currently
> it will reject working code that uses HF types.

Just for the sake of clarity, since that sentence above could be
interpreted as if all HF code would be rejected: we have been using HF
types since we landed VK_KHR_16bit_storage, which includes conversions
between HF and F and the EU validator is not complaining about any of
that. It is currently a problem only with conversions to smaller types
(so only conversions to Byte) because that's where we check for that
restriction on the stride, which is based on the execution type of the
instruction.

> 
> > ---
> >  src/intel/compiler/brw_eu_validate.c | 27 ++++++++++++++--------
> > -----
> >  1 file changed, 14 insertions(+), 13 deletions(-)
> > 
> > diff --git a/src/intel/compiler/brw_eu_validate.c
> > b/src/intel/compiler/brw_eu_validate.c
> > index a25010b225c..3bb37677672 100644
> > --- a/src/intel/compiler/brw_eu_validate.c
> > +++ b/src/intel/compiler/brw_eu_validate.c
> > @@ -325,17 +325,20 @@ execution_type(const struct gen_device_info
> > *devinfo, const brw_inst *inst)
> >     unsigned num_sources = num_sources_from_inst(devinfo, inst);
> >     enum brw_reg_type src0_exec_type, src1_exec_type;
> >  
> > -   /* Execution data type is independent of destination data type,
> > except in
> > -    * mixed F/HF instructions on CHV and SKL+.
> > +   /* Empirical testing suggests that type conversions involving
> > half-float
> > +    * promote execution type to 32-bit. See get_exec_type() in
> > brw_ir_fs.h.
> >      */
> >     enum brw_reg_type dst_exec_type = brw_inst_dst_type(devinfo,
> > inst);
> >  
> >     src0_exec_type =
> > execution_type_for_type(brw_inst_src0_type(devinfo, inst));
> >     if (num_sources == 1) {
> > -      if ((devinfo->gen >= 9 || devinfo->is_cherryview) &&
> > -          src0_exec_type == BRW_REGISTER_TYPE_HF) {
> > -         return dst_exec_type;
> > +      if (type_sz(src0_exec_type) == 2 && dst_exec_type !=
> > src0_exec_type) {
> > +         if (src0_exec_type == BRW_REGISTER_TYPE_HF)
> > +            return BRW_REGISTER_TYPE_F;
> > +         else if (dst_exec_type == BRW_REGISTER_TYPE_HF)
> > +            return BRW_REGISTER_TYPE_D;
> >        }
> > +
> >        return src0_exec_type;
> >     }
> >  
> > @@ -367,14 +370,12 @@ execution_type(const struct gen_device_info
> > *devinfo, const brw_inst *inst)
> >         src1_exec_type == BRW_REGISTER_TYPE_DF)
> >        return BRW_REGISTER_TYPE_DF;
> >  
> > -   if (devinfo->gen >= 9 || devinfo->is_cherryview) {
> > -      if (dst_exec_type == BRW_REGISTER_TYPE_F ||
> > -          src0_exec_type == BRW_REGISTER_TYPE_F ||
> > -          src1_exec_type == BRW_REGISTER_TYPE_F) {
> > -         return BRW_REGISTER_TYPE_F;
> > -      } else {
> > -         return BRW_REGISTER_TYPE_HF;
> > -      }
> > +   if (dst_exec_type == BRW_REGISTER_TYPE_F ||
> > +       src0_exec_type == BRW_REGISTER_TYPE_F ||
> > +       src1_exec_type == BRW_REGISTER_TYPE_F) {
> > +      return BRW_REGISTER_TYPE_F;
> > +   } else {
> > +      return BRW_REGISTER_TYPE_HF;
> >     }
> >  
> >     assert(src0_exec_type == BRW_REGISTER_TYPE_F);
> > -- 
> > 2.17.1
On Wed, Jan 23, 2019 at 4:18 AM Iago Toral Quiroga <itoral@igalia.com> wrote:
>
> Commit c84ec70b3a72 implemented execution type promotion to 32-bit for
> conversions involving half-float registers, which empirical testing suggested
> was required, but it did not incorporate this change into the assembly validator
> logic. This commits adds that, preventing validation errors like this:
>
> mov(16)  g9<4>B   g3<16,8,2>HF     { align1 1H };
> ERROR: Destination stride must be equal to the ratio of the sizes of the
>        execution data type to the destination type
>
> Fixes: c84ec70b3a72 "intel/fs: Promote execution type to 32-bit when any half-float conversion is needed."
> ---
>  src/intel/compiler/brw_eu_validate.c | 27 ++++++++++++++-------------

New rule: New restrictions (or relaxations) may not be added to
brw_eu_validate.c without accompanying unit tests. I'll send a patch
to add a comment to brw_eu_validate.c saying as much.

Rationale: the reason I wrote brw_eu_validate.c was because I wasted a
week debugging an issue where fulsim not only failed to inform me that
one instruction was invalid but also incorrectly told me that one
correct instruction *was* invalid. I would have been better off
without such a tool.

If the EU validator loses people's trust, then it's useless, but if it
is incorrect it's worse than useless.
On Wed, Jan 23, 2019 at 6:03 AM Francisco Jerez <currojerez@riseup.net> wrote:
>
> Iago Toral Quiroga <itoral@igalia.com> writes:
>
> > Commit c84ec70b3a72 implemented execution type promotion to 32-bit for
> > conversions involving half-float registers, which empirical testing suggested
> > was required, but it did not incorporate this change into the assembly validator
> > logic. This commits adds that, preventing validation errors like this:
> >
>
> I don't think we should be validating empirical assumptions in the EU
> validator.

I kind of agree. I don't really know what we should do though.

I guess it's better to err on the side of caution in the EU validator
and only check restrictions that have documentation. Is that your
thinking?

Many instructions can only take certain conditional modifiers. XOR is
documented to only take .z/.nz. However we emit XOR with a .l
conditional mod for nir_op_imod. It works, and we think the
documentation is incomplete. Separately it describes how the
conditional modifiers operate, and .l only reads the high bit of the
result so it makes sense that XOR with .l should work like we see that
it does.

So, (1) empirically it works, (2) the documentation says it's not
allowed, but (3) there's a plausible explanation that the
documentation is wrong.

What should we do if we implement the conditional modifier checks in
the validator?
Iago Toral <itoral@igalia.com> writes:

> On Wed, 2019-01-23 at 06:03 -0800, Francisco Jerez wrote:
>> Iago Toral Quiroga <itoral@igalia.com> writes:
>> 
>> > Commit c84ec70b3a72 implemented execution type promotion to 32-bit
>> > for
>> > conversions involving half-float registers, which empirical testing
>> > suggested
>> > was required, but it did not incorporate this change into the
>> > assembly validator
>> > logic. This commits adds that, preventing validation errors like
>> > this:
>> > 
>> 
>> I don't think we should be validating empirical assumptions in the EU
>> validator.
>
> I am not sure I get your point, isn't c84ec70b3a72 also based on
> empirical testing after all?
>

To some extent, but it doesn't attempt to enforce ISA restrictions based
on information obtained empirically.

>
>> > mov(16)  g9<4>B   g3<16,8,2>HF     { align1 1H };
>> > ERROR: Destination stride must be equal to the ratio of the sizes
>> > of the
>> >        execution data type to the destination type
>> > 
>> > Fixes: c84ec70b3a72 "intel/fs: Promote execution type to 32-bit
>> > when any half-float conversion is needed."
>> 
>> I don't think this "fixes" anything that ever worked.
>
> It is true that the code in that trace above is not something we can
> produce right now, because it is a conversion from HF to B and that
> should only happen within the context of VK_KHR_shader_float16_int8,
> however, this is a consequence of the fact that since c84ec70b3a72
> there is an inconsistency between what we do at the IR level regarding
> execution size of HF conversions and what the EU validator is doing,
> and from that perspective this is really fixing an inconsistency that
> didn't exist before, and I thought we would want to address that sooner
> rather than later and track it down to the original change that
> introduced that inconsistency so we know where this is coming from.
>

The "inconsistency" between the IR's get_exec_type() and the EU
validator's execution_type() has existed ever since a05b6f25bf4bfad7
removed the HF assert from get_exec_type() without actually implementing
the code required to handle HF operands (which is what my commit
c84ec70b3a72 did).

> Anyway, that was my rationale for the Fixes tag, but if you think this
> is not useful I am happy to drop this patch and just include it as part
> of my series without the tag.
>

I'd like to see the actual regioning restrictions for HF types
implemented in the EU validator as part of your series.  I don't see the
need to introduce any empirical assumptions into the EU validator's
execution_type() in order to achieve that, even if that means that
get_exec_type() and execution_type() don't do the exact same calculation
-- What you call an inconsistency is the consequence of execution_type()
being the hardware spec's opinion on what the execution type is, which
we assume is what we need to use while enforcing a regioning restriction
that refers to the execution type of the instruction.

>>   The validator is
>> still missing an implementation of the quirky HF restrictions, and it
>> wasn't the purpose of c84ec70b3a72 to do such a thing.
>
> While this is true in general, the EU validator does consider the
> execution type of the instruction to validate general rules such as the
> one I mentioned in the commit message in this patch. And that part of
> the validator is inconsistent with c84ec70b3a72.

That part of the validator was also inconsistent with the code generated
by your original VK_KHR_shader_float16_int8 series even before I
committed c84ec70b3a72.  The reason is that it is trying to validate a
restriction that rejects working code, because the "general" rule it's
trying to enforce isn't supposed to apply to instructions with HF
operands, which is the real bug.

> In fact, the EU validator is accounting for execution size promotion
> of HF instructions to 32-bit in SKL+ and CHV only, for conversions
> from HF->F and mixed float mode instructions... which is part of what
> c84ec70b3a72 addresses at the IR level, which it actually does for all
> hardware platforms and in more cases.
>

I'm fine with fixing execution_type() to do the right thing in more
cases and platforms, but I don't think that should involve making
empirical assumptions in the validator.

>>   You *should*
>> definitely implement those restrictions (as they're stated in the
>> hardware spec, without empirical assumptions) in the validator as
>> part
>> of your VK_KHR_shader_float16_int8 series,
>
> Again, I am not sure what you mean by "without empirical assumptions".

I was just paraphrasing your comment.  If you feel the need to write a
comment justifying the existence of some code in the validator based on
empirical testing, it probably doesn't belong in the validator.

> According to the commit message in c84ec70b3a72 "the docs are fairly
> imcomplete and inconsistent" and you explained here that you had to do
> some experimentation of your own using the simulator (where you found
> its results to also be inconsistent with the hardware docs) to try and
> guess what is happening. That's why I was using the term "empirical"
> here, since the hardware docs alone aren't clear or correct enough to
> understand what is really happening, when and in what platforms.
>

If some hardware restriction is described in a contradictory or
ambiguous way by the hardware spec we should probably avoid enforcing it
altogether in the EU validator.

> Anyway, if you don't like the term "empirical" to refer to all this,
> that's fine by me, but what I need to know is if we agree that we need
> to implement the same type promotion rules in the validator that we are
> implementing in the IR, indepedently of whether refer to them as "based
> on empirical testing" or not.
>

I don't think we agree on that.  IMHO you want to enforce the regioning
restrictions that the hardware spec describes for HF types rather than
the current (incorrect and incomplete) generalization of the standard
restrictions.  With those in place the apparent inconsistency between
get_exec_type() and the hardware spec's execution type would be
harmless.

>>  if anything because currently
>> it will reject working code that uses HF types.
>
> Just for the sake of clarity, since that sentence above could be
> interpreted as if all HF code would be rejected: we have been using HF
> types since we landed VK_KHR_16bit_storage, which includes conversions
> between HF and F and the EU validator is not complaining about any of
> that. It is currently a problem only with conversions to smaller types
> (so only conversions to Byte) because that's where we check for that
> restriction on the stride, which is based on the execution type of the
> instruction.
>
>> 
>> > ---
>> >  src/intel/compiler/brw_eu_validate.c | 27 ++++++++++++++--------
>> > -----
>> >  1 file changed, 14 insertions(+), 13 deletions(-)
>> > 
>> > diff --git a/src/intel/compiler/brw_eu_validate.c
>> > b/src/intel/compiler/brw_eu_validate.c
>> > index a25010b225c..3bb37677672 100644
>> > --- a/src/intel/compiler/brw_eu_validate.c
>> > +++ b/src/intel/compiler/brw_eu_validate.c
>> > @@ -325,17 +325,20 @@ execution_type(const struct gen_device_info
>> > *devinfo, const brw_inst *inst)
>> >     unsigned num_sources = num_sources_from_inst(devinfo, inst);
>> >     enum brw_reg_type src0_exec_type, src1_exec_type;
>> >  
>> > -   /* Execution data type is independent of destination data type,
>> > except in
>> > -    * mixed F/HF instructions on CHV and SKL+.
>> > +   /* Empirical testing suggests that type conversions involving
>> > half-float
>> > +    * promote execution type to 32-bit. See get_exec_type() in
>> > brw_ir_fs.h.
>> >      */
>> >     enum brw_reg_type dst_exec_type = brw_inst_dst_type(devinfo,
>> > inst);
>> >  
>> >     src0_exec_type =
>> > execution_type_for_type(brw_inst_src0_type(devinfo, inst));
>> >     if (num_sources == 1) {
>> > -      if ((devinfo->gen >= 9 || devinfo->is_cherryview) &&
>> > -          src0_exec_type == BRW_REGISTER_TYPE_HF) {
>> > -         return dst_exec_type;
>> > +      if (type_sz(src0_exec_type) == 2 && dst_exec_type !=
>> > src0_exec_type) {
>> > +         if (src0_exec_type == BRW_REGISTER_TYPE_HF)
>> > +            return BRW_REGISTER_TYPE_F;
>> > +         else if (dst_exec_type == BRW_REGISTER_TYPE_HF)
>> > +            return BRW_REGISTER_TYPE_D;
>> >        }
>> > +
>> >        return src0_exec_type;
>> >     }
>> >  
>> > @@ -367,14 +370,12 @@ execution_type(const struct gen_device_info
>> > *devinfo, const brw_inst *inst)
>> >         src1_exec_type == BRW_REGISTER_TYPE_DF)
>> >        return BRW_REGISTER_TYPE_DF;
>> >  
>> > -   if (devinfo->gen >= 9 || devinfo->is_cherryview) {
>> > -      if (dst_exec_type == BRW_REGISTER_TYPE_F ||
>> > -          src0_exec_type == BRW_REGISTER_TYPE_F ||
>> > -          src1_exec_type == BRW_REGISTER_TYPE_F) {
>> > -         return BRW_REGISTER_TYPE_F;
>> > -      } else {
>> > -         return BRW_REGISTER_TYPE_HF;
>> > -      }
>> > +   if (dst_exec_type == BRW_REGISTER_TYPE_F ||
>> > +       src0_exec_type == BRW_REGISTER_TYPE_F ||
>> > +       src1_exec_type == BRW_REGISTER_TYPE_F) {
>> > +      return BRW_REGISTER_TYPE_F;
>> > +   } else {
>> > +      return BRW_REGISTER_TYPE_HF;
>> >     }
>> >  
>> >     assert(src0_exec_type == BRW_REGISTER_TYPE_F);
>> > -- 
>> > 2.17.1
On Thu, 2019-01-24 at 11:45 -0800, Francisco Jerez wrote:
> Iago Toral <itoral@igalia.com> writes:
> 
> > On Wed, 2019-01-23 at 06:03 -0800, Francisco Jerez wrote:
> > > Iago Toral Quiroga <itoral@igalia.com> writes:
> > > 
> > > > Commit c84ec70b3a72 implemented execution type promotion to 32-
> > > > bit
> > > > for
> > > > conversions involving half-float registers, which empirical
> > > > testing
> > > > suggested
> > > > was required, but it did not incorporate this change into the
> > > > assembly validator
> > > > logic. This commits adds that, preventing validation errors
> > > > like
> > > > this:
> > > > 
> > > 
> > > I don't think we should be validating empirical assumptions in
> > > the EU
> > > validator.
> > 
> > I am not sure I get your point, isn't c84ec70b3a72 also based on
> > empirical testing after all?
> > 
> 
> To some extent, but it doesn't attempt to enforce ISA restrictions
> based
> on information obtained empirically.
> 
> > 
> > > > mov(16)  g9<4>B   g3<16,8,2>HF     { align1 1H };
> > > > ERROR: Destination stride must be equal to the ratio of the
> > > > sizes
> > > > of the
> > > >        execution data type to the destination type
> > > > 
> > > > Fixes: c84ec70b3a72 "intel/fs: Promote execution type to 32-bit
> > > > when any half-float conversion is needed."
> > > 
> > > I don't think this "fixes" anything that ever worked.
> > 
> > It is true that the code in that trace above is not something we
> > can
> > produce right now, because it is a conversion from HF to B and that
> > should only happen within the context of
> > VK_KHR_shader_float16_int8,
> > however, this is a consequence of the fact that since c84ec70b3a72
> > there is an inconsistency between what we do at the IR level
> > regarding
> > execution size of HF conversions and what the EU validator is
> > doing,
> > and from that perspective this is really fixing an inconsistency
> > that
> > didn't exist before, and I thought we would want to address that
> > sooner
> > rather than later and track it down to the original change that
> > introduced that inconsistency so we know where this is coming from.
> > 
> 
> The "inconsistency" between the IR's get_exec_type() and the EU
> validator's execution_type() has existed ever since a05b6f25bf4bfad7
> removed the HF assert from get_exec_type() without actually
> implementing
> the code required to handle HF operands (which is what my commit
> c84ec70b3a72 did).

I agree with the fact that since a05b6f25bf4bfad7 the validator could
reject valid code and that had nothing to do with your patch, but the
inconsistency I am talking about here, that this patch fixes, is the
one about get_exec_type() in the IR and execution_type() in the
validator doing different things for HF instructions, which only exists
since your patch and which you discuss below.

> > Anyway, that was my rationale for the Fixes tag, but if you think
> > this
> > is not useful I am happy to drop this patch and just include it as
> > part
> > of my series without the tag.
> > 
> 
> I'd like to see the actual regioning restrictions for HF types
> implemented in the EU validator as part of your series.

Ok, let's see if we can agree on what restrictions should we implement
then. I can implement this restriction as documented:

"Conversion between Integer and HF (Half Float) must be DWord-aligned
and strided by a DWord on the destination"

Instead of trying to apply the general one that is currently breaking.
That will fix the assertion issue. I guess my issue with it was that
going by your analysis this restriction is not telling the full
picture, this is what you had to say about this restriction:

"I have a feeling that the reason for this may be that the 16-bit
pipeline lacks the ability to handle conversions from or to half-float, 
so the execution type is implicitly promoted to the matching (integer
or floating-point) 32-bit type where any HF conversion would be
needed.  And on those the usual alignment restriction of the
destination to a larger execution type applies."

But I guess your point is that we can ignore these details at the
validator level and just focus on validating strictly what the PRM
says. Fair enough.

Unfortunatley, you also mentioned that this restriction *seems* to be
overriden by this one on all platforms but BDW (even though the both
are listed, at least I see both in my SKL docs):

"There is a relaxed alignment rule for word destinations. When the
destination type is word (UW, W, HF), destination data types can be
aligned to either the lowest word or the second lowest word of the
execution channel. This means the destination data words can be either
all in the even word locations or all in the odd word locations."

Which you discussed didn't make sense to you and I agreed, if only
because my own experiments suggested that the implications that one
would derive from a strict interpretation of this (that packed 16-bit
data is not supported) are not quite true going by the fact that we are
emitting packed instructions on all platforms without any indication
that this doesn't work. So what should we do about this one? If we
decide that we want to implement this then we have to re-think the
implementation we have.

Should we just validate the one about the integer/float conversions?
Should we do that only on BDW or on all platforms?

>   I don't see the
> need to introduce any empirical assumptions into the EU validator's
> execution_type() in order to achieve that, even if that means that
> get_exec_type() and execution_type() don't do the exact same
> calculation
> -- What you call an inconsistency is the consequence of
> execution_type()
> being the hardware spec's opinion on what the execution type is,
> which
> we assume is what we need to use while enforcing a regioning
> restriction
> that refers to the execution type of the instruction.

Mmm... I guess my question is: is the hardware really assuming that
execution type in all cases that involve HF instructions? What I got
from your analysis of the whole situation is that the hardware is
actually promoting the execution size to 32-bit in some cases, and if
that is the case, then does it make sense to implement code in the
validator that ignores that fact? I understand that you think this
doesn't have any significance once we make sure that we only apply the
restrictions specifically documented for HF from the PRM, and you are
probably right, however I have to admit that it feels a bit odd to me
that we do that when we know that under the hood there is much more
going on and we are producing FS IR code following different rules
after all. But maybe it is just me being too paranoid about this...


> > >   The validator is
> > > still missing an implementation of the quirky HF restrictions,
> > > and it
> > > wasn't the purpose of c84ec70b3a72 to do such a thing.
> > 
> > While this is true in general, the EU validator does consider the
> > execution type of the instruction to validate general rules such as
> > the
> > one I mentioned in the commit message in this patch. And that part
> > of
> > the validator is inconsistent with c84ec70b3a72.
> 
> That part of the validator was also inconsistent with the code
> generated
> by your original VK_KHR_shader_float16_int8 series even before I
> committed c84ec70b3a72.  The reason is that it is trying to validate
> a
> restriction that rejects working code, because the "general" rule
> it's
> trying to enforce isn't supposed to apply to instructions with HF
> operands, which is the real bug.

I agree with that, and I'll fix that in my series by preventing that
rule to apply to these instructions (pending to agree on the rules that
we should validate instead), but as I have already explained above,
that was not the inconsistency I was referring to.

> > In fact, the EU validator is accounting for execution size
> > promotion
> > of HF instructions to 32-bit in SKL+ and CHV only, for conversions
> > from HF->F and mixed float mode instructions... which is part of
> > what
> > c84ec70b3a72 addresses at the IR level, which it actually does for
> > all
> > hardware platforms and in more cases.
> > 
> 
> I'm fine with fixing execution_type() to do the right thing in more
> cases and platforms, but I don't think that should involve making
> empirical assumptions in the validator.

Just to be clear, in the particular case we are discussing here, I
understand this means that we should leave the validator's
execution_type() as-is.

> > >   You *should*
> > > definitely implement those restrictions (as they're stated in the
> > > hardware spec, without empirical assumptions) in the validator as
> > > part
> > > of your VK_KHR_shader_float16_int8 series,
> > 
> > Again, I am not sure what you mean by "without empirical
> > assumptions".
> 
> I was just paraphrasing your comment.  If you feel the need to write
> a
> comment justifying the existence of some code in the validator based
> on
> empirical testing, it probably doesn't belong in the validator.
> 
> > According to the commit message in c84ec70b3a72 "the docs are
> > fairly
> > imcomplete and inconsistent" and you explained here that you had to
> > do
> > some experimentation of your own using the simulator (where you
> > found
> > its results to also be inconsistent with the hardware docs) to try
> > and
> > guess what is happening. That's why I was using the term
> > "empirical"
> > here, since the hardware docs alone aren't clear or correct enough
> > to
> > understand what is really happening, when and in what platforms.
> > 
> 
> If some hardware restriction is described in a contradictory or
> ambiguous way by the hardware spec we should probably avoid enforcing
> it
> altogether in the EU validator.

Okay, in that case I guess you would rule the restriction about the
relaxed alignment rule for Word destinations as such, but not the one
about half-float/integer conversions. Is that correct?

> > Anyway, if you don't like the term "empirical" to refer to all
> > this,
> > that's fine by me, but what I need to know is if we agree that we
> > need
> > to implement the same type promotion rules in the validator that we
> > are
> > implementing in the IR, indepedently of whether refer to them as
> > "based
> > on empirical testing" or not.
> > 
> 
> I don't think we agree on that.  IMHO you want to enforce the
> regioning
> restrictions that the hardware spec describes for HF types rather
> than
> the current (incorrect and incomplete) generalization of the standard
> restrictions.  With those in place the apparent inconsistency between
> get_exec_type() and the hardware spec's execution type would be
> harmless.

I agree with that. I still feel uneasy about having two different 
implementations of the execution type in FS IR and the validator, even
if that doesn't lead to any issues when we do what you say here...

At the very least I think that deserves a comment in the validator
explaining why we think that is okay.

> > >  if anything because currently
> > > it will reject working code that uses HF types.
> > 
> > Just for the sake of clarity, since that sentence above could be
> > interpreted as if all HF code would be rejected: we have been using
> > HF
> > types since we landed VK_KHR_16bit_storage, which includes
> > conversions
> > between HF and F and the EU validator is not complaining about any
> > of
> > that. It is currently a problem only with conversions to smaller
> > types
> > (so only conversions to Byte) because that's where we check for
> > that
> > restriction on the stride, which is based on the execution type of
> > the
> > instruction.
> > 
> > > 
> > > > ---
> > > >  src/intel/compiler/brw_eu_validate.c | 27 ++++++++++++++----
> > > > ----
> > > > -----
> > > >  1 file changed, 14 insertions(+), 13 deletions(-)
> > > > 
> > > > diff --git a/src/intel/compiler/brw_eu_validate.c
> > > > b/src/intel/compiler/brw_eu_validate.c
> > > > index a25010b225c..3bb37677672 100644
> > > > --- a/src/intel/compiler/brw_eu_validate.c
> > > > +++ b/src/intel/compiler/brw_eu_validate.c
> > > > @@ -325,17 +325,20 @@ execution_type(const struct
> > > > gen_device_info
> > > > *devinfo, const brw_inst *inst)
> > > >     unsigned num_sources = num_sources_from_inst(devinfo,
> > > > inst);
> > > >     enum brw_reg_type src0_exec_type, src1_exec_type;
> > > >  
> > > > -   /* Execution data type is independent of destination data
> > > > type,
> > > > except in
> > > > -    * mixed F/HF instructions on CHV and SKL+.
> > > > +   /* Empirical testing suggests that type conversions
> > > > involving
> > > > half-float
> > > > +    * promote execution type to 32-bit. See get_exec_type() in
> > > > brw_ir_fs.h.
> > > >      */
> > > >     enum brw_reg_type dst_exec_type =
> > > > brw_inst_dst_type(devinfo,
> > > > inst);
> > > >  
> > > >     src0_exec_type =
> > > > execution_type_for_type(brw_inst_src0_type(devinfo, inst));
> > > >     if (num_sources == 1) {
> > > > -      if ((devinfo->gen >= 9 || devinfo->is_cherryview) &&
> > > > -          src0_exec_type == BRW_REGISTER_TYPE_HF) {
> > > > -         return dst_exec_type;
> > > > +      if (type_sz(src0_exec_type) == 2 && dst_exec_type !=
> > > > src0_exec_type) {
> > > > +         if (src0_exec_type == BRW_REGISTER_TYPE_HF)
> > > > +            return BRW_REGISTER_TYPE_F;
> > > > +         else if (dst_exec_type == BRW_REGISTER_TYPE_HF)
> > > > +            return BRW_REGISTER_TYPE_D;
> > > >        }
> > > > +
> > > >        return src0_exec_type;
> > > >     }
> > > >  
> > > > @@ -367,14 +370,12 @@ execution_type(const struct
> > > > gen_device_info
> > > > *devinfo, const brw_inst *inst)
> > > >         src1_exec_type == BRW_REGISTER_TYPE_DF)
> > > >        return BRW_REGISTER_TYPE_DF;
> > > >  
> > > > -   if (devinfo->gen >= 9 || devinfo->is_cherryview) {
> > > > -      if (dst_exec_type == BRW_REGISTER_TYPE_F ||
> > > > -          src0_exec_type == BRW_REGISTER_TYPE_F ||
> > > > -          src1_exec_type == BRW_REGISTER_TYPE_F) {
> > > > -         return BRW_REGISTER_TYPE_F;
> > > > -      } else {
> > > > -         return BRW_REGISTER_TYPE_HF;
> > > > -      }
> > > > +   if (dst_exec_type == BRW_REGISTER_TYPE_F ||
> > > > +       src0_exec_type == BRW_REGISTER_TYPE_F ||
> > > > +       src1_exec_type == BRW_REGISTER_TYPE_F) {
> > > > +      return BRW_REGISTER_TYPE_F;
> > > > +   } else {
> > > > +      return BRW_REGISTER_TYPE_HF;
> > > >     }
> > > >  
> > > >     assert(src0_exec_type == BRW_REGISTER_TYPE_F);
> > > > -- 
> > > > 2.17.1
On Thu, 2019-01-24 at 10:22 -0800, Matt Turner wrote:
> On Wed, Jan 23, 2019 at 4:18 AM Iago Toral Quiroga <itoral@igalia.com
> > wrote:
> > 
> > Commit c84ec70b3a72 implemented execution type promotion to 32-bit
> > for
> > conversions involving half-float registers, which empirical testing
> > suggested
> > was required, but it did not incorporate this change into the
> > assembly validator
> > logic. This commits adds that, preventing validation errors like
> > this:
> > 
> > mov(16)  g9<4>B   g3<16,8,2>HF     { align1 1H };
> > ERROR: Destination stride must be equal to the ratio of the sizes
> > of the
> >        execution data type to the destination type
> > 
> > Fixes: c84ec70b3a72 "intel/fs: Promote execution type to 32-bit
> > when any half-float conversion is needed."
> > ---
> >  src/intel/compiler/brw_eu_validate.c | 27 ++++++++++++++--------
> > -----
> 
> New rule: New restrictions (or relaxations) may not be added to
> brw_eu_validate.c without accompanying unit tests. I'll send a patch
> to add a comment to brw_eu_validate.c saying as much.
> 
> Rationale: the reason I wrote brw_eu_validate.c was because I wasted
> a
> week debugging an issue where fulsim not only failed to inform me
> that
> one instruction was invalid but also incorrectly told me that one
> correct instruction *was* invalid. I would have been better off
> without such a tool.
> 
> If the EU validator loses people's trust, then it's useless, but if
> it
> is incorrect it's worse than useless.


Makes sense to me.

Iago
Iago Toral <itoral@igalia.com> writes:

> On Thu, 2019-01-24 at 11:45 -0800, Francisco Jerez wrote:
>> Iago Toral <itoral@igalia.com> writes:
>> 
>> > On Wed, 2019-01-23 at 06:03 -0800, Francisco Jerez wrote:
>> > > Iago Toral Quiroga <itoral@igalia.com> writes:
>> > > 
>> > > > Commit c84ec70b3a72 implemented execution type promotion to 32-
>> > > > bit
>> > > > for
>> > > > conversions involving half-float registers, which empirical
>> > > > testing
>> > > > suggested
>> > > > was required, but it did not incorporate this change into the
>> > > > assembly validator
>> > > > logic. This commits adds that, preventing validation errors
>> > > > like
>> > > > this:
>> > > > 
>> > > 
>> > > I don't think we should be validating empirical assumptions in
>> > > the EU
>> > > validator.
>> > 
>> > I am not sure I get your point, isn't c84ec70b3a72 also based on
>> > empirical testing after all?
>> > 
>> 
>> To some extent, but it doesn't attempt to enforce ISA restrictions
>> based
>> on information obtained empirically.
>> 
>> > 
>> > > > mov(16)  g9<4>B   g3<16,8,2>HF     { align1 1H };
>> > > > ERROR: Destination stride must be equal to the ratio of the
>> > > > sizes
>> > > > of the
>> > > >        execution data type to the destination type
>> > > > 
>> > > > Fixes: c84ec70b3a72 "intel/fs: Promote execution type to 32-bit
>> > > > when any half-float conversion is needed."
>> > > 
>> > > I don't think this "fixes" anything that ever worked.
>> > 
>> > It is true that the code in that trace above is not something we
>> > can
>> > produce right now, because it is a conversion from HF to B and that
>> > should only happen within the context of
>> > VK_KHR_shader_float16_int8,
>> > however, this is a consequence of the fact that since c84ec70b3a72
>> > there is an inconsistency between what we do at the IR level
>> > regarding
>> > execution size of HF conversions and what the EU validator is
>> > doing,
>> > and from that perspective this is really fixing an inconsistency
>> > that
>> > didn't exist before, and I thought we would want to address that
>> > sooner
>> > rather than later and track it down to the original change that
>> > introduced that inconsistency so we know where this is coming from.
>> > 
>> 
>> The "inconsistency" between the IR's get_exec_type() and the EU
>> validator's execution_type() has existed ever since a05b6f25bf4bfad7
>> removed the HF assert from get_exec_type() without actually
>> implementing
>> the code required to handle HF operands (which is what my commit
>> c84ec70b3a72 did).
>
> I agree with the fact that since a05b6f25bf4bfad7 the validator could
> reject valid code and that had nothing to do with your patch,

The validator rejected the same valid HF code since it was written, that
had nothing to do with neither a05b6f25bf4bfad7 nor with my patch, and
it is the real problem this patch was working around.

> but the inconsistency I am talking about here, that this patch fixes,
> is the one about get_exec_type() in the IR and execution_type() in the
> validator doing different things for HF instructions, which only
> exists since your patch and which you discuss below.
>

The "inconsistency" exists ever since get_exec_type() was introduced
without correct handling of HF types (even though execution_type()
already attempted to handle it).  And I disagree that it's a real
inconsistency except due to the fact that the validator is incorrectly
attempting to validate the alignment of the destination region according
to a rule that doesn't apply to HF types.

>> > Anyway, that was my rationale for the Fixes tag, but if you think
>> > this
>> > is not useful I am happy to drop this patch and just include it as
>> > part
>> > of my series without the tag.
>> > 
>> 
>> I'd like to see the actual regioning restrictions for HF types
>> implemented in the EU validator as part of your series.
>
> Ok, let's see if we can agree on what restrictions should we implement
> then. I can implement this restriction as documented:
>
> "Conversion between Integer and HF (Half Float) must be DWord-aligned
> and strided by a DWord on the destination"
>
> Instead of trying to apply the general one that is currently breaking.
> That will fix the assertion issue. I guess my issue with it was that
> going by your analysis this restriction is not telling the full
> picture, this is what you had to say about this restriction:
>
> "I have a feeling that the reason for this may be that the 16-bit
> pipeline lacks the ability to handle conversions from or to half-float, 
> so the execution type is implicitly promoted to the matching (integer
> or floating-point) 32-bit type where any HF conversion would be
> needed.  And on those the usual alignment restriction of the
> destination to a larger execution type applies."
>
> But I guess your point is that we can ignore these details at the
> validator level and just focus on validating strictly what the PRM
> says. Fair enough.
>
> Unfortunatley, you also mentioned that this restriction *seems* to be
> overriden by this one on all platforms but BDW (even though the both
> are listed, at least I see both in my SKL docs):
>
> "There is a relaxed alignment rule for word destinations. When the
> destination type is word (UW, W, HF), destination data types can be
> aligned to either the lowest word or the second lowest word of the
> execution channel. This means the destination data words can be either
> all in the even word locations or all in the odd word locations."
>

I'd implement this one since it seems like the most specific, except on
BDW where only the previous (almost equivalent) restriction seems to
apply.  There are shitloads of other restrictions we're missing for HF
types BTW, check out the section "Special Requirements for Handling
Mixed Mode Float Operations" of the hardware spec.

> Which you discussed didn't make sense to you and I agreed, if only
> because my own experiments suggested that the implications that one
> would derive from a strict interpretation of this (that packed 16-bit
> data is not supported) are not quite true going by the fact that we are
> emitting packed instructions on all platforms without any indication
> that this doesn't work. So what should we do about this one? If we
> decide that we want to implement this then we have to re-think the
> implementation we have.
>

The implication on packed HF instructions is likely accidental, I don't
think we should enforce the rule except for conversion operations.

> Should we just validate the one about the integer/float conversions?
> Should we do that only on BDW or on all platforms?
>
>>   I don't see the
>> need to introduce any empirical assumptions into the EU validator's
>> execution_type() in order to achieve that, even if that means that
>> get_exec_type() and execution_type() don't do the exact same
>> calculation
>> -- What you call an inconsistency is the consequence of
>> execution_type()
>> being the hardware spec's opinion on what the execution type is,
>> which
>> we assume is what we need to use while enforcing a regioning
>> restriction
>> that refers to the execution type of the instruction.
>
> Mmm... I guess my question is: is the hardware really assuming that
> execution type in all cases that involve HF instructions? What I got
> from your analysis of the whole situation is that the hardware is
> actually promoting the execution size to 32-bit in some cases, and if
> that is the case, then does it make sense to implement code in the
> validator that ignores that fact?

If the hardware spec chose to ignore that fact and instead gave us a
different set of regioning restrictions for us to use on HF restrictions
that don't rely on the definition of execution type, we should probably
have the validator match the hardware spec literally.

> I understand that you think this doesn't have any significance once we
> make sure that we only apply the restrictions specifically documented
> for HF from the PRM, and you are probably right, however I have to
> admit that it feels a bit odd to me that we do that when we know that
> under the hood there is much more going on and we are producing FS IR
> code following different rules after all. But maybe it is just me
> being too paranoid about this...
>

Wouldn't it be even odder to have the hardware restriction validator
diverge from the documentation it's supposedly validating?

>
>> > >   The validator is
>> > > still missing an implementation of the quirky HF restrictions,
>> > > and it
>> > > wasn't the purpose of c84ec70b3a72 to do such a thing.
>> > 
>> > While this is true in general, the EU validator does consider the
>> > execution type of the instruction to validate general rules such as
>> > the
>> > one I mentioned in the commit message in this patch. And that part
>> > of
>> > the validator is inconsistent with c84ec70b3a72.
>> 
>> That part of the validator was also inconsistent with the code
>> generated
>> by your original VK_KHR_shader_float16_int8 series even before I
>> committed c84ec70b3a72.  The reason is that it is trying to validate
>> a
>> restriction that rejects working code, because the "general" rule
>> it's
>> trying to enforce isn't supposed to apply to instructions with HF
>> operands, which is the real bug.
>
> I agree with that, and I'll fix that in my series by preventing that
> rule to apply to these instructions (pending to agree on the rules that
> we should validate instead), but as I have already explained above,
> that was not the inconsistency I was referring to.
>

It's the only actual inconsistency this was fixing.

>> > In fact, the EU validator is accounting for execution size
>> > promotion
>> > of HF instructions to 32-bit in SKL+ and CHV only, for conversions
>> > from HF->F and mixed float mode instructions... which is part of
>> > what
>> > c84ec70b3a72 addresses at the IR level, which it actually does for
>> > all
>> > hardware platforms and in more cases.
>> > 
>> 
>> I'm fine with fixing execution_type() to do the right thing in more
>> cases and platforms, but I don't think that should involve making
>> empirical assumptions in the validator.
>
> Just to be clear, in the particular case we are discussing here, I
> understand this means that we should leave the validator's
> execution_type() as-is.
>

That's not what I was saying.  Last time I looked at execution_type() it
did seem somewhat sketchy regarding HF types.  I wouldn't be surprised
if it needs some fixes to match the hardware spec (*not* our empirical
knowledge of the hardware) in addition to the above.

>> > >   You *should*
>> > > definitely implement those restrictions (as they're stated in the
>> > > hardware spec, without empirical assumptions) in the validator as
>> > > part
>> > > of your VK_KHR_shader_float16_int8 series,
>> > 
>> > Again, I am not sure what you mean by "without empirical
>> > assumptions".
>> 
>> I was just paraphrasing your comment.  If you feel the need to write
>> a
>> comment justifying the existence of some code in the validator based
>> on
>> empirical testing, it probably doesn't belong in the validator.
>> 
>> > According to the commit message in c84ec70b3a72 "the docs are
>> > fairly
>> > imcomplete and inconsistent" and you explained here that you had to
>> > do
>> > some experimentation of your own using the simulator (where you
>> > found
>> > its results to also be inconsistent with the hardware docs) to try
>> > and
>> > guess what is happening. That's why I was using the term
>> > "empirical"
>> > here, since the hardware docs alone aren't clear or correct enough
>> > to
>> > understand what is really happening, when and in what platforms.
>> > 
>> 
>> If some hardware restriction is described in a contradictory or
>> ambiguous way by the hardware spec we should probably avoid enforcing
>> it
>> altogether in the EU validator.
>
> Okay, in that case I guess you would rule the restriction about the
> relaxed alignment rule for Word destinations as such, but not the one
> about half-float/integer conversions. Is that correct?
>

We probably need to enforce both since they apply to different sets of
hardware.

>> > Anyway, if you don't like the term "empirical" to refer to all
>> > this,
>> > that's fine by me, but what I need to know is if we agree that we
>> > need
>> > to implement the same type promotion rules in the validator that we
>> > are
>> > implementing in the IR, indepedently of whether refer to them as
>> > "based
>> > on empirical testing" or not.
>> > 
>> 
>> I don't think we agree on that.  IMHO you want to enforce the
>> regioning
>> restrictions that the hardware spec describes for HF types rather
>> than
>> the current (incorrect and incomplete) generalization of the standard
>> restrictions.  With those in place the apparent inconsistency between
>> get_exec_type() and the hardware spec's execution type would be
>> harmless.
>
> I agree with that. I still feel uneasy about having two different 
> implementations of the execution type in FS IR and the validator, even
> if that doesn't lead to any issues when we do what you say here...
>

I feel uneasy about the validator not matching the hardware spec.
Luckily its only purpose is validating the rest of the compiler, so if
there is an actual inconsistency as you seem to think, it will come up.

> At the very least I think that deserves a comment in the validator
> explaining why we think that is okay.
>

A quote of the hardware spec should be enough of a comment.  Right?  And
there is already a comment in get_exec_type() explaining why its HF type
handling logic is consistent with the hardware spec restrictions for HF
types.

>> > >  if anything because currently
>> > > it will reject working code that uses HF types.
>> > 
>> > Just for the sake of clarity, since that sentence above could be
>> > interpreted as if all HF code would be rejected: we have been using
>> > HF
>> > types since we landed VK_KHR_16bit_storage, which includes
>> > conversions
>> > between HF and F and the EU validator is not complaining about any
>> > of
>> > that. It is currently a problem only with conversions to smaller
>> > types
>> > (so only conversions to Byte) because that's where we check for
>> > that
>> > restriction on the stride, which is based on the execution type of
>> > the
>> > instruction.
>> > 
>> > > 
>> > > > ---
>> > > >  src/intel/compiler/brw_eu_validate.c | 27 ++++++++++++++----
>> > > > ----
>> > > > -----
>> > > >  1 file changed, 14 insertions(+), 13 deletions(-)
>> > > > 
>> > > > diff --git a/src/intel/compiler/brw_eu_validate.c
>> > > > b/src/intel/compiler/brw_eu_validate.c
>> > > > index a25010b225c..3bb37677672 100644
>> > > > --- a/src/intel/compiler/brw_eu_validate.c
>> > > > +++ b/src/intel/compiler/brw_eu_validate.c
>> > > > @@ -325,17 +325,20 @@ execution_type(const struct
>> > > > gen_device_info
>> > > > *devinfo, const brw_inst *inst)
>> > > >     unsigned num_sources = num_sources_from_inst(devinfo,
>> > > > inst);
>> > > >     enum brw_reg_type src0_exec_type, src1_exec_type;
>> > > >  
>> > > > -   /* Execution data type is independent of destination data
>> > > > type,
>> > > > except in
>> > > > -    * mixed F/HF instructions on CHV and SKL+.
>> > > > +   /* Empirical testing suggests that type conversions
>> > > > involving
>> > > > half-float
>> > > > +    * promote execution type to 32-bit. See get_exec_type() in
>> > > > brw_ir_fs.h.
>> > > >      */
>> > > >     enum brw_reg_type dst_exec_type =
>> > > > brw_inst_dst_type(devinfo,
>> > > > inst);
>> > > >  
>> > > >     src0_exec_type =
>> > > > execution_type_for_type(brw_inst_src0_type(devinfo, inst));
>> > > >     if (num_sources == 1) {
>> > > > -      if ((devinfo->gen >= 9 || devinfo->is_cherryview) &&
>> > > > -          src0_exec_type == BRW_REGISTER_TYPE_HF) {
>> > > > -         return dst_exec_type;
>> > > > +      if (type_sz(src0_exec_type) == 2 && dst_exec_type !=
>> > > > src0_exec_type) {
>> > > > +         if (src0_exec_type == BRW_REGISTER_TYPE_HF)
>> > > > +            return BRW_REGISTER_TYPE_F;
>> > > > +         else if (dst_exec_type == BRW_REGISTER_TYPE_HF)
>> > > > +            return BRW_REGISTER_TYPE_D;
>> > > >        }
>> > > > +
>> > > >        return src0_exec_type;
>> > > >     }
>> > > >  
>> > > > @@ -367,14 +370,12 @@ execution_type(const struct
>> > > > gen_device_info
>> > > > *devinfo, const brw_inst *inst)
>> > > >         src1_exec_type == BRW_REGISTER_TYPE_DF)
>> > > >        return BRW_REGISTER_TYPE_DF;
>> > > >  
>> > > > -   if (devinfo->gen >= 9 || devinfo->is_cherryview) {
>> > > > -      if (dst_exec_type == BRW_REGISTER_TYPE_F ||
>> > > > -          src0_exec_type == BRW_REGISTER_TYPE_F ||
>> > > > -          src1_exec_type == BRW_REGISTER_TYPE_F) {
>> > > > -         return BRW_REGISTER_TYPE_F;
>> > > > -      } else {
>> > > > -         return BRW_REGISTER_TYPE_HF;
>> > > > -      }
>> > > > +   if (dst_exec_type == BRW_REGISTER_TYPE_F ||
>> > > > +       src0_exec_type == BRW_REGISTER_TYPE_F ||
>> > > > +       src1_exec_type == BRW_REGISTER_TYPE_F) {
>> > > > +      return BRW_REGISTER_TYPE_F;
>> > > > +   } else {
>> > > > +      return BRW_REGISTER_TYPE_HF;
>> > > >     }
>> > > >  
>> > > >     assert(src0_exec_type == BRW_REGISTER_TYPE_F);
>> > > > -- 
>> > > > 2.17.1
On Fri, 2019-01-25 at 12:54 -0800, Francisco Jerez wrote:
> Iago Toral <itoral@igalia.com> writes:
> 
> > On Thu, 2019-01-24 at 11:45 -0800, Francisco Jerez wrote:
> > > Iago Toral <itoral@igalia.com> writes:
> > > 
> > > > On Wed, 2019-01-23 at 06:03 -0800, Francisco Jerez wrote:
> > > > > Iago Toral Quiroga <itoral@igalia.com> writes:

(...)

> > > I'd like to see the actual regioning restrictions for HF types
> > > implemented in the EU validator as part of your series.
> > 
> > Ok, let's see if we can agree on what restrictions should we
> > implement
> > then. I can implement this restriction as documented:
> > 
> > "Conversion between Integer and HF (Half Float) must be DWord-
> > aligned
> > and strided by a DWord on the destination"
> > 
> > Instead of trying to apply the general one that is currently
> > breaking.
> > That will fix the assertion issue. I guess my issue with it was
> > that
> > going by your analysis this restriction is not telling the full
> > picture, this is what you had to say about this restriction:
> > 
> > "I have a feeling that the reason for this may be that the 16-bit
> > pipeline lacks the ability to handle conversions from or to half-
> > float, 
> > so the execution type is implicitly promoted to the matching
> > (integer
> > or floating-point) 32-bit type where any HF conversion would be
> > needed.  And on those the usual alignment restriction of the
> > destination to a larger execution type applies."
> > 
> > But I guess your point is that we can ignore these details at the
> > validator level and just focus on validating strictly what the PRM
> > says. Fair enough.
> > 
> > Unfortunatley, you also mentioned that this restriction *seems* to
> > be
> > overriden by this one on all platforms but BDW (even though the
> > both
> > are listed, at least I see both in my SKL docs):
> > 
> > "There is a relaxed alignment rule for word destinations. When the
> > destination type is word (UW, W, HF), destination data types can be
> > aligned to either the lowest word or the second lowest word of the
> > execution channel. This means the destination data words can be
> > either
> > all in the even word locations or all in the odd word locations."
> > 
> 
> I'd implement this one since it seems like the most specific, except
> on
> BDW where only the previous (almost equivalent) restriction seems to
> apply.

So we apply the one about relaxed alignment on all platforms but BDW,
and on BDW we apply the other one. Ok.

>   There are shitloads of other restrictions we're missing for HF
> types BTW, check out the section "Special Requirements for Handling
> Mixed Mode Float Operations" of the hardware spec.

Yes, I'll go through those in my series too, but I want to have a clear
understanding about the others since they are the ones that triggered
this discussion.

> > Which you discussed didn't make sense to you and I agreed, if only
> > because my own experiments suggested that the implications that one
> > would derive from a strict interpretation of this (that packed 16-
> > bit
> > data is not supported) are not quite true going by the fact that we
> > are
> > emitting packed instructions on all platforms without any
> > indication
> > that this doesn't work. So what should we do about this one? If we
> > decide that we want to implement this then we have to re-think the
> > implementation we have.
> > 
> 
> The implication on packed HF instructions is likely accidental, I
> don't
> think we should enforce the rule except for conversion operations.

Ok, I'll implement this only for conversions then.

> > Should we just validate the one about the integer/float
> > conversions?
> > Should we do that only on BDW or on all platforms?
> > 
> > >   I don't see the
> > > need to introduce any empirical assumptions into the EU
> > > validator's
> > > execution_type() in order to achieve that, even if that means
> > > that
> > > get_exec_type() and execution_type() don't do the exact same
> > > calculation
> > > -- What you call an inconsistency is the consequence of
> > > execution_type()
> > > being the hardware spec's opinion on what the execution type is,
> > > which
> > > we assume is what we need to use while enforcing a regioning
> > > restriction
> > > that refers to the execution type of the instruction.
> > 
> > Mmm... I guess my question is: is the hardware really assuming that
> > execution type in all cases that involve HF instructions? What I
> > got
> > from your analysis of the whole situation is that the hardware is
> > actually promoting the execution size to 32-bit in some cases, and
> > if
> > that is the case, then does it make sense to implement code in the
> > validator that ignores that fact?
> 
> If the hardware spec chose to ignore that fact and instead gave us a
> different set of regioning restrictions for us to use on HF
> restrictions
> that don't rely on the definition of execution type, we should
> probably
> have the validator match the hardware spec literally.
> 

Ok, my initial understanding was that the hardware docs were not
exactly reliable when it came to the way in which they documented these
restrictions, that's why I was expecting that we would not be exactly
following the PRM to validate assembly code and that we would instead
rely on your findings about the execution type and validate code using
that instead. Anyway, I think I understand your point now.

> > I understand that you think this doesn't have any significance once
> > we
> > make sure that we only apply the restrictions specifically
> > documented
> > for HF from the PRM, and you are probably right, however I have to
> > admit that it feels a bit odd to me that we do that when we know
> > that
> > under the hood there is much more going on and we are producing FS
> > IR
> > code following different rules after all. But maybe it is just me
> > being too paranoid about this...
> > 
> 
> Wouldn't it be even odder to have the hardware restriction validator
> diverge from the documentation it's supposedly validating?

If that's the consequence of empirical testing suggesting that the
hardware doesn't exactly do what the documentation states, then I don't
think that would be that odd, particularly if that is clearly explained
with a comment in the code that adds that validation. But I understand
that we want to keep te validator strictly validating the restrictions
as closely as possible to the way in which they are formulated in the
PRM, even if we know that they are not telling the complete picture, I
get that now.

(...)

> >  IMHO you want to enforce the
> > > regioning
> > > restrictions that the hardware spec describes for HF types rather
> > > than
> > > the current (incorrect and incomplete) generalization of the
> > > standard
> > > restrictions.  With those in place the apparent inconsistency
> > > between
> > > get_exec_type() and the hardware spec's execution type would be
> > > harmless.
> > 
> > I agree with that. I still feel uneasy about having two different 
> > implementations of the execution type in FS IR and the validator,
> > even
> > if that doesn't lead to any issues when we do what you say here...
> > 
> 
> I feel uneasy about the validator not matching the hardware spec.
> Luckily its only purpose is validating the rest of the compiler, so
> if
> there is an actual inconsistency as you seem to think, it will come
> up.
> 
> > At the very least I think that deserves a comment in the validator
> > explaining why we think that is okay.
> > 
> 
> A quote of the hardware spec should be enough of a
> comment.  Right?  And
> there is already a comment in get_exec_type() explaining why its HF
> type
> handling logic is consistent with the hardware spec restrictions for
> HF
> types.

It explains why get_exec_type() is consistent with the spec
restrictions, but I don't think it explains why, for the same
instruction, get_exec_type() and execution_type() can return different
results (and why that is fine). Anyway, if you think a comment
explaining this is not necessary I won't add it.

> > > > >  if anything because currently
> > > > > it will reject working code that uses HF types.
> > > > 
> > > > Just for the sake of clarity, since that sentence above could
> > > > be
> > > > interpreted as if all HF code would be rejected: we have been
> > > > using
> > > > HF
> > > > types since we landed VK_KHR_16bit_storage, which includes
> > > > conversions
> > > > between HF and F and the EU validator is not complaining about
> > > > any
> > > > of
> > > > that. It is currently a problem only with conversions to
> > > > smaller
> > > > types
> > > > (so only conversions to Byte) because that's where we check for
> > > > that
> > > > restriction on the stride, which is based on the execution type
> > > > of
> > > > the
> > > > instruction.
> > > > 
> > > > > 
> > > > > > ---
> > > > > >  src/intel/compiler/brw_eu_validate.c | 27 ++++++++++++++
> > > > > > ----
> > > > > > ----
> > > > > > -----
> > > > > >  1 file changed, 14 insertions(+), 13 deletions(-)
> > > > > > 
> > > > > > diff --git a/src/intel/compiler/brw_eu_validate.c
> > > > > > b/src/intel/compiler/brw_eu_validate.c
> > > > > > index a25010b225c..3bb37677672 100644
> > > > > > --- a/src/intel/compiler/brw_eu_validate.c
> > > > > > +++ b/src/intel/compiler/brw_eu_validate.c
> > > > > > @@ -325,17 +325,20 @@ execution_type(const struct
> > > > > > gen_device_info
> > > > > > *devinfo, const brw_inst *inst)
> > > > > >     unsigned num_sources = num_sources_from_inst(devinfo,
> > > > > > inst);
> > > > > >     enum brw_reg_type src0_exec_type, src1_exec_type;
> > > > > >  
> > > > > > -   /* Execution data type is independent of destination
> > > > > > data
> > > > > > type,
> > > > > > except in
> > > > > > -    * mixed F/HF instructions on CHV and SKL+.
> > > > > > +   /* Empirical testing suggests that type conversions
> > > > > > involving
> > > > > > half-float
> > > > > > +    * promote execution type to 32-bit. See
> > > > > > get_exec_type() in
> > > > > > brw_ir_fs.h.
> > > > > >      */
> > > > > >     enum brw_reg_type dst_exec_type =
> > > > > > brw_inst_dst_type(devinfo,
> > > > > > inst);
> > > > > >  
> > > > > >     src0_exec_type =
> > > > > > execution_type_for_type(brw_inst_src0_type(devinfo, inst));
> > > > > >     if (num_sources == 1) {
> > > > > > -      if ((devinfo->gen >= 9 || devinfo->is_cherryview) &&
> > > > > > -          src0_exec_type == BRW_REGISTER_TYPE_HF) {
> > > > > > -         return dst_exec_type;
> > > > > > +      if (type_sz(src0_exec_type) == 2 && dst_exec_type !=
> > > > > > src0_exec_type) {
> > > > > > +         if (src0_exec_type == BRW_REGISTER_TYPE_HF)
> > > > > > +            return BRW_REGISTER_TYPE_F;
> > > > > > +         else if (dst_exec_type == BRW_REGISTER_TYPE_HF)
> > > > > > +            return BRW_REGISTER_TYPE_D;
> > > > > >        }
> > > > > > +
> > > > > >        return src0_exec_type;
> > > > > >     }
> > > > > >  
> > > > > > @@ -367,14 +370,12 @@ execution_type(const struct
> > > > > > gen_device_info
> > > > > > *devinfo, const brw_inst *inst)
> > > > > >         src1_exec_type == BRW_REGISTER_TYPE_DF)
> > > > > >        return BRW_REGISTER_TYPE_DF;
> > > > > >  
> > > > > > -   if (devinfo->gen >= 9 || devinfo->is_cherryview) {
> > > > > > -      if (dst_exec_type == BRW_REGISTER_TYPE_F ||
> > > > > > -          src0_exec_type == BRW_REGISTER_TYPE_F ||
> > > > > > -          src1_exec_type == BRW_REGISTER_TYPE_F) {
> > > > > > -         return BRW_REGISTER_TYPE_F;
> > > > > > -      } else {
> > > > > > -         return BRW_REGISTER_TYPE_HF;
> > > > > > -      }
> > > > > > +   if (dst_exec_type == BRW_REGISTER_TYPE_F ||
> > > > > > +       src0_exec_type == BRW_REGISTER_TYPE_F ||
> > > > > > +       src1_exec_type == BRW_REGISTER_TYPE_F) {
> > > > > > +      return BRW_REGISTER_TYPE_F;
> > > > > > +   } else {
> > > > > > +      return BRW_REGISTER_TYPE_HF;
> > > > > >     }
> > > > > >  
> > > > > >     assert(src0_exec_type == BRW_REGISTER_TYPE_F);
> > > > > > -- 
> > > > > > 2.17.1
On Fri, 2019-01-25 at 12:54 -0800, Francisco Jerez wrote:
> Iago Toral <itoral@igalia.com> writes:
> 
> > On Thu, 2019-01-24 at 11:45 -0800, Francisco Jerez wrote:
> > > Iago Toral <itoral@igalia.com> writes:
> > > 
> > > > On Wed, 2019-01-23 at 06:03 -0800, Francisco Jerez wrote:
> > > > > Iago Toral Quiroga <itoral@igalia.com> writes:
> > > > > 
> > > > > > Commit c84ec70b3a72 implemented execution type promotion to
> > > > > > 32-
> > > > > > bit
> > > > > > for
> > > > > > conversions involving half-float registers, which empirical
> > > > > > testing
> > > > > > suggested
> > > > > > was required, but it did not incorporate this change into
> > > > > > the
> > > > > > assembly validator
> > > > > > logic. This commits adds that, preventing validation errors
> > > > > > like
> > > > > > this:
> > > > > > 
> > > > > 
> > > > > I don't think we should be validating empirical assumptions
> > > > > in
> > > > > the EU
> > > > > validator.
> > > > 
> > > > I am not sure I get your point, isn't c84ec70b3a72 also based
> > > > on
> > > > empirical testing after all?
> > > > 
> > > 
> > > To some extent, but it doesn't attempt to enforce ISA
> > > restrictions
> > > based
> > > on information obtained empirically.
> > > 
> > > > 
> > > > > > mov(16)  g9<4>B   g3<16,8,2>HF     { align1 1H };
> > > > > > ERROR: Destination stride must be equal to the ratio of the
> > > > > > sizes
> > > > > > of the
> > > > > >        execution data type to the destination type
> > > > > > 
> > > > > > Fixes: c84ec70b3a72 "intel/fs: Promote execution type to
> > > > > > 32-bit
> > > > > > when any half-float conversion is needed."
> > > > > 
> > > > > I don't think this "fixes" anything that ever worked.
> > > > 
> > > > It is true that the code in that trace above is not something
> > > > we
> > > > can
> > > > produce right now, because it is a conversion from HF to B and
> > > > that
> > > > should only happen within the context of
> > > > VK_KHR_shader_float16_int8,
> > > > however, this is a consequence of the fact that since
> > > > c84ec70b3a72
> > > > there is an inconsistency between what we do at the IR level
> > > > regarding
> > > > execution size of HF conversions and what the EU validator is
> > > > doing,
> > > > and from that perspective this is really fixing an
> > > > inconsistency
> > > > that
> > > > didn't exist before, and I thought we would want to address
> > > > that
> > > > sooner
> > > > rather than later and track it down to the original change that
> > > > introduced that inconsistency so we know where this is coming
> > > > from.
> > > > 
> > > 
> > > The "inconsistency" between the IR's get_exec_type() and the EU
> > > validator's execution_type() has existed ever since
> > > a05b6f25bf4bfad7
> > > removed the HF assert from get_exec_type() without actually
> > > implementing
> > > the code required to handle HF operands (which is what my commit
> > > c84ec70b3a72 did).
> > 
> > I agree with the fact that since a05b6f25bf4bfad7 the validator
> > could
> > reject valid code and that had nothing to do with your patch,
> 
> The validator rejected the same valid HF code since it was written,
> that
> had nothing to do with neither a05b6f25bf4bfad7 nor with my patch,
> and
> it is the real problem this patch was working around.
> 
> > but the inconsistency I am talking about here, that this patch
> > fixes,
> > is the one about get_exec_type() in the IR and execution_type() in
> > the
> > validator doing different things for HF instructions, which only
> > exists since your patch and which you discuss below.
> > 
> 
> The "inconsistency" exists ever since get_exec_type() was introduced
> without correct handling of HF types (even though execution_type()
> already attempted to handle it).  And I disagree that it's a real
> inconsistency except due to the fact that the validator is
> incorrectly
> attempting to validate the alignment of the destination region
> according
> to a rule that doesn't apply to HF types.
> 
> > > > Anyway, that was my rationale for the Fixes tag, but if you
> > > > think
> > > > this
> > > > is not useful I am happy to drop this patch and just include it
> > > > as
> > > > part
> > > > of my series without the tag.
> > > > 
> > > 
> > > I'd like to see the actual regioning restrictions for HF types
> > > implemented in the EU validator as part of your series.
> > 
> > Ok, let's see if we can agree on what restrictions should we
> > implement
> > then. I can implement this restriction as documented:
> > 
> > "Conversion between Integer and HF (Half Float) must be DWord-
> > aligned
> > and strided by a DWord on the destination"
> > 
> > Instead of trying to apply the general one that is currently
> > breaking.
> > That will fix the assertion issue. I guess my issue with it was
> > that
> > going by your analysis this restriction is not telling the full
> > picture, this is what you had to say about this restriction:
> > 
> > "I have a feeling that the reason for this may be that the 16-bit
> > pipeline lacks the ability to handle conversions from or to half-
> > float, 
> > so the execution type is implicitly promoted to the matching
> > (integer
> > or floating-point) 32-bit type where any HF conversion would be
> > needed.  And on those the usual alignment restriction of the
> > destination to a larger execution type applies."
> > 
> > But I guess your point is that we can ignore these details at the
> > validator level and just focus on validating strictly what the PRM
> > says. Fair enough.
> > 
> > Unfortunatley, you also mentioned that this restriction *seems* to
> > be
> > overriden by this one on all platforms but BDW (even though the
> > both
> > are listed, at least I see both in my SKL docs):
> > 
> > "There is a relaxed alignment rule for word destinations. When the
> > destination type is word (UW, W, HF), destination data types can be
> > aligned to either the lowest word or the second lowest word of the
> > execution channel. This means the destination data words can be
> > either
> > all in the even word locations or all in the odd word locations."
> > 
> 
> I'd implement this one since it seems like the most specific, except
> on
> BDW where only the previous (almost equivalent) restriction seems to
> apply. 

I have just realized that this restriction could be at odds with this
other aspect of the spec:

"There is no direct conversion from B/UB to DF or DF to B/UB. Use two
instructions and a word or DWord intermediate type."

So it is okay to convert from B to W and then from W to DF/Q (it has
the same text from conversions between HF and 64-bit types). We
actually emit these conversions and they work fine going by the
existing CTS tests, but these conversions are not aligned to 32-bit
like the restriction states, instead they are aligned to 64-bit (the
regioning lowering pass does this):

mov(8) g14<4>W   g5<4,4,1>Q   { align1 2Q };

So I think this is a bit inconsistent again and I guess we need to
choose what we want to do. I can think of 4 options:

1. We could assume that the restriction is correctly formulated and
split the instructions to respect the restriction as-is even if we have
not found any evidence that this doesn't work

2. We could assume that the restriction is correctly formulated and the
regioning lowering pass need to be fixed to generate a conversion with
a stride of 2 even if the execution size if 64-bit (haven't tried this
since it sounds sketchy to me).

3. We could understand that the restriction is strictly formulated for
conversions from a 32-bit execution types, and decide that it does not
apply to conversions from 64-bit sources.

4. We can assume that the restriction is formulated incorrectly and
instead of saying that destination words should all be in even or odd
slots, assuming a conversion from a 32-bit type, it should just say
that they should be aligned to the execution type to also accomodate
conversions from 64-bit types.

And indepedently of how we decide to interpret the restriction in the
end, given that we have discussed here that we should only validate
things as they are described in the PRM, I think we also need to decide
what we implement in the validator (if anything) if we decide to go
with either 3) or 4) (were we would not be exactly following the PRM to
the letter).

I kind of think that 4) might be what is going on here...
what do you think?

>  There are shitloads of other restrictions we're missing for HF
> types BTW, check out the section "Special Requirements for Handling
> Mixed Mode Float Operations" of the hardware spec.
> 
> > Which you discussed didn't make sense to you and I agreed, if only
> > because my own experiments suggested that the implications that one
> > would derive from a strict interpretation of this (that packed 16-
> > bit
> > data is not supported) are not quite true going by the fact that we
> > are
> > emitting packed instructions on all platforms without any
> > indication
> > that this doesn't work. So what should we do about this one? If we
> > decide that we want to implement this then we have to re-think the
> > implementation we have.
> > 
> 
> The implication on packed HF instructions is likely accidental, I
> don't
> think we should enforce the rule except for conversion operations.
> 
> > Should we just validate the one about the integer/float
> > conversions?
> > Should we do that only on BDW or on all platforms?
> > 
> > >   I don't see the
> > > need to introduce any empirical assumptions into the EU
> > > validator's
> > > execution_type() in order to achieve that, even if that means
> > > that
> > > get_exec_type() and execution_type() don't do the exact same
> > > calculation
> > > -- What you call an inconsistency is the consequence of
> > > execution_type()
> > > being the hardware spec's opinion on what the execution type is,
> > > which
> > > we assume is what we need to use while enforcing a regioning
> > > restriction
> > > that refers to the execution type of the instruction.
> > 
> > Mmm... I guess my question is: is the hardware really assuming that
> > execution type in all cases that involve HF instructions? What I
> > got
> > from your analysis of the whole situation is that the hardware is
> > actually promoting the execution size to 32-bit in some cases, and
> > if
> > that is the case, then does it make sense to implement code in the
> > validator that ignores that fact?
> 
> If the hardware spec chose to ignore that fact and instead gave us a
> different set of regioning restrictions for us to use on HF
> restrictions
> that don't rely on the definition of execution type, we should
> probably
> have the validator match the hardware spec literally.
> 
> > I understand that you think this doesn't have any significance once
> > we
> > make sure that we only apply the restrictions specifically
> > documented
> > for HF from the PRM, and you are probably right, however I have to
> > admit that it feels a bit odd to me that we do that when we know
> > that
> > under the hood there is much more going on and we are producing FS
> > IR
> > code following different rules after all. But maybe it is just me
> > being too paranoid about this...
> > 
> 
> Wouldn't it be even odder to have the hardware restriction validator
> diverge from the documentation it's supposedly validating?
> 
> > 
> > > > >   The validator is
> > > > > still missing an implementation of the quirky HF
> > > > > restrictions,
> > > > > and it
> > > > > wasn't the purpose of c84ec70b3a72 to do such a thing.
> > > > 
> > > > While this is true in general, the EU validator does consider
> > > > the
> > > > execution type of the instruction to validate general rules
> > > > such as
> > > > the
> > > > one I mentioned in the commit message in this patch. And that
> > > > part
> > > > of
> > > > the validator is inconsistent with c84ec70b3a72.
> > > 
> > > That part of the validator was also inconsistent with the code
> > > generated
> > > by your original VK_KHR_shader_float16_int8 series even before I
> > > committed c84ec70b3a72.  The reason is that it is trying to
> > > validate
> > > a
> > > restriction that rejects working code, because the "general" rule
> > > it's
> > > trying to enforce isn't supposed to apply to instructions with HF
> > > operands, which is the real bug.
> > 
> > I agree with that, and I'll fix that in my series by preventing
> > that
> > rule to apply to these instructions (pending to agree on the rules
> > that
> > we should validate instead), but as I have already explained above,
> > that was not the inconsistency I was referring to.
> > 
> 
> It's the only actual inconsistency this was fixing.
> 
> > > > In fact, the EU validator is accounting for execution size
> > > > promotion
> > > > of HF instructions to 32-bit in SKL+ and CHV only, for
> > > > conversions
> > > > from HF->F and mixed float mode instructions... which is part
> > > > of
> > > > what
> > > > c84ec70b3a72 addresses at the IR level, which it actually does
> > > > for
> > > > all
> > > > hardware platforms and in more cases.
> > > > 
> > > 
> > > I'm fine with fixing execution_type() to do the right thing in
> > > more
> > > cases and platforms, but I don't think that should involve making
> > > empirical assumptions in the validator.
> > 
> > Just to be clear, in the particular case we are discussing here, I
> > understand this means that we should leave the validator's
> > execution_type() as-is.
> > 
> 
> That's not what I was saying.  Last time I looked at execution_type()
> it
> did seem somewhat sketchy regarding HF types.  I wouldn't be
> surprised
> if it needs some fixes to match the hardware spec (*not* our
> empirical
> knowledge of the hardware) in addition to the above.
> 
> > > > >   You *should*
> > > > > definitely implement those restrictions (as they're stated in
> > > > > the
> > > > > hardware spec, without empirical assumptions) in the
> > > > > validator as
> > > > > part
> > > > > of your VK_KHR_shader_float16_int8 series,
> > > > 
> > > > Again, I am not sure what you mean by "without empirical
> > > > assumptions".
> > > 
> > > I was just paraphrasing your comment.  If you feel the need to
> > > write
> > > a
> > > comment justifying the existence of some code in the validator
> > > based
> > > on
> > > empirical testing, it probably doesn't belong in the validator.
> > > 
> > > > According to the commit message in c84ec70b3a72 "the docs are
> > > > fairly
> > > > imcomplete and inconsistent" and you explained here that you
> > > > had to
> > > > do
> > > > some experimentation of your own using the simulator (where you
> > > > found
> > > > its results to also be inconsistent with the hardware docs) to
> > > > try
> > > > and
> > > > guess what is happening. That's why I was using the term
> > > > "empirical"
> > > > here, since the hardware docs alone aren't clear or correct
> > > > enough
> > > > to
> > > > understand what is really happening, when and in what
> > > > platforms.
> > > > 
> > > 
> > > If some hardware restriction is described in a contradictory or
> > > ambiguous way by the hardware spec we should probably avoid
> > > enforcing
> > > it
> > > altogether in the EU validator.
> > 
> > Okay, in that case I guess you would rule the restriction about the
> > relaxed alignment rule for Word destinations as such, but not the
> > one
> > about half-float/integer conversions. Is that correct?
> > 
> 
> We probably need to enforce both since they apply to different sets
> of
> hardware.
> 
> > > > Anyway, if you don't like the term "empirical" to refer to all
> > > > this,
> > > > that's fine by me, but what I need to know is if we agree that
> > > > we
> > > > need
> > > > to implement the same type promotion rules in the validator
> > > > that we
> > > > are
> > > > implementing in the IR, indepedently of whether refer to them
> > > > as
> > > > "based
> > > > on empirical testing" or not.
> > > > 
> > > 
> > > I don't think we agree on that.  IMHO you want to enforce the
> > > regioning
> > > restrictions that the hardware spec describes for HF types rather
> > > than
> > > the current (incorrect and incomplete) generalization of the
> > > standard
> > > restrictions.  With those in place the apparent inconsistency
> > > between
> > > get_exec_type() and the hardware spec's execution type would be
> > > harmless.
> > 
> > I agree with that. I still feel uneasy about having two different 
> > implementations of the execution type in FS IR and the validator,
> > even
> > if that doesn't lead to any issues when we do what you say here...
> > 
> 
> I feel uneasy about the validator not matching the hardware spec.
> Luckily its only purpose is validating the rest of the compiler, so
> if
> there is an actual inconsistency as you seem to think, it will come
> up.
> 
> > At the very least I think that deserves a comment in the validator
> > explaining why we think that is okay.
> > 
> 
> A quote of the hardware spec should be enough of a
> comment.  Right?  And
> there is already a comment in get_exec_type() explaining why its HF
> type
> handling logic is consistent with the hardware spec restrictions for
> HF
> types.
> 
> > > > >  if anything because currently
> > > > > it will reject working code that uses HF types.
> > > > 
> > > > Just for the sake of clarity, since that sentence above could
> > > > be
> > > > interpreted as if all HF code would be rejected: we have been
> > > > using
> > > > HF
> > > > types since we landed VK_KHR_16bit_storage, which includes
> > > > conversions
> > > > between HF and F and the EU validator is not complaining about
> > > > any
> > > > of
> > > > that. It is currently a problem only with conversions to
> > > > smaller
> > > > types
> > > > (so only conversions to Byte) because that's where we check for
> > > > that
> > > > restriction on the stride, which is based on the execution type
> > > > of
> > > > the
> > > > instruction.
> > > > 
> > > > > 
> > > > > > ---
> > > > > >  src/intel/compiler/brw_eu_validate.c | 27 ++++++++++++++
> > > > > > ----
> > > > > > ----
> > > > > > -----
> > > > > >  1 file changed, 14 insertions(+), 13 deletions(-)
> > > > > > 
> > > > > > diff --git a/src/intel/compiler/brw_eu_validate.c
> > > > > > b/src/intel/compiler/brw_eu_validate.c
> > > > > > index a25010b225c..3bb37677672 100644
> > > > > > --- a/src/intel/compiler/brw_eu_validate.c
> > > > > > +++ b/src/intel/compiler/brw_eu_validate.c
> > > > > > @@ -325,17 +325,20 @@ execution_type(const struct
> > > > > > gen_device_info
> > > > > > *devinfo, const brw_inst *inst)
> > > > > >     unsigned num_sources = num_sources_from_inst(devinfo,
> > > > > > inst);
> > > > > >     enum brw_reg_type src0_exec_type, src1_exec_type;
> > > > > >  
> > > > > > -   /* Execution data type is independent of destination
> > > > > > data
> > > > > > type,
> > > > > > except in
> > > > > > -    * mixed F/HF instructions on CHV and SKL+.
> > > > > > +   /* Empirical testing suggests that type conversions
> > > > > > involving
> > > > > > half-float
> > > > > > +    * promote execution type to 32-bit. See
> > > > > > get_exec_type() in
> > > > > > brw_ir_fs.h.
> > > > > >      */
> > > > > >     enum brw_reg_type dst_exec_type =
> > > > > > brw_inst_dst_type(devinfo,
> > > > > > inst);
> > > > > >  
> > > > > >     src0_exec_type =
> > > > > > execution_type_for_type(brw_inst_src0_type(devinfo, inst));
> > > > > >     if (num_sources == 1) {
> > > > > > -      if ((devinfo->gen >= 9 || devinfo->is_cherryview) &&
> > > > > > -          src0_exec_type == BRW_REGISTER_TYPE_HF) {
> > > > > > -         return dst_exec_type;
> > > > > > +      if (type_sz(src0_exec_type) == 2 && dst_exec_type !=
> > > > > > src0_exec_type) {
> > > > > > +         if (src0_exec_type == BRW_REGISTER_TYPE_HF)
> > > > > > +            return BRW_REGISTER_TYPE_F;
> > > > > > +         else if (dst_exec_type == BRW_REGISTER_TYPE_HF)
> > > > > > +            return BRW_REGISTER_TYPE_D;
> > > > > >        }
> > > > > > +
> > > > > >        return src0_exec_type;
> > > > > >     }
> > > > > >  
> > > > > > @@ -367,14 +370,12 @@ execution_type(const struct
> > > > > > gen_device_info
> > > > > > *devinfo, const brw_inst *inst)
> > > > > >         src1_exec_type == BRW_REGISTER_TYPE_DF)
> > > > > >        return BRW_REGISTER_TYPE_DF;
> > > > > >  
> > > > > > -   if (devinfo->gen >= 9 || devinfo->is_cherryview) {
> > > > > > -      if (dst_exec_type == BRW_REGISTER_TYPE_F ||
> > > > > > -          src0_exec_type == BRW_REGISTER_TYPE_F ||
> > > > > > -          src1_exec_type == BRW_REGISTER_TYPE_F) {
> > > > > > -         return BRW_REGISTER_TYPE_F;
> > > > > > -      } else {
> > > > > > -         return BRW_REGISTER_TYPE_HF;
> > > > > > -      }
> > > > > > +   if (dst_exec_type == BRW_REGISTER_TYPE_F ||
> > > > > > +       src0_exec_type == BRW_REGISTER_TYPE_F ||
> > > > > > +       src1_exec_type == BRW_REGISTER_TYPE_F) {
> > > > > > +      return BRW_REGISTER_TYPE_F;
> > > > > > +   } else {
> > > > > > +      return BRW_REGISTER_TYPE_HF;
> > > > > >     }
> > > > > >  
> > > > > >     assert(src0_exec_type == BRW_REGISTER_TYPE_F);
> > > > > > -- 
> > > > > > 2.17.1
On Fri, 2019-02-01 at 12:34 +0100, Iago Toral wrote:
> On Fri, 2019-01-25 at 12:54 -0800, Francisco Jerez wrote:
> > Iago Toral <itoral@igalia.com> writes:
> > 
> > > On Thu, 2019-01-24 at 11:45 -0800, Francisco Jerez wrote:
> > > > Iago Toral <itoral@igalia.com> writes:
> > > > 
> > > > > On Wed, 2019-01-23 at 06:03 -0800, Francisco Jerez wrote:
> > > > > > Iago Toral Quiroga <itoral@igalia.com> writes:
> > > > > > 
> > > > > > > Commit c84ec70b3a72 implemented execution type promotion
> > > > > > > to
> > > > > > > 32-
> > > > > > > bit
> > > > > > > for
> > > > > > > conversions involving half-float registers, which
> > > > > > > empirical
> > > > > > > testing
> > > > > > > suggested
> > > > > > > was required, but it did not incorporate this change into
> > > > > > > the
> > > > > > > assembly validator
> > > > > > > logic. This commits adds that, preventing validation
> > > > > > > errors
> > > > > > > like
> > > > > > > this:
> > > > > > > 
> > > > > > 
> > > > > > I don't think we should be validating empirical assumptions
> > > > > > in
> > > > > > the EU
> > > > > > validator.
> > > > > 
> > > > > I am not sure I get your point, isn't c84ec70b3a72 also based
> > > > > on
> > > > > empirical testing after all?
> > > > > 
> > > > 
> > > > To some extent, but it doesn't attempt to enforce ISA
> > > > restrictions
> > > > based
> > > > on information obtained empirically.
> > > > 
> > > > > 
> > > > > > > mov(16)  g9<4>B   g3<16,8,2>HF     { align1 1H };
> > > > > > > ERROR: Destination stride must be equal to the ratio of
> > > > > > > the
> > > > > > > sizes
> > > > > > > of the
> > > > > > >        execution data type to the destination type
> > > > > > > 
> > > > > > > Fixes: c84ec70b3a72 "intel/fs: Promote execution type to
> > > > > > > 32-bit
> > > > > > > when any half-float conversion is needed."
> > > > > > 
> > > > > > I don't think this "fixes" anything that ever worked.
> > > > > 
> > > > > It is true that the code in that trace above is not something
> > > > > we
> > > > > can
> > > > > produce right now, because it is a conversion from HF to B
> > > > > and
> > > > > that
> > > > > should only happen within the context of
> > > > > VK_KHR_shader_float16_int8,
> > > > > however, this is a consequence of the fact that since
> > > > > c84ec70b3a72
> > > > > there is an inconsistency between what we do at the IR level
> > > > > regarding
> > > > > execution size of HF conversions and what the EU validator is
> > > > > doing,
> > > > > and from that perspective this is really fixing an
> > > > > inconsistency
> > > > > that
> > > > > didn't exist before, and I thought we would want to address
> > > > > that
> > > > > sooner
> > > > > rather than later and track it down to the original change
> > > > > that
> > > > > introduced that inconsistency so we know where this is coming
> > > > > from.
> > > > > 
> > > > 
> > > > The "inconsistency" between the IR's get_exec_type() and the EU
> > > > validator's execution_type() has existed ever since
> > > > a05b6f25bf4bfad7
> > > > removed the HF assert from get_exec_type() without actually
> > > > implementing
> > > > the code required to handle HF operands (which is what my
> > > > commit
> > > > c84ec70b3a72 did).
> > > 
> > > I agree with the fact that since a05b6f25bf4bfad7 the validator
> > > could
> > > reject valid code and that had nothing to do with your patch,
> > 
> > The validator rejected the same valid HF code since it was written,
> > that
> > had nothing to do with neither a05b6f25bf4bfad7 nor with my patch,
> > and
> > it is the real problem this patch was working around.
> > 
> > > but the inconsistency I am talking about here, that this patch
> > > fixes,
> > > is the one about get_exec_type() in the IR and execution_type()
> > > in
> > > the
> > > validator doing different things for HF instructions, which only
> > > exists since your patch and which you discuss below.
> > > 
> > 
> > The "inconsistency" exists ever since get_exec_type() was
> > introduced
> > without correct handling of HF types (even though execution_type()
> > already attempted to handle it).  And I disagree that it's a real
> > inconsistency except due to the fact that the validator is
> > incorrectly
> > attempting to validate the alignment of the destination region
> > according
> > to a rule that doesn't apply to HF types.
> > 
> > > > > Anyway, that was my rationale for the Fixes tag, but if you
> > > > > think
> > > > > this
> > > > > is not useful I am happy to drop this patch and just include
> > > > > it
> > > > > as
> > > > > part
> > > > > of my series without the tag.
> > > > > 
> > > > 
> > > > I'd like to see the actual regioning restrictions for HF types
> > > > implemented in the EU validator as part of your series.
> > > 
> > > Ok, let's see if we can agree on what restrictions should we
> > > implement
> > > then. I can implement this restriction as documented:
> > > 
> > > "Conversion between Integer and HF (Half Float) must be DWord-
> > > aligned
> > > and strided by a DWord on the destination"
> > > 
> > > Instead of trying to apply the general one that is currently
> > > breaking.
> > > That will fix the assertion issue. I guess my issue with it was
> > > that
> > > going by your analysis this restriction is not telling the full
> > > picture, this is what you had to say about this restriction:
> > > 
> > > "I have a feeling that the reason for this may be that the 16-bit
> > > pipeline lacks the ability to handle conversions from or to half-
> > > float, 
> > > so the execution type is implicitly promoted to the matching
> > > (integer
> > > or floating-point) 32-bit type where any HF conversion would be
> > > needed.  And on those the usual alignment restriction of the
> > > destination to a larger execution type applies."
> > > 
> > > But I guess your point is that we can ignore these details at the
> > > validator level and just focus on validating strictly what the
> > > PRM
> > > says. Fair enough.
> > > 
> > > Unfortunatley, you also mentioned that this restriction *seems*
> > > to
> > > be
> > > overriden by this one on all platforms but BDW (even though the
> > > both
> > > are listed, at least I see both in my SKL docs):
> > > 
> > > "There is a relaxed alignment rule for word destinations. When
> > > the
> > > destination type is word (UW, W, HF), destination data types can
> > > be
> > > aligned to either the lowest word or the second lowest word of
> > > the
> > > execution channel. This means the destination data words can be
> > > either
> > > all in the even word locations or all in the odd word locations."
> > > 
> > 
> > I'd implement this one since it seems like the most specific,
> > except
> > on
> > BDW where only the previous (almost equivalent) restriction seems
> > to
> > apply. 
> 
> I have just realized that this restriction could be at odds with this
> other aspect of the spec:
> 
> "There is no direct conversion from B/UB to DF or DF to B/UB. Use two
> instructions and a word or DWord intermediate type."
> 
> So it is okay to convert from B to W and then from W to DF/Q (it has
> the same text from conversions between HF and 64-bit types). We
> actually emit these conversions and they work fine going by the
> existing CTS tests, but these conversions are not aligned to 32-bit
> like the restriction states, instead they are aligned to 64-bit (the
> regioning lowering pass does this):
> 
> mov(8) g14<4>W   g5<4,4,1>Q   { align1 2Q };
> 
> So I think this is a bit inconsistent again and I guess we need to
> choose what we want to do. I can think of 4 options:
> 
> 1. We could assume that the restriction is correctly formulated and
> split the instructions to respect the restriction as-is even if we
> have
> not found any evidence that this doesn't work
> 
> 2. We could assume that the restriction is correctly formulated and
> the
> regioning lowering pass need to be fixed to generate a conversion
> with
> a stride of 2 even if the execution size if 64-bit (haven't tried
> this
> since it sounds sketchy to me).
> 
> 3. We could understand that the restriction is strictly formulated
> for
> conversions from a 32-bit execution types, and decide that it does
> not
> apply to conversions from 64-bit sources.
> 
> 4. We can assume that the restriction is formulated incorrectly and
> instead of saying that destination words should all be in even or odd
> slots, assuming a conversion from a 32-bit type, it should just say
> that they should be aligned to the execution type to also accomodate
> conversions from 64-bit types.
> 
> And indepedently of how we decide to interpret the restriction in the
> end, given that we have discussed here that we should only validate
> things as they are described in the PRM, I think we also need to
> decide
> what we implement in the validator (if anything) if we decide to go
> with either 3) or 4) (were we would not be exactly following the PRM
> to
> the letter).
> 
> I kind of think that 4) might be what is going on here...
> what do you think?

And one more thing: if we choose 4) then we would basically be falling
back to the general rule where we require that destination stride must
be equal to the ratio of the sizes of the execution data type to the
destination type, with the exception that the rule we are discussing
here allows some flexibility on the alignment of the subnr... but that
relaxation is formulated with 32-bit language too so I would not know
what to do with that if we decide to extend the interpretation of the
restriction as described in 4), so that makes me think that maybe the
interpretation in 3) is more likely.

> >  There are shitloads of other restrictions we're missing for HF
> > types BTW, check out the section "Special Requirements for Handling
> > Mixed Mode Float Operations" of the hardware spec.
> > 
> > > Which you discussed didn't make sense to you and I agreed, if
> > > only
> > > because my own experiments suggested that the implications that
> > > one
> > > would derive from a strict interpretation of this (that packed
> > > 16-
> > > bit
> > > data is not supported) are not quite true going by the fact that
> > > we
> > > are
> > > emitting packed instructions on all platforms without any
> > > indication
> > > that this doesn't work. So what should we do about this one? If
> > > we
> > > decide that we want to implement this then we have to re-think
> > > the
> > > implementation we have.
> > > 
> > 
> > The implication on packed HF instructions is likely accidental, I
> > don't
> > think we should enforce the rule except for conversion operations.
> > 
> > > Should we just validate the one about the integer/float
> > > conversions?
> > > Should we do that only on BDW or on all platforms?
> > > 
> > > >   I don't see the
> > > > need to introduce any empirical assumptions into the EU
> > > > validator's
> > > > execution_type() in order to achieve that, even if that means
> > > > that
> > > > get_exec_type() and execution_type() don't do the exact same
> > > > calculation
> > > > -- What you call an inconsistency is the consequence of
> > > > execution_type()
> > > > being the hardware spec's opinion on what the execution type
> > > > is,
> > > > which
> > > > we assume is what we need to use while enforcing a regioning
> > > > restriction
> > > > that refers to the execution type of the instruction.
> > > 
> > > Mmm... I guess my question is: is the hardware really assuming
> > > that
> > > execution type in all cases that involve HF instructions? What I
> > > got
> > > from your analysis of the whole situation is that the hardware is
> > > actually promoting the execution size to 32-bit in some cases,
> > > and
> > > if
> > > that is the case, then does it make sense to implement code in
> > > the
> > > validator that ignores that fact?
> > 
> > If the hardware spec chose to ignore that fact and instead gave us
> > a
> > different set of regioning restrictions for us to use on HF
> > restrictions
> > that don't rely on the definition of execution type, we should
> > probably
> > have the validator match the hardware spec literally.
> > 
> > > I understand that you think this doesn't have any significance
> > > once
> > > we
> > > make sure that we only apply the restrictions specifically
> > > documented
> > > for HF from the PRM, and you are probably right, however I have
> > > to
> > > admit that it feels a bit odd to me that we do that when we know
> > > that
> > > under the hood there is much more going on and we are producing
> > > FS
> > > IR
> > > code following different rules after all. But maybe it is just me
> > > being too paranoid about this...
> > > 
> > 
> > Wouldn't it be even odder to have the hardware restriction
> > validator
> > diverge from the documentation it's supposedly validating?
> > 
> > > 
> > > > > >   The validator is
> > > > > > still missing an implementation of the quirky HF
> > > > > > restrictions,
> > > > > > and it
> > > > > > wasn't the purpose of c84ec70b3a72 to do such a thing.
> > > > > 
> > > > > While this is true in general, the EU validator does consider
> > > > > the
> > > > > execution type of the instruction to validate general rules
> > > > > such as
> > > > > the
> > > > > one I mentioned in the commit message in this patch. And that
> > > > > part
> > > > > of
> > > > > the validator is inconsistent with c84ec70b3a72.
> > > > 
> > > > That part of the validator was also inconsistent with the code
> > > > generated
> > > > by your original VK_KHR_shader_float16_int8 series even before
> > > > I
> > > > committed c84ec70b3a72.  The reason is that it is trying to
> > > > validate
> > > > a
> > > > restriction that rejects working code, because the "general"
> > > > rule
> > > > it's
> > > > trying to enforce isn't supposed to apply to instructions with
> > > > HF
> > > > operands, which is the real bug.
> > > 
> > > I agree with that, and I'll fix that in my series by preventing
> > > that
> > > rule to apply to these instructions (pending to agree on the
> > > rules
> > > that
> > > we should validate instead), but as I have already explained
> > > above,
> > > that was not the inconsistency I was referring to.
> > > 
> > 
> > It's the only actual inconsistency this was fixing.
> > 
> > > > > In fact, the EU validator is accounting for execution size
> > > > > promotion
> > > > > of HF instructions to 32-bit in SKL+ and CHV only, for
> > > > > conversions
> > > > > from HF->F and mixed float mode instructions... which is part
> > > > > of
> > > > > what
> > > > > c84ec70b3a72 addresses at the IR level, which it actually
> > > > > does
> > > > > for
> > > > > all
> > > > > hardware platforms and in more cases.
> > > > > 
> > > > 
> > > > I'm fine with fixing execution_type() to do the right thing in
> > > > more
> > > > cases and platforms, but I don't think that should involve
> > > > making
> > > > empirical assumptions in the validator.
> > > 
> > > Just to be clear, in the particular case we are discussing here,
> > > I
> > > understand this means that we should leave the validator's
> > > execution_type() as-is.
> > > 
> > 
> > That's not what I was saying.  Last time I looked at
> > execution_type()
> > it
> > did seem somewhat sketchy regarding HF types.  I wouldn't be
> > surprised
> > if it needs some fixes to match the hardware spec (*not* our
> > empirical
> > knowledge of the hardware) in addition to the above.
> > 
> > > > > >   You *should*
> > > > > > definitely implement those restrictions (as they're stated
> > > > > > in
> > > > > > the
> > > > > > hardware spec, without empirical assumptions) in the
> > > > > > validator as
> > > > > > part
> > > > > > of your VK_KHR_shader_float16_int8 series,
> > > > > 
> > > > > Again, I am not sure what you mean by "without empirical
> > > > > assumptions".
> > > > 
> > > > I was just paraphrasing your comment.  If you feel the need to
> > > > write
> > > > a
> > > > comment justifying the existence of some code in the validator
> > > > based
> > > > on
> > > > empirical testing, it probably doesn't belong in the validator.
> > > > 
> > > > > According to the commit message in c84ec70b3a72 "the docs are
> > > > > fairly
> > > > > imcomplete and inconsistent" and you explained here that you
> > > > > had to
> > > > > do
> > > > > some experimentation of your own using the simulator (where
> > > > > you
> > > > > found
> > > > > its results to also be inconsistent with the hardware docs)
> > > > > to
> > > > > try
> > > > > and
> > > > > guess what is happening. That's why I was using the term
> > > > > "empirical"
> > > > > here, since the hardware docs alone aren't clear or correct
> > > > > enough
> > > > > to
> > > > > understand what is really happening, when and in what
> > > > > platforms.
> > > > > 
> > > > 
> > > > If some hardware restriction is described in a contradictory or
> > > > ambiguous way by the hardware spec we should probably avoid
> > > > enforcing
> > > > it
> > > > altogether in the EU validator.
> > > 
> > > Okay, in that case I guess you would rule the restriction about
> > > the
> > > relaxed alignment rule for Word destinations as such, but not the
> > > one
> > > about half-float/integer conversions. Is that correct?
> > > 
> > 
> > We probably need to enforce both since they apply to different sets
> > of
> > hardware.
> > 
> > > > > Anyway, if you don't like the term "empirical" to refer to
> > > > > all
> > > > > this,
> > > > > that's fine by me, but what I need to know is if we agree
> > > > > that
> > > > > we
> > > > > need
> > > > > to implement the same type promotion rules in the validator
> > > > > that we
> > > > > are
> > > > > implementing in the IR, indepedently of whether refer to them
> > > > > as
> > > > > "based
> > > > > on empirical testing" or not.
> > > > > 
> > > > 
> > > > I don't think we agree on that.  IMHO you want to enforce the
> > > > regioning
> > > > restrictions that the hardware spec describes for HF types
> > > > rather
> > > > than
> > > > the current (incorrect and incomplete) generalization of the
> > > > standard
> > > > restrictions.  With those in place the apparent inconsistency
> > > > between
> > > > get_exec_type() and the hardware spec's execution type would be
> > > > harmless.
> > > 
> > > I agree with that. I still feel uneasy about having two
> > > different 
> > > implementations of the execution type in FS IR and the validator,
> > > even
> > > if that doesn't lead to any issues when we do what you say
> > > here...
> > > 
> > 
> > I feel uneasy about the validator not matching the hardware spec.
> > Luckily its only purpose is validating the rest of the compiler, so
> > if
> > there is an actual inconsistency as you seem to think, it will come
> > up.
> > 
> > > At the very least I think that deserves a comment in the
> > > validator
> > > explaining why we think that is okay.
> > > 
> > 
> > A quote of the hardware spec should be enough of a
> > comment.  Right?  And
> > there is already a comment in get_exec_type() explaining why its HF
> > type
> > handling logic is consistent with the hardware spec restrictions
> > for
> > HF
> > types.
> > 
> > > > > >  if anything because currently
> > > > > > it will reject working code that uses HF types.
> > > > > 
> > > > > Just for the sake of clarity, since that sentence above could
> > > > > be
> > > > > interpreted as if all HF code would be rejected: we have been
> > > > > using
> > > > > HF
> > > > > types since we landed VK_KHR_16bit_storage, which includes
> > > > > conversions
> > > > > between HF and F and the EU validator is not complaining
> > > > > about
> > > > > any
> > > > > of
> > > > > that. It is currently a problem only with conversions to
> > > > > smaller
> > > > > types
> > > > > (so only conversions to Byte) because that's where we check
> > > > > for
> > > > > that
> > > > > restriction on the stride, which is based on the execution
> > > > > type
> > > > > of
> > > > > the
> > > > > instruction.
> > > > > 
> > > > > > 
> > > > > > > ---
> > > > > > >  src/intel/compiler/brw_eu_validate.c | 27 ++++++++++++++
> > > > > > > ----
> > > > > > > ----
> > > > > > > -----
> > > > > > >  1 file changed, 14 insertions(+), 13 deletions(-)
> > > > > > > 
> > > > > > > diff --git a/src/intel/compiler/brw_eu_validate.c
> > > > > > > b/src/intel/compiler/brw_eu_validate.c
> > > > > > > index a25010b225c..3bb37677672 100644
> > > > > > > --- a/src/intel/compiler/brw_eu_validate.c
> > > > > > > +++ b/src/intel/compiler/brw_eu_validate.c
> > > > > > > @@ -325,17 +325,20 @@ execution_type(const struct
> > > > > > > gen_device_info
> > > > > > > *devinfo, const brw_inst *inst)
> > > > > > >     unsigned num_sources = num_sources_from_inst(devinfo,
> > > > > > > inst);
> > > > > > >     enum brw_reg_type src0_exec_type, src1_exec_type;
> > > > > > >  
> > > > > > > -   /* Execution data type is independent of destination
> > > > > > > data
> > > > > > > type,
> > > > > > > except in
> > > > > > > -    * mixed F/HF instructions on CHV and SKL+.
> > > > > > > +   /* Empirical testing suggests that type conversions
> > > > > > > involving
> > > > > > > half-float
> > > > > > > +    * promote execution type to 32-bit. See
> > > > > > > get_exec_type() in
> > > > > > > brw_ir_fs.h.
> > > > > > >      */
> > > > > > >     enum brw_reg_type dst_exec_type =
> > > > > > > brw_inst_dst_type(devinfo,
> > > > > > > inst);
> > > > > > >  
> > > > > > >     src0_exec_type =
> > > > > > > execution_type_for_type(brw_inst_src0_type(devinfo,
> > > > > > > inst));
> > > > > > >     if (num_sources == 1) {
> > > > > > > -      if ((devinfo->gen >= 9 || devinfo->is_cherryview)
> > > > > > > &&
> > > > > > > -          src0_exec_type == BRW_REGISTER_TYPE_HF) {
> > > > > > > -         return dst_exec_type;
> > > > > > > +      if (type_sz(src0_exec_type) == 2 && dst_exec_type
> > > > > > > !=
> > > > > > > src0_exec_type) {
> > > > > > > +         if (src0_exec_type == BRW_REGISTER_TYPE_HF)
> > > > > > > +            return BRW_REGISTER_TYPE_F;
> > > > > > > +         else if (dst_exec_type == BRW_REGISTER_TYPE_HF)
> > > > > > > +            return BRW_REGISTER_TYPE_D;
> > > > > > >        }
> > > > > > > +
> > > > > > >        return src0_exec_type;
> > > > > > >     }
> > > > > > >  
> > > > > > > @@ -367,14 +370,12 @@ execution_type(const struct
> > > > > > > gen_device_info
> > > > > > > *devinfo, const brw_inst *inst)
> > > > > > >         src1_exec_type == BRW_REGISTER_TYPE_DF)
> > > > > > >        return BRW_REGISTER_TYPE_DF;
> > > > > > >  
> > > > > > > -   if (devinfo->gen >= 9 || devinfo->is_cherryview) {
> > > > > > > -      if (dst_exec_type == BRW_REGISTER_TYPE_F ||
> > > > > > > -          src0_exec_type == BRW_REGISTER_TYPE_F ||
> > > > > > > -          src1_exec_type == BRW_REGISTER_TYPE_F) {
> > > > > > > -         return BRW_REGISTER_TYPE_F;
> > > > > > > -      } else {
> > > > > > > -         return BRW_REGISTER_TYPE_HF;
> > > > > > > -      }
> > > > > > > +   if (dst_exec_type == BRW_REGISTER_TYPE_F ||
> > > > > > > +       src0_exec_type == BRW_REGISTER_TYPE_F ||
> > > > > > > +       src1_exec_type == BRW_REGISTER_TYPE_F) {
> > > > > > > +      return BRW_REGISTER_TYPE_F;
> > > > > > > +   } else {
> > > > > > > +      return BRW_REGISTER_TYPE_HF;
> > > > > > >     }
> > > > > > >  
> > > > > > >     assert(src0_exec_type == BRW_REGISTER_TYPE_F);
> > > > > > > -- 
> > > > > > > 2.17.1
Iago Toral <itoral@igalia.com> writes:

> On Fri, 2019-01-25 at 12:54 -0800, Francisco Jerez wrote:
>> Iago Toral <itoral@igalia.com> writes:
>> 
>> > On Thu, 2019-01-24 at 11:45 -0800, Francisco Jerez wrote:
>> > > Iago Toral <itoral@igalia.com> writes:
>> > > 
>> > > > On Wed, 2019-01-23 at 06:03 -0800, Francisco Jerez wrote:
>> > > > > Iago Toral Quiroga <itoral@igalia.com> writes:
>> > > > > 
>> > > > > > Commit c84ec70b3a72 implemented execution type promotion to
>> > > > > > 32-
>> > > > > > bit
>> > > > > > for
>> > > > > > conversions involving half-float registers, which empirical
>> > > > > > testing
>> > > > > > suggested
>> > > > > > was required, but it did not incorporate this change into
>> > > > > > the
>> > > > > > assembly validator
>> > > > > > logic. This commits adds that, preventing validation errors
>> > > > > > like
>> > > > > > this:
>> > > > > > 
>> > > > > 
>> > > > > I don't think we should be validating empirical assumptions
>> > > > > in
>> > > > > the EU
>> > > > > validator.
>> > > > 
>> > > > I am not sure I get your point, isn't c84ec70b3a72 also based
>> > > > on
>> > > > empirical testing after all?
>> > > > 
>> > > 
>> > > To some extent, but it doesn't attempt to enforce ISA
>> > > restrictions
>> > > based
>> > > on information obtained empirically.
>> > > 
>> > > > 
>> > > > > > mov(16)  g9<4>B   g3<16,8,2>HF     { align1 1H };
>> > > > > > ERROR: Destination stride must be equal to the ratio of the
>> > > > > > sizes
>> > > > > > of the
>> > > > > >        execution data type to the destination type
>> > > > > > 
>> > > > > > Fixes: c84ec70b3a72 "intel/fs: Promote execution type to
>> > > > > > 32-bit
>> > > > > > when any half-float conversion is needed."
>> > > > > 
>> > > > > I don't think this "fixes" anything that ever worked.
>> > > > 
>> > > > It is true that the code in that trace above is not something
>> > > > we
>> > > > can
>> > > > produce right now, because it is a conversion from HF to B and
>> > > > that
>> > > > should only happen within the context of
>> > > > VK_KHR_shader_float16_int8,
>> > > > however, this is a consequence of the fact that since
>> > > > c84ec70b3a72
>> > > > there is an inconsistency between what we do at the IR level
>> > > > regarding
>> > > > execution size of HF conversions and what the EU validator is
>> > > > doing,
>> > > > and from that perspective this is really fixing an
>> > > > inconsistency
>> > > > that
>> > > > didn't exist before, and I thought we would want to address
>> > > > that
>> > > > sooner
>> > > > rather than later and track it down to the original change that
>> > > > introduced that inconsistency so we know where this is coming
>> > > > from.
>> > > > 
>> > > 
>> > > The "inconsistency" between the IR's get_exec_type() and the EU
>> > > validator's execution_type() has existed ever since
>> > > a05b6f25bf4bfad7
>> > > removed the HF assert from get_exec_type() without actually
>> > > implementing
>> > > the code required to handle HF operands (which is what my commit
>> > > c84ec70b3a72 did).
>> > 
>> > I agree with the fact that since a05b6f25bf4bfad7 the validator
>> > could
>> > reject valid code and that had nothing to do with your patch,
>> 
>> The validator rejected the same valid HF code since it was written,
>> that
>> had nothing to do with neither a05b6f25bf4bfad7 nor with my patch,
>> and
>> it is the real problem this patch was working around.
>> 
>> > but the inconsistency I am talking about here, that this patch
>> > fixes,
>> > is the one about get_exec_type() in the IR and execution_type() in
>> > the
>> > validator doing different things for HF instructions, which only
>> > exists since your patch and which you discuss below.
>> > 
>> 
>> The "inconsistency" exists ever since get_exec_type() was introduced
>> without correct handling of HF types (even though execution_type()
>> already attempted to handle it).  And I disagree that it's a real
>> inconsistency except due to the fact that the validator is
>> incorrectly
>> attempting to validate the alignment of the destination region
>> according
>> to a rule that doesn't apply to HF types.
>> 
>> > > > Anyway, that was my rationale for the Fixes tag, but if you
>> > > > think
>> > > > this
>> > > > is not useful I am happy to drop this patch and just include it
>> > > > as
>> > > > part
>> > > > of my series without the tag.
>> > > > 
>> > > 
>> > > I'd like to see the actual regioning restrictions for HF types
>> > > implemented in the EU validator as part of your series.
>> > 
>> > Ok, let's see if we can agree on what restrictions should we
>> > implement
>> > then. I can implement this restriction as documented:
>> > 
>> > "Conversion between Integer and HF (Half Float) must be DWord-
>> > aligned
>> > and strided by a DWord on the destination"
>> > 
>> > Instead of trying to apply the general one that is currently
>> > breaking.
>> > That will fix the assertion issue. I guess my issue with it was
>> > that
>> > going by your analysis this restriction is not telling the full
>> > picture, this is what you had to say about this restriction:
>> > 
>> > "I have a feeling that the reason for this may be that the 16-bit
>> > pipeline lacks the ability to handle conversions from or to half-
>> > float, 
>> > so the execution type is implicitly promoted to the matching
>> > (integer
>> > or floating-point) 32-bit type where any HF conversion would be
>> > needed.  And on those the usual alignment restriction of the
>> > destination to a larger execution type applies."
>> > 
>> > But I guess your point is that we can ignore these details at the
>> > validator level and just focus on validating strictly what the PRM
>> > says. Fair enough.
>> > 
>> > Unfortunatley, you also mentioned that this restriction *seems* to
>> > be
>> > overriden by this one on all platforms but BDW (even though the
>> > both
>> > are listed, at least I see both in my SKL docs):
>> > 
>> > "There is a relaxed alignment rule for word destinations. When the
>> > destination type is word (UW, W, HF), destination data types can be
>> > aligned to either the lowest word or the second lowest word of the
>> > execution channel. This means the destination data words can be
>> > either
>> > all in the even word locations or all in the odd word locations."
>> > 
>> 
>> I'd implement this one since it seems like the most specific, except
>> on
>> BDW where only the previous (almost equivalent) restriction seems to
>> apply. 
>
> I have just realized that this restriction could be at odds with this
> other aspect of the spec:
>
> "There is no direct conversion from B/UB to DF or DF to B/UB. Use two
> instructions and a word or DWord intermediate type."
>
> So it is okay to convert from B to W and then from W to DF/Q (it has
> the same text from conversions between HF and 64-bit types). We
> actually emit these conversions and they work fine going by the
> existing CTS tests, but these conversions are not aligned to 32-bit
> like the restriction states, instead they are aligned to 64-bit (the
> regioning lowering pass does this):
>
> mov(8) g14<4>W   g5<4,4,1>Q   { align1 2Q };
>
> So I think this is a bit inconsistent again and I guess we need to
> choose what we want to do. I can think of 4 options:
>
> 1. We could assume that the restriction is correctly formulated and
> split the instructions to respect the restriction as-is even if we have
> not found any evidence that this doesn't work
>
> 2. We could assume that the restriction is correctly formulated and the
> regioning lowering pass need to be fixed to generate a conversion with
> a stride of 2 even if the execution size if 64-bit (haven't tried this
> since it sounds sketchy to me).
>
> 3. We could understand that the restriction is strictly formulated for
> conversions from a 32-bit execution types, and decide that it does not
> apply to conversions from 64-bit sources.
>
> 4. We can assume that the restriction is formulated incorrectly and
> instead of saying that destination words should all be in even or odd
> slots, assuming a conversion from a 32-bit type, it should just say
> that they should be aligned to the execution type to also accomodate
> conversions from 64-bit types.
>

4. is basically what the back-end is assuming right now, but it is
indeed not literally equivalent to the regioning restrictions as they're
formulated in the B-Spec.  I'd suggest not validating the DWORD
alignment rule for HF instructions whenever the execution type is
greater than 32 bits, since the B-Spec language about those is rather
inconsistent.

> And indepedently of how we decide to interpret the restriction in the
> end, given that we have discussed here that we should only validate
> things as they are described in the PRM, I think we also need to decide
> what we implement in the validator (if anything) if we decide to go
> with either 3) or 4) (were we would not be exactly following the PRM to
> the letter).
>
> I kind of think that 4) might be what is going on here...
> what do you think?
>
>>  There are shitloads of other restrictions we're missing for HF
>> types BTW, check out the section "Special Requirements for Handling
>> Mixed Mode Float Operations" of the hardware spec.
>> 
>> > Which you discussed didn't make sense to you and I agreed, if only
>> > because my own experiments suggested that the implications that one
>> > would derive from a strict interpretation of this (that packed 16-
>> > bit
>> > data is not supported) are not quite true going by the fact that we
>> > are
>> > emitting packed instructions on all platforms without any
>> > indication
>> > that this doesn't work. So what should we do about this one? If we
>> > decide that we want to implement this then we have to re-think the
>> > implementation we have.
>> > 
>> 
>> The implication on packed HF instructions is likely accidental, I
>> don't
>> think we should enforce the rule except for conversion operations.
>> 
>> > Should we just validate the one about the integer/float
>> > conversions?
>> > Should we do that only on BDW or on all platforms?
>> > 
>> > >   I don't see the
>> > > need to introduce any empirical assumptions into the EU
>> > > validator's
>> > > execution_type() in order to achieve that, even if that means
>> > > that
>> > > get_exec_type() and execution_type() don't do the exact same
>> > > calculation
>> > > -- What you call an inconsistency is the consequence of
>> > > execution_type()
>> > > being the hardware spec's opinion on what the execution type is,
>> > > which
>> > > we assume is what we need to use while enforcing a regioning
>> > > restriction
>> > > that refers to the execution type of the instruction.
>> > 
>> > Mmm... I guess my question is: is the hardware really assuming that
>> > execution type in all cases that involve HF instructions? What I
>> > got
>> > from your analysis of the whole situation is that the hardware is
>> > actually promoting the execution size to 32-bit in some cases, and
>> > if
>> > that is the case, then does it make sense to implement code in the
>> > validator that ignores that fact?
>> 
>> If the hardware spec chose to ignore that fact and instead gave us a
>> different set of regioning restrictions for us to use on HF
>> restrictions
>> that don't rely on the definition of execution type, we should
>> probably
>> have the validator match the hardware spec literally.
>> 
>> > I understand that you think this doesn't have any significance once
>> > we
>> > make sure that we only apply the restrictions specifically
>> > documented
>> > for HF from the PRM, and you are probably right, however I have to
>> > admit that it feels a bit odd to me that we do that when we know
>> > that
>> > under the hood there is much more going on and we are producing FS
>> > IR
>> > code following different rules after all. But maybe it is just me
>> > being too paranoid about this...
>> > 
>> 
>> Wouldn't it be even odder to have the hardware restriction validator
>> diverge from the documentation it's supposedly validating?
>> 
>> > 
>> > > > >   The validator is
>> > > > > still missing an implementation of the quirky HF
>> > > > > restrictions,
>> > > > > and it
>> > > > > wasn't the purpose of c84ec70b3a72 to do such a thing.
>> > > > 
>> > > > While this is true in general, the EU validator does consider
>> > > > the
>> > > > execution type of the instruction to validate general rules
>> > > > such as
>> > > > the
>> > > > one I mentioned in the commit message in this patch. And that
>> > > > part
>> > > > of
>> > > > the validator is inconsistent with c84ec70b3a72.
>> > > 
>> > > That part of the validator was also inconsistent with the code
>> > > generated
>> > > by your original VK_KHR_shader_float16_int8 series even before I
>> > > committed c84ec70b3a72.  The reason is that it is trying to
>> > > validate
>> > > a
>> > > restriction that rejects working code, because the "general" rule
>> > > it's
>> > > trying to enforce isn't supposed to apply to instructions with HF
>> > > operands, which is the real bug.
>> > 
>> > I agree with that, and I'll fix that in my series by preventing
>> > that
>> > rule to apply to these instructions (pending to agree on the rules
>> > that
>> > we should validate instead), but as I have already explained above,
>> > that was not the inconsistency I was referring to.
>> > 
>> 
>> It's the only actual inconsistency this was fixing.
>> 
>> > > > In fact, the EU validator is accounting for execution size
>> > > > promotion
>> > > > of HF instructions to 32-bit in SKL+ and CHV only, for
>> > > > conversions
>> > > > from HF->F and mixed float mode instructions... which is part
>> > > > of
>> > > > what
>> > > > c84ec70b3a72 addresses at the IR level, which it actually does
>> > > > for
>> > > > all
>> > > > hardware platforms and in more cases.
>> > > > 
>> > > 
>> > > I'm fine with fixing execution_type() to do the right thing in
>> > > more
>> > > cases and platforms, but I don't think that should involve making
>> > > empirical assumptions in the validator.
>> > 
>> > Just to be clear, in the particular case we are discussing here, I
>> > understand this means that we should leave the validator's
>> > execution_type() as-is.
>> > 
>> 
>> That's not what I was saying.  Last time I looked at execution_type()
>> it
>> did seem somewhat sketchy regarding HF types.  I wouldn't be
>> surprised
>> if it needs some fixes to match the hardware spec (*not* our
>> empirical
>> knowledge of the hardware) in addition to the above.
>> 
>> > > > >   You *should*
>> > > > > definitely implement those restrictions (as they're stated in
>> > > > > the
>> > > > > hardware spec, without empirical assumptions) in the
>> > > > > validator as
>> > > > > part
>> > > > > of your VK_KHR_shader_float16_int8 series,
>> > > > 
>> > > > Again, I am not sure what you mean by "without empirical
>> > > > assumptions".
>> > > 
>> > > I was just paraphrasing your comment.  If you feel the need to
>> > > write
>> > > a
>> > > comment justifying the existence of some code in the validator
>> > > based
>> > > on
>> > > empirical testing, it probably doesn't belong in the validator.
>> > > 
>> > > > According to the commit message in c84ec70b3a72 "the docs are
>> > > > fairly
>> > > > imcomplete and inconsistent" and you explained here that you
>> > > > had to
>> > > > do
>> > > > some experimentation of your own using the simulator (where you
>> > > > found
>> > > > its results to also be inconsistent with the hardware docs) to
>> > > > try
>> > > > and
>> > > > guess what is happening. That's why I was using the term
>> > > > "empirical"
>> > > > here, since the hardware docs alone aren't clear or correct
>> > > > enough
>> > > > to
>> > > > understand what is really happening, when and in what
>> > > > platforms.
>> > > > 
>> > > 
>> > > If some hardware restriction is described in a contradictory or
>> > > ambiguous way by the hardware spec we should probably avoid
>> > > enforcing
>> > > it
>> > > altogether in the EU validator.
>> > 
>> > Okay, in that case I guess you would rule the restriction about the
>> > relaxed alignment rule for Word destinations as such, but not the
>> > one
>> > about half-float/integer conversions. Is that correct?
>> > 
>> 
>> We probably need to enforce both since they apply to different sets
>> of
>> hardware.
>> 
>> > > > Anyway, if you don't like the term "empirical" to refer to all
>> > > > this,
>> > > > that's fine by me, but what I need to know is if we agree that
>> > > > we
>> > > > need
>> > > > to implement the same type promotion rules in the validator
>> > > > that we
>> > > > are
>> > > > implementing in the IR, indepedently of whether refer to them
>> > > > as
>> > > > "based
>> > > > on empirical testing" or not.
>> > > > 
>> > > 
>> > > I don't think we agree on that.  IMHO you want to enforce the
>> > > regioning
>> > > restrictions that the hardware spec describes for HF types rather
>> > > than
>> > > the current (incorrect and incomplete) generalization of the
>> > > standard
>> > > restrictions.  With those in place the apparent inconsistency
>> > > between
>> > > get_exec_type() and the hardware spec's execution type would be
>> > > harmless.
>> > 
>> > I agree with that. I still feel uneasy about having two different 
>> > implementations of the execution type in FS IR and the validator,
>> > even
>> > if that doesn't lead to any issues when we do what you say here...
>> > 
>> 
>> I feel uneasy about the validator not matching the hardware spec.
>> Luckily its only purpose is validating the rest of the compiler, so
>> if
>> there is an actual inconsistency as you seem to think, it will come
>> up.
>> 
>> > At the very least I think that deserves a comment in the validator
>> > explaining why we think that is okay.
>> > 
>> 
>> A quote of the hardware spec should be enough of a
>> comment.  Right?  And
>> there is already a comment in get_exec_type() explaining why its HF
>> type
>> handling logic is consistent with the hardware spec restrictions for
>> HF
>> types.
>> 
>> > > > >  if anything because currently
>> > > > > it will reject working code that uses HF types.
>> > > > 
>> > > > Just for the sake of clarity, since that sentence above could
>> > > > be
>> > > > interpreted as if all HF code would be rejected: we have been
>> > > > using
>> > > > HF
>> > > > types since we landed VK_KHR_16bit_storage, which includes
>> > > > conversions
>> > > > between HF and F and the EU validator is not complaining about
>> > > > any
>> > > > of
>> > > > that. It is currently a problem only with conversions to
>> > > > smaller
>> > > > types
>> > > > (so only conversions to Byte) because that's where we check for
>> > > > that
>> > > > restriction on the stride, which is based on the execution type
>> > > > of
>> > > > the
>> > > > instruction.
>> > > > 
>> > > > > 
>> > > > > > ---
>> > > > > >  src/intel/compiler/brw_eu_validate.c | 27 ++++++++++++++
>> > > > > > ----
>> > > > > > ----
>> > > > > > -----
>> > > > > >  1 file changed, 14 insertions(+), 13 deletions(-)
>> > > > > > 
>> > > > > > diff --git a/src/intel/compiler/brw_eu_validate.c
>> > > > > > b/src/intel/compiler/brw_eu_validate.c
>> > > > > > index a25010b225c..3bb37677672 100644
>> > > > > > --- a/src/intel/compiler/brw_eu_validate.c
>> > > > > > +++ b/src/intel/compiler/brw_eu_validate.c
>> > > > > > @@ -325,17 +325,20 @@ execution_type(const struct
>> > > > > > gen_device_info
>> > > > > > *devinfo, const brw_inst *inst)
>> > > > > >     unsigned num_sources = num_sources_from_inst(devinfo,
>> > > > > > inst);
>> > > > > >     enum brw_reg_type src0_exec_type, src1_exec_type;
>> > > > > >  
>> > > > > > -   /* Execution data type is independent of destination
>> > > > > > data
>> > > > > > type,
>> > > > > > except in
>> > > > > > -    * mixed F/HF instructions on CHV and SKL+.
>> > > > > > +   /* Empirical testing suggests that type conversions
>> > > > > > involving
>> > > > > > half-float
>> > > > > > +    * promote execution type to 32-bit. See
>> > > > > > get_exec_type() in
>> > > > > > brw_ir_fs.h.
>> > > > > >      */
>> > > > > >     enum brw_reg_type dst_exec_type =
>> > > > > > brw_inst_dst_type(devinfo,
>> > > > > > inst);
>> > > > > >  
>> > > > > >     src0_exec_type =
>> > > > > > execution_type_for_type(brw_inst_src0_type(devinfo, inst));
>> > > > > >     if (num_sources == 1) {
>> > > > > > -      if ((devinfo->gen >= 9 || devinfo->is_cherryview) &&
>> > > > > > -          src0_exec_type == BRW_REGISTER_TYPE_HF) {
>> > > > > > -         return dst_exec_type;
>> > > > > > +      if (type_sz(src0_exec_type) == 2 && dst_exec_type !=
>> > > > > > src0_exec_type) {
>> > > > > > +         if (src0_exec_type == BRW_REGISTER_TYPE_HF)
>> > > > > > +            return BRW_REGISTER_TYPE_F;
>> > > > > > +         else if (dst_exec_type == BRW_REGISTER_TYPE_HF)
>> > > > > > +            return BRW_REGISTER_TYPE_D;
>> > > > > >        }
>> > > > > > +
>> > > > > >        return src0_exec_type;
>> > > > > >     }
>> > > > > >  
>> > > > > > @@ -367,14 +370,12 @@ execution_type(const struct
>> > > > > > gen_device_info
>> > > > > > *devinfo, const brw_inst *inst)
>> > > > > >         src1_exec_type == BRW_REGISTER_TYPE_DF)
>> > > > > >        return BRW_REGISTER_TYPE_DF;
>> > > > > >  
>> > > > > > -   if (devinfo->gen >= 9 || devinfo->is_cherryview) {
>> > > > > > -      if (dst_exec_type == BRW_REGISTER_TYPE_F ||
>> > > > > > -          src0_exec_type == BRW_REGISTER_TYPE_F ||
>> > > > > > -          src1_exec_type == BRW_REGISTER_TYPE_F) {
>> > > > > > -         return BRW_REGISTER_TYPE_F;
>> > > > > > -      } else {
>> > > > > > -         return BRW_REGISTER_TYPE_HF;
>> > > > > > -      }
>> > > > > > +   if (dst_exec_type == BRW_REGISTER_TYPE_F ||
>> > > > > > +       src0_exec_type == BRW_REGISTER_TYPE_F ||
>> > > > > > +       src1_exec_type == BRW_REGISTER_TYPE_F) {
>> > > > > > +      return BRW_REGISTER_TYPE_F;
>> > > > > > +   } else {
>> > > > > > +      return BRW_REGISTER_TYPE_HF;
>> > > > > >     }
>> > > > > >  
>> > > > > >     assert(src0_exec_type == BRW_REGISTER_TYPE_F);
>> > > > > > -- 
>> > > > > > 2.17.1
On Fri, 2019-02-01 at 11:23 -0800, Francisco Jerez wrote:
> Iago Toral <itoral@igalia.com> writes:
> 
> > On Fri, 2019-01-25 at 12:54 -0800, Francisco Jerez wrote:
> > > Iago Toral <itoral@igalia.com> writes:
> > > 
> > > > On Thu, 2019-01-24 at 11:45 -0800, Francisco Jerez wrote:
> > > > > Iago Toral <itoral@igalia.com> writes:
> > > > > 
> > > > > > On Wed, 2019-01-23 at 06:03 -0800, Francisco Jerez wrote:
> > > > > > > Iago Toral Quiroga <itoral@igalia.com> writes:
> > > > > > > 
> > > > > > > > Commit c84ec70b3a72 implemented execution type
> > > > > > > > promotion to
> > > > > > > > 32-
> > > > > > > > bit
> > > > > > > > for
> > > > > > > > conversions involving half-float registers, which
> > > > > > > > empirical
> > > > > > > > testing
> > > > > > > > suggested
> > > > > > > > was required, but it did not incorporate this change
> > > > > > > > into
> > > > > > > > the
> > > > > > > > assembly validator
> > > > > > > > logic. This commits adds that, preventing validation
> > > > > > > > errors
> > > > > > > > like
> > > > > > > > this:
> > > > > > > > 
> > > > > > > 
> > > > > > > I don't think we should be validating empirical
> > > > > > > assumptions
> > > > > > > in
> > > > > > > the EU
> > > > > > > validator.
> > > > > > 
> > > > > > I am not sure I get your point, isn't c84ec70b3a72 also
> > > > > > based
> > > > > > on
> > > > > > empirical testing after all?
> > > > > > 
> > > > > 
> > > > > To some extent, but it doesn't attempt to enforce ISA
> > > > > restrictions
> > > > > based
> > > > > on information obtained empirically.
> > > > > 
> > > > > > 
> > > > > > > > mov(16)  g9<4>B   g3<16,8,2>HF     { align1 1H };
> > > > > > > > ERROR: Destination stride must be equal to the ratio of
> > > > > > > > the
> > > > > > > > sizes
> > > > > > > > of the
> > > > > > > >        execution data type to the destination type
> > > > > > > > 
> > > > > > > > Fixes: c84ec70b3a72 "intel/fs: Promote execution type
> > > > > > > > to
> > > > > > > > 32-bit
> > > > > > > > when any half-float conversion is needed."
> > > > > > > 
> > > > > > > I don't think this "fixes" anything that ever worked.
> > > > > > 
> > > > > > It is true that the code in that trace above is not
> > > > > > something
> > > > > > we
> > > > > > can
> > > > > > produce right now, because it is a conversion from HF to B
> > > > > > and
> > > > > > that
> > > > > > should only happen within the context of
> > > > > > VK_KHR_shader_float16_int8,
> > > > > > however, this is a consequence of the fact that since
> > > > > > c84ec70b3a72
> > > > > > there is an inconsistency between what we do at the IR
> > > > > > level
> > > > > > regarding
> > > > > > execution size of HF conversions and what the EU validator
> > > > > > is
> > > > > > doing,
> > > > > > and from that perspective this is really fixing an
> > > > > > inconsistency
> > > > > > that
> > > > > > didn't exist before, and I thought we would want to address
> > > > > > that
> > > > > > sooner
> > > > > > rather than later and track it down to the original change
> > > > > > that
> > > > > > introduced that inconsistency so we know where this is
> > > > > > coming
> > > > > > from.
> > > > > > 
> > > > > 
> > > > > The "inconsistency" between the IR's get_exec_type() and the
> > > > > EU
> > > > > validator's execution_type() has existed ever since
> > > > > a05b6f25bf4bfad7
> > > > > removed the HF assert from get_exec_type() without actually
> > > > > implementing
> > > > > the code required to handle HF operands (which is what my
> > > > > commit
> > > > > c84ec70b3a72 did).
> > > > 
> > > > I agree with the fact that since a05b6f25bf4bfad7 the validator
> > > > could
> > > > reject valid code and that had nothing to do with your patch,
> > > 
> > > The validator rejected the same valid HF code since it was
> > > written,
> > > that
> > > had nothing to do with neither a05b6f25bf4bfad7 nor with my
> > > patch,
> > > and
> > > it is the real problem this patch was working around.
> > > 
> > > > but the inconsistency I am talking about here, that this patch
> > > > fixes,
> > > > is the one about get_exec_type() in the IR and execution_type()
> > > > in
> > > > the
> > > > validator doing different things for HF instructions, which
> > > > only
> > > > exists since your patch and which you discuss below.
> > > > 
> > > 
> > > The "inconsistency" exists ever since get_exec_type() was
> > > introduced
> > > without correct handling of HF types (even though
> > > execution_type()
> > > already attempted to handle it).  And I disagree that it's a real
> > > inconsistency except due to the fact that the validator is
> > > incorrectly
> > > attempting to validate the alignment of the destination region
> > > according
> > > to a rule that doesn't apply to HF types.
> > > 
> > > > > > Anyway, that was my rationale for the Fixes tag, but if you
> > > > > > think
> > > > > > this
> > > > > > is not useful I am happy to drop this patch and just
> > > > > > include it
> > > > > > as
> > > > > > part
> > > > > > of my series without the tag.
> > > > > > 
> > > > > 
> > > > > I'd like to see the actual regioning restrictions for HF
> > > > > types
> > > > > implemented in the EU validator as part of your series.
> > > > 
> > > > Ok, let's see if we can agree on what restrictions should we
> > > > implement
> > > > then. I can implement this restriction as documented:
> > > > 
> > > > "Conversion between Integer and HF (Half Float) must be DWord-
> > > > aligned
> > > > and strided by a DWord on the destination"
> > > > 
> > > > Instead of trying to apply the general one that is currently
> > > > breaking.
> > > > That will fix the assertion issue. I guess my issue with it was
> > > > that
> > > > going by your analysis this restriction is not telling the full
> > > > picture, this is what you had to say about this restriction:
> > > > 
> > > > "I have a feeling that the reason for this may be that the 16-
> > > > bit
> > > > pipeline lacks the ability to handle conversions from or to
> > > > half-
> > > > float, 
> > > > so the execution type is implicitly promoted to the matching
> > > > (integer
> > > > or floating-point) 32-bit type where any HF conversion would be
> > > > needed.  And on those the usual alignment restriction of the
> > > > destination to a larger execution type applies."
> > > > 
> > > > But I guess your point is that we can ignore these details at
> > > > the
> > > > validator level and just focus on validating strictly what the
> > > > PRM
> > > > says. Fair enough.
> > > > 
> > > > Unfortunatley, you also mentioned that this restriction *seems*
> > > > to
> > > > be
> > > > overriden by this one on all platforms but BDW (even though the
> > > > both
> > > > are listed, at least I see both in my SKL docs):
> > > > 
> > > > "There is a relaxed alignment rule for word destinations. When
> > > > the
> > > > destination type is word (UW, W, HF), destination data types
> > > > can be
> > > > aligned to either the lowest word or the second lowest word of
> > > > the
> > > > execution channel. This means the destination data words can be
> > > > either
> > > > all in the even word locations or all in the odd word
> > > > locations."
> > > > 
> > > 
> > > I'd implement this one since it seems like the most specific,
> > > except
> > > on
> > > BDW where only the previous (almost equivalent) restriction seems
> > > to
> > > apply. 
> > 
> > I have just realized that this restriction could be at odds with
> > this
> > other aspect of the spec:
> > 
> > "There is no direct conversion from B/UB to DF or DF to B/UB. Use
> > two
> > instructions and a word or DWord intermediate type."
> > 
> > So it is okay to convert from B to W and then from W to DF/Q (it
> > has
> > the same text from conversions between HF and 64-bit types). We
> > actually emit these conversions and they work fine going by the
> > existing CTS tests, but these conversions are not aligned to 32-bit
> > like the restriction states, instead they are aligned to 64-bit
> > (the
> > regioning lowering pass does this):
> > 
> > mov(8) g14<4>W   g5<4,4,1>Q   { align1 2Q };
> > 
> > So I think this is a bit inconsistent again and I guess we need to
> > choose what we want to do. I can think of 4 options:
> > 
> > 1. We could assume that the restriction is correctly formulated and
> > split the instructions to respect the restriction as-is even if we
> > have
> > not found any evidence that this doesn't work
> > 
> > 2. We could assume that the restriction is correctly formulated and
> > the
> > regioning lowering pass need to be fixed to generate a conversion
> > with
> > a stride of 2 even if the execution size if 64-bit (haven't tried
> > this
> > since it sounds sketchy to me).
> > 
> > 3. We could understand that the restriction is strictly formulated
> > for
> > conversions from a 32-bit execution types, and decide that it does
> > not
> > apply to conversions from 64-bit sources.
> > 
> > 4. We can assume that the restriction is formulated incorrectly and
> > instead of saying that destination words should all be in even or
> > odd
> > slots, assuming a conversion from a 32-bit type, it should just say
> > that they should be aligned to the execution type to also
> > accomodate
> > conversions from 64-bit types.
> > 
> 
> 4. is basically what the back-end is assuming right now, but it is
> indeed not literally equivalent to the regioning restrictions as
> they're
> formulated in the B-Spec.  I'd suggest not validating the DWORD
> alignment rule for HF instructions whenever the execution type is
> greater than 32 bits, since the B-Spec language about those is rather
> inconsistent.

Ok, I will do that. Just one thing: I guess that when you say HF
instructions you mean 16-bit destinations, since that is the target of
the restriction.

> > And indepedently of how we decide to interpret the restriction in
> > the
> > end, given that we have discussed here that we should only validate
> > things as they are described in the PRM, I think we also need to
> > decide
> > what we implement in the validator (if anything) if we decide to go
> > with either 3) or 4) (were we would not be exactly following the
> > PRM to
> > the letter).
> > 
> > I kind of think that 4) might be what is going on here...
> > what do you think?
> > 
> > >  There are shitloads of other restrictions we're missing for HF
> > > types BTW, check out the section "Special Requirements for
> > > Handling
> > > Mixed Mode Float Operations" of the hardware spec.
> > > 
> > > > Which you discussed didn't make sense to you and I agreed, if
> > > > only
> > > > because my own experiments suggested that the implications that
> > > > one
> > > > would derive from a strict interpretation of this (that packed
> > > > 16-
> > > > bit
> > > > data is not supported) are not quite true going by the fact
> > > > that we
> > > > are
> > > > emitting packed instructions on all platforms without any
> > > > indication
> > > > that this doesn't work. So what should we do about this one? If
> > > > we
> > > > decide that we want to implement this then we have to re-think
> > > > the
> > > > implementation we have.
> > > > 
> > > 
> > > The implication on packed HF instructions is likely accidental, I
> > > don't
> > > think we should enforce the rule except for conversion
> > > operations.
> > > 
> > > > Should we just validate the one about the integer/float
> > > > conversions?
> > > > Should we do that only on BDW or on all platforms?
> > > > 
> > > > >   I don't see the
> > > > > need to introduce any empirical assumptions into the EU
> > > > > validator's
> > > > > execution_type() in order to achieve that, even if that means
> > > > > that
> > > > > get_exec_type() and execution_type() don't do the exact same
> > > > > calculation
> > > > > -- What you call an inconsistency is the consequence of
> > > > > execution_type()
> > > > > being the hardware spec's opinion on what the execution type
> > > > > is,
> > > > > which
> > > > > we assume is what we need to use while enforcing a regioning
> > > > > restriction
> > > > > that refers to the execution type of the instruction.
> > > > 
> > > > Mmm... I guess my question is: is the hardware really assuming
> > > > that
> > > > execution type in all cases that involve HF instructions? What
> > > > I
> > > > got
> > > > from your analysis of the whole situation is that the hardware
> > > > is
> > > > actually promoting the execution size to 32-bit in some cases,
> > > > and
> > > > if
> > > > that is the case, then does it make sense to implement code in
> > > > the
> > > > validator that ignores that fact?
> > > 
> > > If the hardware spec chose to ignore that fact and instead gave
> > > us a
> > > different set of regioning restrictions for us to use on HF
> > > restrictions
> > > that don't rely on the definition of execution type, we should
> > > probably
> > > have the validator match the hardware spec literally.
> > > 
> > > > I understand that you think this doesn't have any significance
> > > > once
> > > > we
> > > > make sure that we only apply the restrictions specifically
> > > > documented
> > > > for HF from the PRM, and you are probably right, however I have
> > > > to
> > > > admit that it feels a bit odd to me that we do that when we
> > > > know
> > > > that
> > > > under the hood there is much more going on and we are producing
> > > > FS
> > > > IR
> > > > code following different rules after all. But maybe it is just
> > > > me
> > > > being too paranoid about this...
> > > > 
> > > 
> > > Wouldn't it be even odder to have the hardware restriction
> > > validator
> > > diverge from the documentation it's supposedly validating?
> > > 
> > > > 
> > > > > > >   The validator is
> > > > > > > still missing an implementation of the quirky HF
> > > > > > > restrictions,
> > > > > > > and it
> > > > > > > wasn't the purpose of c84ec70b3a72 to do such a thing.
> > > > > > 
> > > > > > While this is true in general, the EU validator does
> > > > > > consider
> > > > > > the
> > > > > > execution type of the instruction to validate general rules
> > > > > > such as
> > > > > > the
> > > > > > one I mentioned in the commit message in this patch. And
> > > > > > that
> > > > > > part
> > > > > > of
> > > > > > the validator is inconsistent with c84ec70b3a72.
> > > > > 
> > > > > That part of the validator was also inconsistent with the
> > > > > code
> > > > > generated
> > > > > by your original VK_KHR_shader_float16_int8 series even
> > > > > before I
> > > > > committed c84ec70b3a72.  The reason is that it is trying to
> > > > > validate
> > > > > a
> > > > > restriction that rejects working code, because the "general"
> > > > > rule
> > > > > it's
> > > > > trying to enforce isn't supposed to apply to instructions
> > > > > with HF
> > > > > operands, which is the real bug.
> > > > 
> > > > I agree with that, and I'll fix that in my series by preventing
> > > > that
> > > > rule to apply to these instructions (pending to agree on the
> > > > rules
> > > > that
> > > > we should validate instead), but as I have already explained
> > > > above,
> > > > that was not the inconsistency I was referring to.
> > > > 
> > > 
> > > It's the only actual inconsistency this was fixing.
> > > 
> > > > > > In fact, the EU validator is accounting for execution size
> > > > > > promotion
> > > > > > of HF instructions to 32-bit in SKL+ and CHV only, for
> > > > > > conversions
> > > > > > from HF->F and mixed float mode instructions... which is
> > > > > > part
> > > > > > of
> > > > > > what
> > > > > > c84ec70b3a72 addresses at the IR level, which it actually
> > > > > > does
> > > > > > for
> > > > > > all
> > > > > > hardware platforms and in more cases.
> > > > > > 
> > > > > 
> > > > > I'm fine with fixing execution_type() to do the right thing
> > > > > in
> > > > > more
> > > > > cases and platforms, but I don't think that should involve
> > > > > making
> > > > > empirical assumptions in the validator.
> > > > 
> > > > Just to be clear, in the particular case we are discussing
> > > > here, I
> > > > understand this means that we should leave the validator's
> > > > execution_type() as-is.
> > > > 
> > > 
> > > That's not what I was saying.  Last time I looked at
> > > execution_type()
> > > it
> > > did seem somewhat sketchy regarding HF types.  I wouldn't be
> > > surprised
> > > if it needs some fixes to match the hardware spec (*not* our
> > > empirical
> > > knowledge of the hardware) in addition to the above.
> > > 
> > > > > > >   You *should*
> > > > > > > definitely implement those restrictions (as they're
> > > > > > > stated in
> > > > > > > the
> > > > > > > hardware spec, without empirical assumptions) in the
> > > > > > > validator as
> > > > > > > part
> > > > > > > of your VK_KHR_shader_float16_int8 series,
> > > > > > 
> > > > > > Again, I am not sure what you mean by "without empirical
> > > > > > assumptions".
> > > > > 
> > > > > I was just paraphrasing your comment.  If you feel the need
> > > > > to
> > > > > write
> > > > > a
> > > > > comment justifying the existence of some code in the
> > > > > validator
> > > > > based
> > > > > on
> > > > > empirical testing, it probably doesn't belong in the
> > > > > validator.
> > > > > 
> > > > > > According to the commit message in c84ec70b3a72 "the docs
> > > > > > are
> > > > > > fairly
> > > > > > imcomplete and inconsistent" and you explained here that
> > > > > > you
> > > > > > had to
> > > > > > do
> > > > > > some experimentation of your own using the simulator (where
> > > > > > you
> > > > > > found
> > > > > > its results to also be inconsistent with the hardware docs)
> > > > > > to
> > > > > > try
> > > > > > and
> > > > > > guess what is happening. That's why I was using the term
> > > > > > "empirical"
> > > > > > here, since the hardware docs alone aren't clear or correct
> > > > > > enough
> > > > > > to
> > > > > > understand what is really happening, when and in what
> > > > > > platforms.
> > > > > > 
> > > > > 
> > > > > If some hardware restriction is described in a contradictory
> > > > > or
> > > > > ambiguous way by the hardware spec we should probably avoid
> > > > > enforcing
> > > > > it
> > > > > altogether in the EU validator.
> > > > 
> > > > Okay, in that case I guess you would rule the restriction about
> > > > the
> > > > relaxed alignment rule for Word destinations as such, but not
> > > > the
> > > > one
> > > > about half-float/integer conversions. Is that correct?
> > > > 
> > > 
> > > We probably need to enforce both since they apply to different
> > > sets
> > > of
> > > hardware.
> > > 
> > > > > > Anyway, if you don't like the term "empirical" to refer to
> > > > > > all
> > > > > > this,
> > > > > > that's fine by me, but what I need to know is if we agree
> > > > > > that
> > > > > > we
> > > > > > need
> > > > > > to implement the same type promotion rules in the validator
> > > > > > that we
> > > > > > are
> > > > > > implementing in the IR, indepedently of whether refer to
> > > > > > them
> > > > > > as
> > > > > > "based
> > > > > > on empirical testing" or not.
> > > > > > 
> > > > > 
> > > > > I don't think we agree on that.  IMHO you want to enforce the
> > > > > regioning
> > > > > restrictions that the hardware spec describes for HF types
> > > > > rather
> > > > > than
> > > > > the current (incorrect and incomplete) generalization of the
> > > > > standard
> > > > > restrictions.  With those in place the apparent inconsistency
> > > > > between
> > > > > get_exec_type() and the hardware spec's execution type would
> > > > > be
> > > > > harmless.
> > > > 
> > > > I agree with that. I still feel uneasy about having two
> > > > different 
> > > > implementations of the execution type in FS IR and the
> > > > validator,
> > > > even
> > > > if that doesn't lead to any issues when we do what you say
> > > > here...
> > > > 
> > > 
> > > I feel uneasy about the validator not matching the hardware spec.
> > > Luckily its only purpose is validating the rest of the compiler,
> > > so
> > > if
> > > there is an actual inconsistency as you seem to think, it will
> > > come
> > > up.
> > > 
> > > > At the very least I think that deserves a comment in the
> > > > validator
> > > > explaining why we think that is okay.
> > > > 
> > > 
> > > A quote of the hardware spec should be enough of a
> > > comment.  Right?  And
> > > there is already a comment in get_exec_type() explaining why its
> > > HF
> > > type
> > > handling logic is consistent with the hardware spec restrictions
> > > for
> > > HF
> > > types.
> > > 
> > > > > > >  if anything because currently
> > > > > > > it will reject working code that uses HF types.
> > > > > > 
> > > > > > Just for the sake of clarity, since that sentence above
> > > > > > could
> > > > > > be
> > > > > > interpreted as if all HF code would be rejected: we have
> > > > > > been
> > > > > > using
> > > > > > HF
> > > > > > types since we landed VK_KHR_16bit_storage, which includes
> > > > > > conversions
> > > > > > between HF and F and the EU validator is not complaining
> > > > > > about
> > > > > > any
> > > > > > of
> > > > > > that. It is currently a problem only with conversions to
> > > > > > smaller
> > > > > > types
> > > > > > (so only conversions to Byte) because that's where we check
> > > > > > for
> > > > > > that
> > > > > > restriction on the stride, which is based on the execution
> > > > > > type
> > > > > > of
> > > > > > the
> > > > > > instruction.
> > > > > > 
> > > > > > > 
> > > > > > > > ---
> > > > > > > >  src/intel/compiler/brw_eu_validate.c | 27
> > > > > > > > ++++++++++++++
> > > > > > > > ----
> > > > > > > > ----
> > > > > > > > -----
> > > > > > > >  1 file changed, 14 insertions(+), 13 deletions(-)
> > > > > > > > 
> > > > > > > > diff --git a/src/intel/compiler/brw_eu_validate.c
> > > > > > > > b/src/intel/compiler/brw_eu_validate.c
> > > > > > > > index a25010b225c..3bb37677672 100644
> > > > > > > > --- a/src/intel/compiler/brw_eu_validate.c
> > > > > > > > +++ b/src/intel/compiler/brw_eu_validate.c
> > > > > > > > @@ -325,17 +325,20 @@ execution_type(const struct
> > > > > > > > gen_device_info
> > > > > > > > *devinfo, const brw_inst *inst)
> > > > > > > >     unsigned num_sources =
> > > > > > > > num_sources_from_inst(devinfo,
> > > > > > > > inst);
> > > > > > > >     enum brw_reg_type src0_exec_type, src1_exec_type;
> > > > > > > >  
> > > > > > > > -   /* Execution data type is independent of
> > > > > > > > destination
> > > > > > > > data
> > > > > > > > type,
> > > > > > > > except in
> > > > > > > > -    * mixed F/HF instructions on CHV and SKL+.
> > > > > > > > +   /* Empirical testing suggests that type conversions
> > > > > > > > involving
> > > > > > > > half-float
> > > > > > > > +    * promote execution type to 32-bit. See
> > > > > > > > get_exec_type() in
> > > > > > > > brw_ir_fs.h.
> > > > > > > >      */
> > > > > > > >     enum brw_reg_type dst_exec_type =
> > > > > > > > brw_inst_dst_type(devinfo,
> > > > > > > > inst);
> > > > > > > >  
> > > > > > > >     src0_exec_type =
> > > > > > > > execution_type_for_type(brw_inst_src0_type(devinfo,
> > > > > > > > inst));
> > > > > > > >     if (num_sources == 1) {
> > > > > > > > -      if ((devinfo->gen >= 9 || devinfo-
> > > > > > > > >is_cherryview) &&
> > > > > > > > -          src0_exec_type == BRW_REGISTER_TYPE_HF) {
> > > > > > > > -         return dst_exec_type;
> > > > > > > > +      if (type_sz(src0_exec_type) == 2 &&
> > > > > > > > dst_exec_type !=
> > > > > > > > src0_exec_type) {
> > > > > > > > +         if (src0_exec_type == BRW_REGISTER_TYPE_HF)
> > > > > > > > +            return BRW_REGISTER_TYPE_F;
> > > > > > > > +         else if (dst_exec_type ==
> > > > > > > > BRW_REGISTER_TYPE_HF)
> > > > > > > > +            return BRW_REGISTER_TYPE_D;
> > > > > > > >        }
> > > > > > > > +
> > > > > > > >        return src0_exec_type;
> > > > > > > >     }
> > > > > > > >  
> > > > > > > > @@ -367,14 +370,12 @@ execution_type(const struct
> > > > > > > > gen_device_info
> > > > > > > > *devinfo, const brw_inst *inst)
> > > > > > > >         src1_exec_type == BRW_REGISTER_TYPE_DF)
> > > > > > > >        return BRW_REGISTER_TYPE_DF;
> > > > > > > >  
> > > > > > > > -   if (devinfo->gen >= 9 || devinfo->is_cherryview) {
> > > > > > > > -      if (dst_exec_type == BRW_REGISTER_TYPE_F ||
> > > > > > > > -          src0_exec_type == BRW_REGISTER_TYPE_F ||
> > > > > > > > -          src1_exec_type == BRW_REGISTER_TYPE_F) {
> > > > > > > > -         return BRW_REGISTER_TYPE_F;
> > > > > > > > -      } else {
> > > > > > > > -         return BRW_REGISTER_TYPE_HF;
> > > > > > > > -      }
> > > > > > > > +   if (dst_exec_type == BRW_REGISTER_TYPE_F ||
> > > > > > > > +       src0_exec_type == BRW_REGISTER_TYPE_F ||
> > > > > > > > +       src1_exec_type == BRW_REGISTER_TYPE_F) {
> > > > > > > > +      return BRW_REGISTER_TYPE_F;
> > > > > > > > +   } else {
> > > > > > > > +      return BRW_REGISTER_TYPE_HF;
> > > > > > > >     }
> > > > > > > >  
> > > > > > > >     assert(src0_exec_type == BRW_REGISTER_TYPE_F);
> > > > > > > > -- 
> > > > > > > > 2.17.1
On Mon, 2019-02-04 at 08:50 +0100, Iago Toral wrote:
> On Fri, 2019-02-01 at 11:23 -0800, Francisco Jerez wrote:
> > Iago Toral <itoral@igalia.com> writes:
> > 
> > > On Fri, 2019-01-25 at 12:54 -0800, Francisco Jerez wrote:
> > > > Iago Toral <itoral@igalia.com> writes:
> > > > 
> > > > > On Thu, 2019-01-24 at 11:45 -0800, Francisco Jerez wrote:
> > > > > > Iago Toral <itoral@igalia.com> writes:
> > > > > > 
> > > > > > > On Wed, 2019-01-23 at 06:03 -0800, Francisco Jerez wrote:
> > > > > > > > Iago Toral Quiroga <itoral@igalia.com> writes:
> > > > > > > > 
> > > > > > > > > Commit c84ec70b3a72 implemented execution type
> > > > > > > > > promotion to
> > > > > > > > > 32-
> > > > > > > > > bit
> > > > > > > > > for
> > > > > > > > > conversions involving half-float registers, which
> > > > > > > > > empirical
> > > > > > > > > testing
> > > > > > > > > suggested
> > > > > > > > > was required, but it did not incorporate this change
> > > > > > > > > into
> > > > > > > > > the
> > > > > > > > > assembly validator
> > > > > > > > > logic. This commits adds that, preventing validation
> > > > > > > > > errors
> > > > > > > > > like
> > > > > > > > > this:
> > > > > > > > > 
> > > > > > > > 
> > > > > > > > I don't think we should be validating empirical
> > > > > > > > assumptions
> > > > > > > > in
> > > > > > > > the EU
> > > > > > > > validator.
> > > > > > > 
> > > > > > > I am not sure I get your point, isn't c84ec70b3a72 also
> > > > > > > based
> > > > > > > on
> > > > > > > empirical testing after all?
> > > > > > > 
> > > > > > 
> > > > > > To some extent, but it doesn't attempt to enforce ISA
> > > > > > restrictions
> > > > > > based
> > > > > > on information obtained empirically.
> > > > > > 
> > > > > > > 
> > > > > > > > > mov(16)  g9<4>B   g3<16,8,2>HF     { align1 1H };
> > > > > > > > > ERROR: Destination stride must be equal to the ratio
> > > > > > > > > of
> > > > > > > > > the
> > > > > > > > > sizes
> > > > > > > > > of the
> > > > > > > > >        execution data type to the destination type
> > > > > > > > > 
> > > > > > > > > Fixes: c84ec70b3a72 "intel/fs: Promote execution type
> > > > > > > > > to
> > > > > > > > > 32-bit
> > > > > > > > > when any half-float conversion is needed."
> > > > > > > > 
> > > > > > > > I don't think this "fixes" anything that ever worked.
> > > > > > > 
> > > > > > > It is true that the code in that trace above is not
> > > > > > > something
> > > > > > > we
> > > > > > > can
> > > > > > > produce right now, because it is a conversion from HF to
> > > > > > > B
> > > > > > > and
> > > > > > > that
> > > > > > > should only happen within the context of
> > > > > > > VK_KHR_shader_float16_int8,
> > > > > > > however, this is a consequence of the fact that since
> > > > > > > c84ec70b3a72
> > > > > > > there is an inconsistency between what we do at the IR
> > > > > > > level
> > > > > > > regarding
> > > > > > > execution size of HF conversions and what the EU
> > > > > > > validator
> > > > > > > is
> > > > > > > doing,
> > > > > > > and from that perspective this is really fixing an
> > > > > > > inconsistency
> > > > > > > that
> > > > > > > didn't exist before, and I thought we would want to
> > > > > > > address
> > > > > > > that
> > > > > > > sooner
> > > > > > > rather than later and track it down to the original
> > > > > > > change
> > > > > > > that
> > > > > > > introduced that inconsistency so we know where this is
> > > > > > > coming
> > > > > > > from.
> > > > > > > 
> > > > > > 
> > > > > > The "inconsistency" between the IR's get_exec_type() and
> > > > > > the
> > > > > > EU
> > > > > > validator's execution_type() has existed ever since
> > > > > > a05b6f25bf4bfad7
> > > > > > removed the HF assert from get_exec_type() without actually
> > > > > > implementing
> > > > > > the code required to handle HF operands (which is what my
> > > > > > commit
> > > > > > c84ec70b3a72 did).
> > > > > 
> > > > > I agree with the fact that since a05b6f25bf4bfad7 the
> > > > > validator
> > > > > could
> > > > > reject valid code and that had nothing to do with your patch,
> > > > 
> > > > The validator rejected the same valid HF code since it was
> > > > written,
> > > > that
> > > > had nothing to do with neither a05b6f25bf4bfad7 nor with my
> > > > patch,
> > > > and
> > > > it is the real problem this patch was working around.
> > > > 
> > > > > but the inconsistency I am talking about here, that this
> > > > > patch
> > > > > fixes,
> > > > > is the one about get_exec_type() in the IR and
> > > > > execution_type()
> > > > > in
> > > > > the
> > > > > validator doing different things for HF instructions, which
> > > > > only
> > > > > exists since your patch and which you discuss below.
> > > > > 
> > > > 
> > > > The "inconsistency" exists ever since get_exec_type() was
> > > > introduced
> > > > without correct handling of HF types (even though
> > > > execution_type()
> > > > already attempted to handle it).  And I disagree that it's a
> > > > real
> > > > inconsistency except due to the fact that the validator is
> > > > incorrectly
> > > > attempting to validate the alignment of the destination region
> > > > according
> > > > to a rule that doesn't apply to HF types.
> > > > 
> > > > > > > Anyway, that was my rationale for the Fixes tag, but if
> > > > > > > you
> > > > > > > think
> > > > > > > this
> > > > > > > is not useful I am happy to drop this patch and just
> > > > > > > include it
> > > > > > > as
> > > > > > > part
> > > > > > > of my series without the tag.
> > > > > > > 
> > > > > > 
> > > > > > I'd like to see the actual regioning restrictions for HF
> > > > > > types
> > > > > > implemented in the EU validator as part of your series.
> > > > > 
> > > > > Ok, let's see if we can agree on what restrictions should we
> > > > > implement
> > > > > then. I can implement this restriction as documented:
> > > > > 
> > > > > "Conversion between Integer and HF (Half Float) must be
> > > > > DWord-
> > > > > aligned
> > > > > and strided by a DWord on the destination"
> > > > > 
> > > > > Instead of trying to apply the general one that is currently
> > > > > breaking.
> > > > > That will fix the assertion issue. I guess my issue with it
> > > > > was
> > > > > that
> > > > > going by your analysis this restriction is not telling the
> > > > > full
> > > > > picture, this is what you had to say about this restriction:
> > > > > 
> > > > > "I have a feeling that the reason for this may be that the
> > > > > 16-
> > > > > bit
> > > > > pipeline lacks the ability to handle conversions from or to
> > > > > half-
> > > > > float, 
> > > > > so the execution type is implicitly promoted to the matching
> > > > > (integer
> > > > > or floating-point) 32-bit type where any HF conversion would
> > > > > be
> > > > > needed.  And on those the usual alignment restriction of the
> > > > > destination to a larger execution type applies."
> > > > > 
> > > > > But I guess your point is that we can ignore these details at
> > > > > the
> > > > > validator level and just focus on validating strictly what
> > > > > the
> > > > > PRM
> > > > > says. Fair enough.
> > > > > 
> > > > > Unfortunatley, you also mentioned that this restriction
> > > > > *seems*
> > > > > to
> > > > > be
> > > > > overriden by this one on all platforms but BDW (even though
> > > > > the
> > > > > both
> > > > > are listed, at least I see both in my SKL docs):
> > > > > 
> > > > > "There is a relaxed alignment rule for word destinations.
> > > > > When
> > > > > the
> > > > > destination type is word (UW, W, HF), destination data types
> > > > > can be
> > > > > aligned to either the lowest word or the second lowest word
> > > > > of
> > > > > the
> > > > > execution channel. This means the destination data words can
> > > > > be
> > > > > either
> > > > > all in the even word locations or all in the odd word
> > > > > locations."
> > > > > 
> > > > 
> > > > I'd implement this one since it seems like the most specific,
> > > > except
> > > > on
> > > > BDW where only the previous (almost equivalent) restriction
> > > > seems
> > > > to
> > > > apply. 
> > > 
> > > I have just realized that this restriction could be at odds with
> > > this
> > > other aspect of the spec:
> > > 
> > > "There is no direct conversion from B/UB to DF or DF to B/UB. Use
> > > two
> > > instructions and a word or DWord intermediate type."
> > > 
> > > So it is okay to convert from B to W and then from W to DF/Q (it
> > > has
> > > the same text from conversions between HF and 64-bit types). We
> > > actually emit these conversions and they work fine going by the
> > > existing CTS tests, but these conversions are not aligned to 32-
> > > bit
> > > like the restriction states, instead they are aligned to 64-bit
> > > (the
> > > regioning lowering pass does this):
> > > 
> > > mov(8) g14<4>W   g5<4,4,1>Q   { align1 2Q };
> > > 
> > > So I think this is a bit inconsistent again and I guess we need
> > > to
> > > choose what we want to do. I can think of 4 options:
> > > 
> > > 1. We could assume that the restriction is correctly formulated
> > > and
> > > split the instructions to respect the restriction as-is even if
> > > we
> > > have
> > > not found any evidence that this doesn't work
> > > 
> > > 2. We could assume that the restriction is correctly formulated
> > > and
> > > the
> > > regioning lowering pass need to be fixed to generate a conversion
> > > with
> > > a stride of 2 even if the execution size if 64-bit (haven't tried
> > > this
> > > since it sounds sketchy to me).
> > > 
> > > 3. We could understand that the restriction is strictly
> > > formulated
> > > for
> > > conversions from a 32-bit execution types, and decide that it
> > > does
> > > not
> > > apply to conversions from 64-bit sources.
> > > 
> > > 4. We can assume that the restriction is formulated incorrectly
> > > and
> > > instead of saying that destination words should all be in even or
> > > odd
> > > slots, assuming a conversion from a 32-bit type, it should just
> > > say
> > > that they should be aligned to the execution type to also
> > > accomodate
> > > conversions from 64-bit types.
> > > 
> > 
> > 4. is basically what the back-end is assuming right now, but it is
> > indeed not literally equivalent to the regioning restrictions as
> > they're
> > formulated in the B-Spec.  I'd suggest not validating the DWORD
> > alignment rule for HF instructions whenever the execution type is
> > greater than 32 bits, since the B-Spec language about those is
> > rather
> > inconsistent.

What about conversions like :B->:W or :W->:UW? These have a word
destination and would be subject to the same restriction, but the
execution type is not DWord, so we are not currently forcing DWord
alignment for them.  I have not seen anythig fail for these conversions
but if we implement the rule for them too we will fail validation. I
suppose we could decide to validate the rule only HF instructions like
you were (probably purposedly?) suggesting above.

> Ok, I will do that. Just one thing: I guess that when you say HF
> instructions you mean 16-bit destinations, since that is the target
> of
> the restriction.
> > > And indepedently of how we decide to interpret the restriction in
> > > the
> > > end, given that we have discussed here that we should only
> > > validate
> > > things as they are described in the PRM, I think we also need to
> > > decide
> > > what we implement in the validator (if anything) if we decide to
> > > go
> > > with either 3) or 4) (were we would not be exactly following the
> > > PRM to
> > > the letter).
> > > 
> > > I kind of think that 4) might be what is going on here...
> > > what do you think?
> > > 
> > > >  There are shitloads of other restrictions we're missing for HF
> > > > types BTW, check out the section "Special Requirements for
> > > > Handling
> > > > Mixed Mode Float Operations" of the hardware spec.
> > > > 
> > > > > Which you discussed didn't make sense to you and I agreed, if
> > > > > only
> > > > > because my own experiments suggested that the implications
> > > > > that
> > > > > one
> > > > > would derive from a strict interpretation of this (that
> > > > > packed
> > > > > 16-
> > > > > bit
> > > > > data is not supported) are not quite true going by the fact
> > > > > that we
> > > > > are
> > > > > emitting packed instructions on all platforms without any
> > > > > indication
> > > > > that this doesn't work. So what should we do about this one?
> > > > > If
> > > > > we
> > > > > decide that we want to implement this then we have to re-
> > > > > think
> > > > > the
> > > > > implementation we have.
> > > > > 
> > > > 
> > > > The implication on packed HF instructions is likely accidental,
> > > > I
> > > > don't
> > > > think we should enforce the rule except for conversion
> > > > operations.
> > > > 
> > > > > Should we just validate the one about the integer/float
> > > > > conversions?
> > > > > Should we do that only on BDW or on all platforms?
> > > > > 
> > > > > >   I don't see the
> > > > > > need to introduce any empirical assumptions into the EU
> > > > > > validator's
> > > > > > execution_type() in order to achieve that, even if that
> > > > > > means
> > > > > > that
> > > > > > get_exec_type() and execution_type() don't do the exact
> > > > > > same
> > > > > > calculation
> > > > > > -- What you call an inconsistency is the consequence of
> > > > > > execution_type()
> > > > > > being the hardware spec's opinion on what the execution
> > > > > > type
> > > > > > is,
> > > > > > which
> > > > > > we assume is what we need to use while enforcing a
> > > > > > regioning
> > > > > > restriction
> > > > > > that refers to the execution type of the instruction.
> > > > > 
> > > > > Mmm... I guess my question is: is the hardware really
> > > > > assuming
> > > > > that
> > > > > execution type in all cases that involve HF instructions?
> > > > > What
> > > > > I
> > > > > got
> > > > > from your analysis of the whole situation is that the
> > > > > hardware
> > > > > is
> > > > > actually promoting the execution size to 32-bit in some
> > > > > cases,
> > > > > and
> > > > > if
> > > > > that is the case, then does it make sense to implement code
> > > > > in
> > > > > the
> > > > > validator that ignores that fact?
> > > > 
> > > > If the hardware spec chose to ignore that fact and instead gave
> > > > us a
> > > > different set of regioning restrictions for us to use on HF
> > > > restrictions
> > > > that don't rely on the definition of execution type, we should
> > > > probably
> > > > have the validator match the hardware spec literally.
> > > > 
> > > > > I understand that you think this doesn't have any
> > > > > significance
> > > > > once
> > > > > we
> > > > > make sure that we only apply the restrictions specifically
> > > > > documented
> > > > > for HF from the PRM, and you are probably right, however I
> > > > > have
> > > > > to
> > > > > admit that it feels a bit odd to me that we do that when we
> > > > > know
> > > > > that
> > > > > under the hood there is much more going on and we are
> > > > > producing
> > > > > FS
> > > > > IR
> > > > > code following different rules after all. But maybe it is
> > > > > just
> > > > > me
> > > > > being too paranoid about this...
> > > > > 
> > > > 
> > > > Wouldn't it be even odder to have the hardware restriction
> > > > validator
> > > > diverge from the documentation it's supposedly validating?
> > > > 
> > > > > 
> > > > > > > >   The validator is
> > > > > > > > still missing an implementation of the quirky HF
> > > > > > > > restrictions,
> > > > > > > > and it
> > > > > > > > wasn't the purpose of c84ec70b3a72 to do such a thing.
> > > > > > > 
> > > > > > > While this is true in general, the EU validator does
> > > > > > > consider
> > > > > > > the
> > > > > > > execution type of the instruction to validate general
> > > > > > > rules
> > > > > > > such as
> > > > > > > the
> > > > > > > one I mentioned in the commit message in this patch. And
> > > > > > > that
> > > > > > > part
> > > > > > > of
> > > > > > > the validator is inconsistent with c84ec70b3a72.
> > > > > > 
> > > > > > That part of the validator was also inconsistent with the
> > > > > > code
> > > > > > generated
> > > > > > by your original VK_KHR_shader_float16_int8 series even
> > > > > > before I
> > > > > > committed c84ec70b3a72.  The reason is that it is trying to
> > > > > > validate
> > > > > > a
> > > > > > restriction that rejects working code, because the
> > > > > > "general"
> > > > > > rule
> > > > > > it's
> > > > > > trying to enforce isn't supposed to apply to instructions
> > > > > > with HF
> > > > > > operands, which is the real bug.
> > > > > 
> > > > > I agree with that, and I'll fix that in my series by
> > > > > preventing
> > > > > that
> > > > > rule to apply to these instructions (pending to agree on the
> > > > > rules
> > > > > that
> > > > > we should validate instead), but as I have already explained
> > > > > above,
> > > > > that was not the inconsistency I was referring to.
> > > > > 
> > > > 
> > > > It's the only actual inconsistency this was fixing.
> > > > 
> > > > > > > In fact, the EU validator is accounting for execution
> > > > > > > size
> > > > > > > promotion
> > > > > > > of HF instructions to 32-bit in SKL+ and CHV only, for
> > > > > > > conversions
> > > > > > > from HF->F and mixed float mode instructions... which is
> > > > > > > part
> > > > > > > of
> > > > > > > what
> > > > > > > c84ec70b3a72 addresses at the IR level, which it actually
> > > > > > > does
> > > > > > > for
> > > > > > > all
> > > > > > > hardware platforms and in more cases.
> > > > > > > 
> > > > > > 
> > > > > > I'm fine with fixing execution_type() to do the right thing
> > > > > > in
> > > > > > more
> > > > > > cases and platforms, but I don't think that should involve
> > > > > > making
> > > > > > empirical assumptions in the validator.
> > > > > 
> > > > > Just to be clear, in the particular case we are discussing
> > > > > here, I
> > > > > understand this means that we should leave the validator's
> > > > > execution_type() as-is.
> > > > > 
> > > > 
> > > > That's not what I was saying.  Last time I looked at
> > > > execution_type()
> > > > it
> > > > did seem somewhat sketchy regarding HF types.  I wouldn't be
> > > > surprised
> > > > if it needs some fixes to match the hardware spec (*not* our
> > > > empirical
> > > > knowledge of the hardware) in addition to the above.
> > > > 
> > > > > > > >   You *should*
> > > > > > > > definitely implement those restrictions (as they're
> > > > > > > > stated in
> > > > > > > > the
> > > > > > > > hardware spec, without empirical assumptions) in the
> > > > > > > > validator as
> > > > > > > > part
> > > > > > > > of your VK_KHR_shader_float16_int8 series,
> > > > > > > 
> > > > > > > Again, I am not sure what you mean by "without empirical
> > > > > > > assumptions".
> > > > > > 
> > > > > > I was just paraphrasing your comment.  If you feel the need
> > > > > > to
> > > > > > write
> > > > > > a
> > > > > > comment justifying the existence of some code in the
> > > > > > validator
> > > > > > based
> > > > > > on
> > > > > > empirical testing, it probably doesn't belong in the
> > > > > > validator.
> > > > > > 
> > > > > > > According to the commit message in c84ec70b3a72 "the docs
> > > > > > > are
> > > > > > > fairly
> > > > > > > imcomplete and inconsistent" and you explained here that
> > > > > > > you
> > > > > > > had to
> > > > > > > do
> > > > > > > some experimentation of your own using the simulator
> > > > > > > (where
> > > > > > > you
> > > > > > > found
> > > > > > > its results to also be inconsistent with the hardware
> > > > > > > docs)
> > > > > > > to
> > > > > > > try
> > > > > > > and
> > > > > > > guess what is happening. That's why I was using the term
> > > > > > > "empirical"
> > > > > > > here, since the hardware docs alone aren't clear or
> > > > > > > correct
> > > > > > > enough
> > > > > > > to
> > > > > > > understand what is really happening, when and in what
> > > > > > > platforms.
> > > > > > > 
> > > > > > 
> > > > > > If some hardware restriction is described in a
> > > > > > contradictory
> > > > > > or
> > > > > > ambiguous way by the hardware spec we should probably avoid
> > > > > > enforcing
> > > > > > it
> > > > > > altogether in the EU validator.
> > > > > 
> > > > > Okay, in that case I guess you would rule the restriction
> > > > > about
> > > > > the
> > > > > relaxed alignment rule for Word destinations as such, but not
> > > > > the
> > > > > one
> > > > > about half-float/integer conversions. Is that correct?
> > > > > 
> > > > 
> > > > We probably need to enforce both since they apply to different
> > > > sets
> > > > of
> > > > hardware.
> > > > 
> > > > > > > Anyway, if you don't like the term "empirical" to refer
> > > > > > > to
> > > > > > > all
> > > > > > > this,
> > > > > > > that's fine by me, but what I need to know is if we agree
> > > > > > > that
> > > > > > > we
> > > > > > > need
> > > > > > > to implement the same type promotion rules in the
> > > > > > > validator
> > > > > > > that we
> > > > > > > are
> > > > > > > implementing in the IR, indepedently of whether refer to
> > > > > > > them
> > > > > > > as
> > > > > > > "based
> > > > > > > on empirical testing" or not.
> > > > > > > 
> > > > > > 
> > > > > > I don't think we agree on that.  IMHO you want to enforce
> > > > > > the
> > > > > > regioning
> > > > > > restrictions that the hardware spec describes for HF types
> > > > > > rather
> > > > > > than
> > > > > > the current (incorrect and incomplete) generalization of
> > > > > > the
> > > > > > standard
> > > > > > restrictions.  With those in place the apparent
> > > > > > inconsistency
> > > > > > between
> > > > > > get_exec_type() and the hardware spec's execution type
> > > > > > would
> > > > > > be
> > > > > > harmless.
> > > > > 
> > > > > I agree with that. I still feel uneasy about having two
> > > > > different 
> > > > > implementations of the execution type in FS IR and the
> > > > > validator,
> > > > > even
> > > > > if that doesn't lead to any issues when we do what you say
> > > > > here...
> > > > > 
> > > > 
> > > > I feel uneasy about the validator not matching the hardware
> > > > spec.
> > > > Luckily its only purpose is validating the rest of the
> > > > compiler,
> > > > so
> > > > if
> > > > there is an actual inconsistency as you seem to think, it will
> > > > come
> > > > up.
> > > > 
> > > > > At the very least I think that deserves a comment in the
> > > > > validator
> > > > > explaining why we think that is okay.
> > > > > 
> > > > 
> > > > A quote of the hardware spec should be enough of a
> > > > comment.  Right?  And
> > > > there is already a comment in get_exec_type() explaining why
> > > > its
> > > > HF
> > > > type
> > > > handling logic is consistent with the hardware spec
> > > > restrictions
> > > > for
> > > > HF
> > > > types.
> > > > 
> > > > > > > >  if anything because currently
> > > > > > > > it will reject working code that uses HF types.
> > > > > > > 
> > > > > > > Just for the sake of clarity, since that sentence above
> > > > > > > could
> > > > > > > be
> > > > > > > interpreted as if all HF code would be rejected: we have
> > > > > > > been
> > > > > > > using
> > > > > > > HF
> > > > > > > types since we landed VK_KHR_16bit_storage, which
> > > > > > > includes
> > > > > > > conversions
> > > > > > > between HF and F and the EU validator is not complaining
> > > > > > > about
> > > > > > > any
> > > > > > > of
> > > > > > > that. It is currently a problem only with conversions to
> > > > > > > smaller
> > > > > > > types
> > > > > > > (so only conversions to Byte) because that's where we
> > > > > > > check
> > > > > > > for
> > > > > > > that
> > > > > > > restriction on the stride, which is based on the
> > > > > > > execution
> > > > > > > type
> > > > > > > of
> > > > > > > the
> > > > > > > instruction.
> > > > > > > 
> > > > > > > > 
> > > > > > > > > ---
> > > > > > > > >  src/intel/compiler/brw_eu_validate.c | 27
> > > > > > > > > ++++++++++++++
> > > > > > > > > ----
> > > > > > > > > ----
> > > > > > > > > -----
> > > > > > > > >  1 file changed, 14 insertions(+), 13 deletions(-)
> > > > > > > > > 
> > > > > > > > > diff --git a/src/intel/compiler/brw_eu_validate.c
> > > > > > > > > b/src/intel/compiler/brw_eu_validate.c
> > > > > > > > > index a25010b225c..3bb37677672 100644
> > > > > > > > > --- a/src/intel/compiler/brw_eu_validate.c
> > > > > > > > > +++ b/src/intel/compiler/brw_eu_validate.c
> > > > > > > > > @@ -325,17 +325,20 @@ execution_type(const struct
> > > > > > > > > gen_device_info
> > > > > > > > > *devinfo, const brw_inst *inst)
> > > > > > > > >     unsigned num_sources =
> > > > > > > > > num_sources_from_inst(devinfo,
> > > > > > > > > inst);
> > > > > > > > >     enum brw_reg_type src0_exec_type, src1_exec_type;
> > > > > > > > >  
> > > > > > > > > -   /* Execution data type is independent of
> > > > > > > > > destination
> > > > > > > > > data
> > > > > > > > > type,
> > > > > > > > > except in
> > > > > > > > > -    * mixed F/HF instructions on CHV and SKL+.
> > > > > > > > > +   /* Empirical testing suggests that type
> > > > > > > > > conversions
> > > > > > > > > involving
> > > > > > > > > half-float
> > > > > > > > > +    * promote execution type to 32-bit. See
> > > > > > > > > get_exec_type() in
> > > > > > > > > brw_ir_fs.h.
> > > > > > > > >      */
> > > > > > > > >     enum brw_reg_type dst_exec_type =
> > > > > > > > > brw_inst_dst_type(devinfo,
> > > > > > > > > inst);
> > > > > > > > >  
> > > > > > > > >     src0_exec_type =
> > > > > > > > > execution_type_for_type(brw_inst_src0_type(devinfo,
> > > > > > > > > inst));
> > > > > > > > >     if (num_sources == 1) {
> > > > > > > > > -      if ((devinfo->gen >= 9 || devinfo-
> > > > > > > > > > is_cherryview) &&
> > > > > > > > > 
> > > > > > > > > -          src0_exec_type == BRW_REGISTER_TYPE_HF) {
> > > > > > > > > -         return dst_exec_type;
> > > > > > > > > +      if (type_sz(src0_exec_type) == 2 &&
> > > > > > > > > dst_exec_type !=
> > > > > > > > > src0_exec_type) {
> > > > > > > > > +         if (src0_exec_type == BRW_REGISTER_TYPE_HF)
> > > > > > > > > +            return BRW_REGISTER_TYPE_F;
> > > > > > > > > +         else if (dst_exec_type ==
> > > > > > > > > BRW_REGISTER_TYPE_HF)
> > > > > > > > > +            return BRW_REGISTER_TYPE_D;
> > > > > > > > >        }
> > > > > > > > > +
> > > > > > > > >        return src0_exec_type;
> > > > > > > > >     }
> > > > > > > > >  
> > > > > > > > > @@ -367,14 +370,12 @@ execution_type(const struct
> > > > > > > > > gen_device_info
> > > > > > > > > *devinfo, const brw_inst *inst)
> > > > > > > > >         src1_exec_type == BRW_REGISTER_TYPE_DF)
> > > > > > > > >        return BRW_REGISTER_TYPE_DF;
> > > > > > > > >  
> > > > > > > > > -   if (devinfo->gen >= 9 || devinfo->is_cherryview)
> > > > > > > > > {
> > > > > > > > > -      if (dst_exec_type == BRW_REGISTER_TYPE_F ||
> > > > > > > > > -          src0_exec_type == BRW_REGISTER_TYPE_F ||
> > > > > > > > > -          src1_exec_type == BRW_REGISTER_TYPE_F) {
> > > > > > > > > -         return BRW_REGISTER_TYPE_F;
> > > > > > > > > -      } else {
> > > > > > > > > -         return BRW_REGISTER_TYPE_HF;
> > > > > > > > > -      }
> > > > > > > > > +   if (dst_exec_type == BRW_REGISTER_TYPE_F ||
> > > > > > > > > +       src0_exec_type == BRW_REGISTER_TYPE_F ||
> > > > > > > > > +       src1_exec_type == BRW_REGISTER_TYPE_F) {
> > > > > > > > > +      return BRW_REGISTER_TYPE_F;
> > > > > > > > > +   } else {
> > > > > > > > > +      return BRW_REGISTER_TYPE_HF;
> > > > > > > > >     }
> > > > > > > > >  
> > > > > > > > >     assert(src0_exec_type == BRW_REGISTER_TYPE_F);
> > > > > > > > > -- 
> > > > > > > > > 2.17.1
Iago Toral <itoral@igalia.com> writes:

> On Mon, 2019-02-04 at 08:50 +0100, Iago Toral wrote:
>> On Fri, 2019-02-01 at 11:23 -0800, Francisco Jerez wrote:
>> > Iago Toral <itoral@igalia.com> writes:
>> > 
>> > > On Fri, 2019-01-25 at 12:54 -0800, Francisco Jerez wrote:
>> > > > Iago Toral <itoral@igalia.com> writes:
>> > > > 
>> > > > > On Thu, 2019-01-24 at 11:45 -0800, Francisco Jerez wrote:
>> > > > > > Iago Toral <itoral@igalia.com> writes:
>> > > > > > 
>> > > > > > > On Wed, 2019-01-23 at 06:03 -0800, Francisco Jerez wrote:
>> > > > > > > > Iago Toral Quiroga <itoral@igalia.com> writes:
>> > > > > > > > 
>> > > > > > > > > Commit c84ec70b3a72 implemented execution type
>> > > > > > > > > promotion to
>> > > > > > > > > 32-
>> > > > > > > > > bit
>> > > > > > > > > for
>> > > > > > > > > conversions involving half-float registers, which
>> > > > > > > > > empirical
>> > > > > > > > > testing
>> > > > > > > > > suggested
>> > > > > > > > > was required, but it did not incorporate this change
>> > > > > > > > > into
>> > > > > > > > > the
>> > > > > > > > > assembly validator
>> > > > > > > > > logic. This commits adds that, preventing validation
>> > > > > > > > > errors
>> > > > > > > > > like
>> > > > > > > > > this:
>> > > > > > > > > 
>> > > > > > > > 
>> > > > > > > > I don't think we should be validating empirical
>> > > > > > > > assumptions
>> > > > > > > > in
>> > > > > > > > the EU
>> > > > > > > > validator.
>> > > > > > > 
>> > > > > > > I am not sure I get your point, isn't c84ec70b3a72 also
>> > > > > > > based
>> > > > > > > on
>> > > > > > > empirical testing after all?
>> > > > > > > 
>> > > > > > 
>> > > > > > To some extent, but it doesn't attempt to enforce ISA
>> > > > > > restrictions
>> > > > > > based
>> > > > > > on information obtained empirically.
>> > > > > > 
>> > > > > > > 
>> > > > > > > > > mov(16)  g9<4>B   g3<16,8,2>HF     { align1 1H };
>> > > > > > > > > ERROR: Destination stride must be equal to the ratio
>> > > > > > > > > of
>> > > > > > > > > the
>> > > > > > > > > sizes
>> > > > > > > > > of the
>> > > > > > > > >        execution data type to the destination type
>> > > > > > > > > 
>> > > > > > > > > Fixes: c84ec70b3a72 "intel/fs: Promote execution type
>> > > > > > > > > to
>> > > > > > > > > 32-bit
>> > > > > > > > > when any half-float conversion is needed."
>> > > > > > > > 
>> > > > > > > > I don't think this "fixes" anything that ever worked.
>> > > > > > > 
>> > > > > > > It is true that the code in that trace above is not
>> > > > > > > something
>> > > > > > > we
>> > > > > > > can
>> > > > > > > produce right now, because it is a conversion from HF to
>> > > > > > > B
>> > > > > > > and
>> > > > > > > that
>> > > > > > > should only happen within the context of
>> > > > > > > VK_KHR_shader_float16_int8,
>> > > > > > > however, this is a consequence of the fact that since
>> > > > > > > c84ec70b3a72
>> > > > > > > there is an inconsistency between what we do at the IR
>> > > > > > > level
>> > > > > > > regarding
>> > > > > > > execution size of HF conversions and what the EU
>> > > > > > > validator
>> > > > > > > is
>> > > > > > > doing,
>> > > > > > > and from that perspective this is really fixing an
>> > > > > > > inconsistency
>> > > > > > > that
>> > > > > > > didn't exist before, and I thought we would want to
>> > > > > > > address
>> > > > > > > that
>> > > > > > > sooner
>> > > > > > > rather than later and track it down to the original
>> > > > > > > change
>> > > > > > > that
>> > > > > > > introduced that inconsistency so we know where this is
>> > > > > > > coming
>> > > > > > > from.
>> > > > > > > 
>> > > > > > 
>> > > > > > The "inconsistency" between the IR's get_exec_type() and
>> > > > > > the
>> > > > > > EU
>> > > > > > validator's execution_type() has existed ever since
>> > > > > > a05b6f25bf4bfad7
>> > > > > > removed the HF assert from get_exec_type() without actually
>> > > > > > implementing
>> > > > > > the code required to handle HF operands (which is what my
>> > > > > > commit
>> > > > > > c84ec70b3a72 did).
>> > > > > 
>> > > > > I agree with the fact that since a05b6f25bf4bfad7 the
>> > > > > validator
>> > > > > could
>> > > > > reject valid code and that had nothing to do with your patch,
>> > > > 
>> > > > The validator rejected the same valid HF code since it was
>> > > > written,
>> > > > that
>> > > > had nothing to do with neither a05b6f25bf4bfad7 nor with my
>> > > > patch,
>> > > > and
>> > > > it is the real problem this patch was working around.
>> > > > 
>> > > > > but the inconsistency I am talking about here, that this
>> > > > > patch
>> > > > > fixes,
>> > > > > is the one about get_exec_type() in the IR and
>> > > > > execution_type()
>> > > > > in
>> > > > > the
>> > > > > validator doing different things for HF instructions, which
>> > > > > only
>> > > > > exists since your patch and which you discuss below.
>> > > > > 
>> > > > 
>> > > > The "inconsistency" exists ever since get_exec_type() was
>> > > > introduced
>> > > > without correct handling of HF types (even though
>> > > > execution_type()
>> > > > already attempted to handle it).  And I disagree that it's a
>> > > > real
>> > > > inconsistency except due to the fact that the validator is
>> > > > incorrectly
>> > > > attempting to validate the alignment of the destination region
>> > > > according
>> > > > to a rule that doesn't apply to HF types.
>> > > > 
>> > > > > > > Anyway, that was my rationale for the Fixes tag, but if
>> > > > > > > you
>> > > > > > > think
>> > > > > > > this
>> > > > > > > is not useful I am happy to drop this patch and just
>> > > > > > > include it
>> > > > > > > as
>> > > > > > > part
>> > > > > > > of my series without the tag.
>> > > > > > > 
>> > > > > > 
>> > > > > > I'd like to see the actual regioning restrictions for HF
>> > > > > > types
>> > > > > > implemented in the EU validator as part of your series.
>> > > > > 
>> > > > > Ok, let's see if we can agree on what restrictions should we
>> > > > > implement
>> > > > > then. I can implement this restriction as documented:
>> > > > > 
>> > > > > "Conversion between Integer and HF (Half Float) must be
>> > > > > DWord-
>> > > > > aligned
>> > > > > and strided by a DWord on the destination"
>> > > > > 
>> > > > > Instead of trying to apply the general one that is currently
>> > > > > breaking.
>> > > > > That will fix the assertion issue. I guess my issue with it
>> > > > > was
>> > > > > that
>> > > > > going by your analysis this restriction is not telling the
>> > > > > full
>> > > > > picture, this is what you had to say about this restriction:
>> > > > > 
>> > > > > "I have a feeling that the reason for this may be that the
>> > > > > 16-
>> > > > > bit
>> > > > > pipeline lacks the ability to handle conversions from or to
>> > > > > half-
>> > > > > float, 
>> > > > > so the execution type is implicitly promoted to the matching
>> > > > > (integer
>> > > > > or floating-point) 32-bit type where any HF conversion would
>> > > > > be
>> > > > > needed.  And on those the usual alignment restriction of the
>> > > > > destination to a larger execution type applies."
>> > > > > 
>> > > > > But I guess your point is that we can ignore these details at
>> > > > > the
>> > > > > validator level and just focus on validating strictly what
>> > > > > the
>> > > > > PRM
>> > > > > says. Fair enough.
>> > > > > 
>> > > > > Unfortunatley, you also mentioned that this restriction
>> > > > > *seems*
>> > > > > to
>> > > > > be
>> > > > > overriden by this one on all platforms but BDW (even though
>> > > > > the
>> > > > > both
>> > > > > are listed, at least I see both in my SKL docs):
>> > > > > 
>> > > > > "There is a relaxed alignment rule for word destinations.
>> > > > > When
>> > > > > the
>> > > > > destination type is word (UW, W, HF), destination data types
>> > > > > can be
>> > > > > aligned to either the lowest word or the second lowest word
>> > > > > of
>> > > > > the
>> > > > > execution channel. This means the destination data words can
>> > > > > be
>> > > > > either
>> > > > > all in the even word locations or all in the odd word
>> > > > > locations."
>> > > > > 
>> > > > 
>> > > > I'd implement this one since it seems like the most specific,
>> > > > except
>> > > > on
>> > > > BDW where only the previous (almost equivalent) restriction
>> > > > seems
>> > > > to
>> > > > apply. 
>> > > 
>> > > I have just realized that this restriction could be at odds with
>> > > this
>> > > other aspect of the spec:
>> > > 
>> > > "There is no direct conversion from B/UB to DF or DF to B/UB. Use
>> > > two
>> > > instructions and a word or DWord intermediate type."
>> > > 
>> > > So it is okay to convert from B to W and then from W to DF/Q (it
>> > > has
>> > > the same text from conversions between HF and 64-bit types). We
>> > > actually emit these conversions and they work fine going by the
>> > > existing CTS tests, but these conversions are not aligned to 32-
>> > > bit
>> > > like the restriction states, instead they are aligned to 64-bit
>> > > (the
>> > > regioning lowering pass does this):
>> > > 
>> > > mov(8) g14<4>W   g5<4,4,1>Q   { align1 2Q };
>> > > 
>> > > So I think this is a bit inconsistent again and I guess we need
>> > > to
>> > > choose what we want to do. I can think of 4 options:
>> > > 
>> > > 1. We could assume that the restriction is correctly formulated
>> > > and
>> > > split the instructions to respect the restriction as-is even if
>> > > we
>> > > have
>> > > not found any evidence that this doesn't work
>> > > 
>> > > 2. We could assume that the restriction is correctly formulated
>> > > and
>> > > the
>> > > regioning lowering pass need to be fixed to generate a conversion
>> > > with
>> > > a stride of 2 even if the execution size if 64-bit (haven't tried
>> > > this
>> > > since it sounds sketchy to me).
>> > > 
>> > > 3. We could understand that the restriction is strictly
>> > > formulated
>> > > for
>> > > conversions from a 32-bit execution types, and decide that it
>> > > does
>> > > not
>> > > apply to conversions from 64-bit sources.
>> > > 
>> > > 4. We can assume that the restriction is formulated incorrectly
>> > > and
>> > > instead of saying that destination words should all be in even or
>> > > odd
>> > > slots, assuming a conversion from a 32-bit type, it should just
>> > > say
>> > > that they should be aligned to the execution type to also
>> > > accomodate
>> > > conversions from 64-bit types.
>> > > 
>> > 
>> > 4. is basically what the back-end is assuming right now, but it is
>> > indeed not literally equivalent to the regioning restrictions as
>> > they're
>> > formulated in the B-Spec.  I'd suggest not validating the DWORD
>> > alignment rule for HF instructions whenever the execution type is
>> > greater than 32 bits, since the B-Spec language about those is
>> > rather
>> > inconsistent.
>
> What about conversions like :B->:W or :W->:UW? These have a word
> destination and would be subject to the same restriction, but the
> execution type is not DWord, so we are not currently forcing DWord
> alignment for them.  I have not seen anythig fail for these conversions
> but if we implement the rule for them too we will fail validation. I
> suppose we could decide to validate the rule only HF instructions like
> you were (probably purposedly?) suggesting above.
>

Yes, I really only meant we should validate this HF restriction for
instructions with an HF conversion like the hardware spec says.

>> Ok, I will do that. Just one thing: I guess that when you say HF
>> instructions you mean 16-bit destinations, since that is the target
>> of
>> the restriction.
>> > > And indepedently of how we decide to interpret the restriction in
>> > > the
>> > > end, given that we have discussed here that we should only
>> > > validate
>> > > things as they are described in the PRM, I think we also need to
>> > > decide
>> > > what we implement in the validator (if anything) if we decide to
>> > > go
>> > > with either 3) or 4) (were we would not be exactly following the
>> > > PRM to
>> > > the letter).
>> > > 
>> > > I kind of think that 4) might be what is going on here...
>> > > what do you think?
>> > > 
>> > > >  There are shitloads of other restrictions we're missing for HF
>> > > > types BTW, check out the section "Special Requirements for
>> > > > Handling
>> > > > Mixed Mode Float Operations" of the hardware spec.
>> > > > 
>> > > > > Which you discussed didn't make sense to you and I agreed, if
>> > > > > only
>> > > > > because my own experiments suggested that the implications
>> > > > > that
>> > > > > one
>> > > > > would derive from a strict interpretation of this (that
>> > > > > packed
>> > > > > 16-
>> > > > > bit
>> > > > > data is not supported) are not quite true going by the fact
>> > > > > that we
>> > > > > are
>> > > > > emitting packed instructions on all platforms without any
>> > > > > indication
>> > > > > that this doesn't work. So what should we do about this one?
>> > > > > If
>> > > > > we
>> > > > > decide that we want to implement this then we have to re-
>> > > > > think
>> > > > > the
>> > > > > implementation we have.
>> > > > > 
>> > > > 
>> > > > The implication on packed HF instructions is likely accidental,
>> > > > I
>> > > > don't
>> > > > think we should enforce the rule except for conversion
>> > > > operations.
>> > > > 
>> > > > > Should we just validate the one about the integer/float
>> > > > > conversions?
>> > > > > Should we do that only on BDW or on all platforms?
>> > > > > 
>> > > > > >   I don't see the
>> > > > > > need to introduce any empirical assumptions into the EU
>> > > > > > validator's
>> > > > > > execution_type() in order to achieve that, even if that
>> > > > > > means
>> > > > > > that
>> > > > > > get_exec_type() and execution_type() don't do the exact
>> > > > > > same
>> > > > > > calculation
>> > > > > > -- What you call an inconsistency is the consequence of
>> > > > > > execution_type()
>> > > > > > being the hardware spec's opinion on what the execution
>> > > > > > type
>> > > > > > is,
>> > > > > > which
>> > > > > > we assume is what we need to use while enforcing a
>> > > > > > regioning
>> > > > > > restriction
>> > > > > > that refers to the execution type of the instruction.
>> > > > > 
>> > > > > Mmm... I guess my question is: is the hardware really
>> > > > > assuming
>> > > > > that
>> > > > > execution type in all cases that involve HF instructions?
>> > > > > What
>> > > > > I
>> > > > > got
>> > > > > from your analysis of the whole situation is that the
>> > > > > hardware
>> > > > > is
>> > > > > actually promoting the execution size to 32-bit in some
>> > > > > cases,
>> > > > > and
>> > > > > if
>> > > > > that is the case, then does it make sense to implement code
>> > > > > in
>> > > > > the
>> > > > > validator that ignores that fact?
>> > > > 
>> > > > If the hardware spec chose to ignore that fact and instead gave
>> > > > us a
>> > > > different set of regioning restrictions for us to use on HF
>> > > > restrictions
>> > > > that don't rely on the definition of execution type, we should
>> > > > probably
>> > > > have the validator match the hardware spec literally.
>> > > > 
>> > > > > I understand that you think this doesn't have any
>> > > > > significance
>> > > > > once
>> > > > > we
>> > > > > make sure that we only apply the restrictions specifically
>> > > > > documented
>> > > > > for HF from the PRM, and you are probably right, however I
>> > > > > have
>> > > > > to
>> > > > > admit that it feels a bit odd to me that we do that when we
>> > > > > know
>> > > > > that
>> > > > > under the hood there is much more going on and we are
>> > > > > producing
>> > > > > FS
>> > > > > IR
>> > > > > code following different rules after all. But maybe it is
>> > > > > just
>> > > > > me
>> > > > > being too paranoid about this...
>> > > > > 
>> > > > 
>> > > > Wouldn't it be even odder to have the hardware restriction
>> > > > validator
>> > > > diverge from the documentation it's supposedly validating?
>> > > > 
>> > > > > 
>> > > > > > > >   The validator is
>> > > > > > > > still missing an implementation of the quirky HF
>> > > > > > > > restrictions,
>> > > > > > > > and it
>> > > > > > > > wasn't the purpose of c84ec70b3a72 to do such a thing.
>> > > > > > > 
>> > > > > > > While this is true in general, the EU validator does
>> > > > > > > consider
>> > > > > > > the
>> > > > > > > execution type of the instruction to validate general
>> > > > > > > rules
>> > > > > > > such as
>> > > > > > > the
>> > > > > > > one I mentioned in the commit message in this patch. And
>> > > > > > > that
>> > > > > > > part
>> > > > > > > of
>> > > > > > > the validator is inconsistent with c84ec70b3a72.
>> > > > > > 
>> > > > > > That part of the validator was also inconsistent with the
>> > > > > > code
>> > > > > > generated
>> > > > > > by your original VK_KHR_shader_float16_int8 series even
>> > > > > > before I
>> > > > > > committed c84ec70b3a72.  The reason is that it is trying to
>> > > > > > validate
>> > > > > > a
>> > > > > > restriction that rejects working code, because the
>> > > > > > "general"
>> > > > > > rule
>> > > > > > it's
>> > > > > > trying to enforce isn't supposed to apply to instructions
>> > > > > > with HF
>> > > > > > operands, which is the real bug.
>> > > > > 
>> > > > > I agree with that, and I'll fix that in my series by
>> > > > > preventing
>> > > > > that
>> > > > > rule to apply to these instructions (pending to agree on the
>> > > > > rules
>> > > > > that
>> > > > > we should validate instead), but as I have already explained
>> > > > > above,
>> > > > > that was not the inconsistency I was referring to.
>> > > > > 
>> > > > 
>> > > > It's the only actual inconsistency this was fixing.
>> > > > 
>> > > > > > > In fact, the EU validator is accounting for execution
>> > > > > > > size
>> > > > > > > promotion
>> > > > > > > of HF instructions to 32-bit in SKL+ and CHV only, for
>> > > > > > > conversions
>> > > > > > > from HF->F and mixed float mode instructions... which is
>> > > > > > > part
>> > > > > > > of
>> > > > > > > what
>> > > > > > > c84ec70b3a72 addresses at the IR level, which it actually
>> > > > > > > does
>> > > > > > > for
>> > > > > > > all
>> > > > > > > hardware platforms and in more cases.
>> > > > > > > 
>> > > > > > 
>> > > > > > I'm fine with fixing execution_type() to do the right thing
>> > > > > > in
>> > > > > > more
>> > > > > > cases and platforms, but I don't think that should involve
>> > > > > > making
>> > > > > > empirical assumptions in the validator.
>> > > > > 
>> > > > > Just to be clear, in the particular case we are discussing
>> > > > > here, I
>> > > > > understand this means that we should leave the validator's
>> > > > > execution_type() as-is.
>> > > > > 
>> > > > 
>> > > > That's not what I was saying.  Last time I looked at
>> > > > execution_type()
>> > > > it
>> > > > did seem somewhat sketchy regarding HF types.  I wouldn't be
>> > > > surprised
>> > > > if it needs some fixes to match the hardware spec (*not* our
>> > > > empirical
>> > > > knowledge of the hardware) in addition to the above.
>> > > > 
>> > > > > > > >   You *should*
>> > > > > > > > definitely implement those restrictions (as they're
>> > > > > > > > stated in
>> > > > > > > > the
>> > > > > > > > hardware spec, without empirical assumptions) in the
>> > > > > > > > validator as
>> > > > > > > > part
>> > > > > > > > of your VK_KHR_shader_float16_int8 series,
>> > > > > > > 
>> > > > > > > Again, I am not sure what you mean by "without empirical
>> > > > > > > assumptions".
>> > > > > > 
>> > > > > > I was just paraphrasing your comment.  If you feel the need
>> > > > > > to
>> > > > > > write
>> > > > > > a
>> > > > > > comment justifying the existence of some code in the
>> > > > > > validator
>> > > > > > based
>> > > > > > on
>> > > > > > empirical testing, it probably doesn't belong in the
>> > > > > > validator.
>> > > > > > 
>> > > > > > > According to the commit message in c84ec70b3a72 "the docs
>> > > > > > > are
>> > > > > > > fairly
>> > > > > > > imcomplete and inconsistent" and you explained here that
>> > > > > > > you
>> > > > > > > had to
>> > > > > > > do
>> > > > > > > some experimentation of your own using the simulator
>> > > > > > > (where
>> > > > > > > you
>> > > > > > > found
>> > > > > > > its results to also be inconsistent with the hardware
>> > > > > > > docs)
>> > > > > > > to
>> > > > > > > try
>> > > > > > > and
>> > > > > > > guess what is happening. That's why I was using the term
>> > > > > > > "empirical"
>> > > > > > > here, since the hardware docs alone aren't clear or
>> > > > > > > correct
>> > > > > > > enough
>> > > > > > > to
>> > > > > > > understand what is really happening, when and in what
>> > > > > > > platforms.
>> > > > > > > 
>> > > > > > 
>> > > > > > If some hardware restriction is described in a
>> > > > > > contradictory
>> > > > > > or
>> > > > > > ambiguous way by the hardware spec we should probably avoid
>> > > > > > enforcing
>> > > > > > it
>> > > > > > altogether in the EU validator.
>> > > > > 
>> > > > > Okay, in that case I guess you would rule the restriction
>> > > > > about
>> > > > > the
>> > > > > relaxed alignment rule for Word destinations as such, but not
>> > > > > the
>> > > > > one
>> > > > > about half-float/integer conversions. Is that correct?
>> > > > > 
>> > > > 
>> > > > We probably need to enforce both since they apply to different
>> > > > sets
>> > > > of
>> > > > hardware.
>> > > > 
>> > > > > > > Anyway, if you don't like the term "empirical" to refer
>> > > > > > > to
>> > > > > > > all
>> > > > > > > this,
>> > > > > > > that's fine by me, but what I need to know is if we agree
>> > > > > > > that
>> > > > > > > we
>> > > > > > > need
>> > > > > > > to implement the same type promotion rules in the
>> > > > > > > validator
>> > > > > > > that we
>> > > > > > > are
>> > > > > > > implementing in the IR, indepedently of whether refer to
>> > > > > > > them
>> > > > > > > as
>> > > > > > > "based
>> > > > > > > on empirical testing" or not.
>> > > > > > > 
>> > > > > > 
>> > > > > > I don't think we agree on that.  IMHO you want to enforce
>> > > > > > the
>> > > > > > regioning
>> > > > > > restrictions that the hardware spec describes for HF types
>> > > > > > rather
>> > > > > > than
>> > > > > > the current (incorrect and incomplete) generalization of
>> > > > > > the
>> > > > > > standard
>> > > > > > restrictions.  With those in place the apparent
>> > > > > > inconsistency
>> > > > > > between
>> > > > > > get_exec_type() and the hardware spec's execution type
>> > > > > > would
>> > > > > > be
>> > > > > > harmless.
>> > > > > 
>> > > > > I agree with that. I still feel uneasy about having two
>> > > > > different 
>> > > > > implementations of the execution type in FS IR and the
>> > > > > validator,
>> > > > > even
>> > > > > if that doesn't lead to any issues when we do what you say
>> > > > > here...
>> > > > > 
>> > > > 
>> > > > I feel uneasy about the validator not matching the hardware
>> > > > spec.
>> > > > Luckily its only purpose is validating the rest of the
>> > > > compiler,
>> > > > so
>> > > > if
>> > > > there is an actual inconsistency as you seem to think, it will
>> > > > come
>> > > > up.
>> > > > 
>> > > > > At the very least I think that deserves a comment in the
>> > > > > validator
>> > > > > explaining why we think that is okay.
>> > > > > 
>> > > > 
>> > > > A quote of the hardware spec should be enough of a
>> > > > comment.  Right?  And
>> > > > there is already a comment in get_exec_type() explaining why
>> > > > its
>> > > > HF
>> > > > type
>> > > > handling logic is consistent with the hardware spec
>> > > > restrictions
>> > > > for
>> > > > HF
>> > > > types.
>> > > > 
>> > > > > > > >  if anything because currently
>> > > > > > > > it will reject working code that uses HF types.
>> > > > > > > 
>> > > > > > > Just for the sake of clarity, since that sentence above
>> > > > > > > could
>> > > > > > > be
>> > > > > > > interpreted as if all HF code would be rejected: we have
>> > > > > > > been
>> > > > > > > using
>> > > > > > > HF
>> > > > > > > types since we landed VK_KHR_16bit_storage, which
>> > > > > > > includes
>> > > > > > > conversions
>> > > > > > > between HF and F and the EU validator is not complaining
>> > > > > > > about
>> > > > > > > any
>> > > > > > > of
>> > > > > > > that. It is currently a problem only with conversions to
>> > > > > > > smaller
>> > > > > > > types
>> > > > > > > (so only conversions to Byte) because that's where we
>> > > > > > > check
>> > > > > > > for
>> > > > > > > that
>> > > > > > > restriction on the stride, which is based on the
>> > > > > > > execution
>> > > > > > > type
>> > > > > > > of
>> > > > > > > the
>> > > > > > > instruction.
>> > > > > > > 
>> > > > > > > > 
>> > > > > > > > > ---
>> > > > > > > > >  src/intel/compiler/brw_eu_validate.c | 27
>> > > > > > > > > ++++++++++++++
>> > > > > > > > > ----
>> > > > > > > > > ----
>> > > > > > > > > -----
>> > > > > > > > >  1 file changed, 14 insertions(+), 13 deletions(-)
>> > > > > > > > > 
>> > > > > > > > > diff --git a/src/intel/compiler/brw_eu_validate.c
>> > > > > > > > > b/src/intel/compiler/brw_eu_validate.c
>> > > > > > > > > index a25010b225c..3bb37677672 100644
>> > > > > > > > > --- a/src/intel/compiler/brw_eu_validate.c
>> > > > > > > > > +++ b/src/intel/compiler/brw_eu_validate.c
>> > > > > > > > > @@ -325,17 +325,20 @@ execution_type(const struct
>> > > > > > > > > gen_device_info
>> > > > > > > > > *devinfo, const brw_inst *inst)
>> > > > > > > > >     unsigned num_sources =
>> > > > > > > > > num_sources_from_inst(devinfo,
>> > > > > > > > > inst);
>> > > > > > > > >     enum brw_reg_type src0_exec_type, src1_exec_type;
>> > > > > > > > >  
>> > > > > > > > > -   /* Execution data type is independent of
>> > > > > > > > > destination
>> > > > > > > > > data
>> > > > > > > > > type,
>> > > > > > > > > except in
>> > > > > > > > > -    * mixed F/HF instructions on CHV and SKL+.
>> > > > > > > > > +   /* Empirical testing suggests that type
>> > > > > > > > > conversions
>> > > > > > > > > involving
>> > > > > > > > > half-float
>> > > > > > > > > +    * promote execution type to 32-bit. See
>> > > > > > > > > get_exec_type() in
>> > > > > > > > > brw_ir_fs.h.
>> > > > > > > > >      */
>> > > > > > > > >     enum brw_reg_type dst_exec_type =
>> > > > > > > > > brw_inst_dst_type(devinfo,
>> > > > > > > > > inst);
>> > > > > > > > >  
>> > > > > > > > >     src0_exec_type =
>> > > > > > > > > execution_type_for_type(brw_inst_src0_type(devinfo,
>> > > > > > > > > inst));
>> > > > > > > > >     if (num_sources == 1) {
>> > > > > > > > > -      if ((devinfo->gen >= 9 || devinfo-
>> > > > > > > > > > is_cherryview) &&
>> > > > > > > > > 
>> > > > > > > > > -          src0_exec_type == BRW_REGISTER_TYPE_HF) {
>> > > > > > > > > -         return dst_exec_type;
>> > > > > > > > > +      if (type_sz(src0_exec_type) == 2 &&
>> > > > > > > > > dst_exec_type !=
>> > > > > > > > > src0_exec_type) {
>> > > > > > > > > +         if (src0_exec_type == BRW_REGISTER_TYPE_HF)
>> > > > > > > > > +            return BRW_REGISTER_TYPE_F;
>> > > > > > > > > +         else if (dst_exec_type ==
>> > > > > > > > > BRW_REGISTER_TYPE_HF)
>> > > > > > > > > +            return BRW_REGISTER_TYPE_D;
>> > > > > > > > >        }
>> > > > > > > > > +
>> > > > > > > > >        return src0_exec_type;
>> > > > > > > > >     }
>> > > > > > > > >  
>> > > > > > > > > @@ -367,14 +370,12 @@ execution_type(const struct
>> > > > > > > > > gen_device_info
>> > > > > > > > > *devinfo, const brw_inst *inst)
>> > > > > > > > >         src1_exec_type == BRW_REGISTER_TYPE_DF)
>> > > > > > > > >        return BRW_REGISTER_TYPE_DF;
>> > > > > > > > >  
>> > > > > > > > > -   if (devinfo->gen >= 9 || devinfo->is_cherryview)
>> > > > > > > > > {
>> > > > > > > > > -      if (dst_exec_type == BRW_REGISTER_TYPE_F ||
>> > > > > > > > > -          src0_exec_type == BRW_REGISTER_TYPE_F ||
>> > > > > > > > > -          src1_exec_type == BRW_REGISTER_TYPE_F) {
>> > > > > > > > > -         return BRW_REGISTER_TYPE_F;
>> > > > > > > > > -      } else {
>> > > > > > > > > -         return BRW_REGISTER_TYPE_HF;
>> > > > > > > > > -      }
>> > > > > > > > > +   if (dst_exec_type == BRW_REGISTER_TYPE_F ||
>> > > > > > > > > +       src0_exec_type == BRW_REGISTER_TYPE_F ||
>> > > > > > > > > +       src1_exec_type == BRW_REGISTER_TYPE_F) {
>> > > > > > > > > +      return BRW_REGISTER_TYPE_F;
>> > > > > > > > > +   } else {
>> > > > > > > > > +      return BRW_REGISTER_TYPE_HF;
>> > > > > > > > >     }
>> > > > > > > > >  
>> > > > > > > > >     assert(src0_exec_type == BRW_REGISTER_TYPE_F);
>> > > > > > > > > -- 
>> > > > > > > > > 2.17.1