[v3,08/42] intel/compiler: implement 16-bit fsign

Submitted by Iago Toral Quiroga on Jan. 15, 2019, 1:53 p.m.

Details

Message ID 20190115135414.2313-9-itoral@igalia.com
State New
Headers show
Series "intel: VK_KHR_shader_float16_int8 implementation" ( rev: 5 4 ) in Mesa

Not browsing as part of any series.

Commit Message

Iago Toral Quiroga Jan. 15, 2019, 1:53 p.m.
v2:
 - make 16-bit be its own separate case (Jason)

Reviewed-by: Topi Pohjolainen <topi.pohjolainen@intel.com>
---
 src/intel/compiler/brw_fs_nir.cpp | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

Patch hide | download patch | download mbox

diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp
index d742f55a957..cf546b8ff09 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -844,7 +844,22 @@  fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
             : bld.MOV(result, brw_imm_f(1.0f));
 
          set_predicate(BRW_PREDICATE_NORMAL, inst);
-      } else if (type_sz(op[0].type) < 8) {
+      } else if (type_sz(op[0].type) == 2) {
+         /* AND(val, 0x8000) gives the sign bit.
+          *
+          * Predicated OR ORs 1.0 (0x3c00) with the sign bit if val is not zero.
+          */
+         fs_reg zero = retype(brw_imm_uw(0), BRW_REGISTER_TYPE_HF);
+         bld.CMP(bld.null_reg_f(), op[0], zero, BRW_CONDITIONAL_NZ);
+
+         fs_reg result_int = retype(result, BRW_REGISTER_TYPE_UW);
+         op[0].type = BRW_REGISTER_TYPE_UW;
+         result.type = BRW_REGISTER_TYPE_UW;
+         bld.AND(result_int, op[0], brw_imm_uw(0x8000u));
+
+         inst = bld.OR(result_int, result_int, brw_imm_uw(0x3c00u));
+         inst->predicate = BRW_PREDICATE_NORMAL;
+      } else if (type_sz(op[0].type) == 4) {
          /* AND(val, 0x80000000) gives the sign bit.
           *
           * Predicated OR ORs 1.0 (0x3f800000) with the sign bit if val is not
@@ -866,6 +881,7 @@  fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
           * - The sign is encoded in the high 32-bit of each DF
           * - We need to produce a DF result.
           */
+         assert(type_sz(op[0].type) == 8);
 
          fs_reg zero = vgrf(glsl_type::double_type);
          bld.MOV(zero, setup_imm_df(bld, 0.0));

Comments

On Tue, Jan 15, 2019 at 7:54 AM Iago Toral Quiroga <itoral@igalia.com>
wrote:

> v2:
>  - make 16-bit be its own separate case (Jason)
>
> Reviewed-by: Topi Pohjolainen <topi.pohjolainen@intel.com>
> ---
>  src/intel/compiler/brw_fs_nir.cpp | 18 +++++++++++++++++-
>  1 file changed, 17 insertions(+), 1 deletion(-)
>
> diff --git a/src/intel/compiler/brw_fs_nir.cpp
> b/src/intel/compiler/brw_fs_nir.cpp
> index d742f55a957..cf546b8ff09 100644
> --- a/src/intel/compiler/brw_fs_nir.cpp
> +++ b/src/intel/compiler/brw_fs_nir.cpp
> @@ -844,7 +844,22 @@ fs_visitor::nir_emit_alu(const fs_builder &bld,
> nir_alu_instr *instr)
>              : bld.MOV(result, brw_imm_f(1.0f));
>
>           set_predicate(BRW_PREDICATE_NORMAL, inst);
> -      } else if (type_sz(op[0].type) < 8) {
> +      } else if (type_sz(op[0].type) == 2) {
> +         /* AND(val, 0x8000) gives the sign bit.
> +          *
> +          * Predicated OR ORs 1.0 (0x3c00) with the sign bit if val is
> not zero.
> +          */
> +         fs_reg zero = retype(brw_imm_uw(0), BRW_REGISTER_TYPE_HF);
> +         bld.CMP(bld.null_reg_f(), op[0], zero, BRW_CONDITIONAL_NZ);
> +
> +         fs_reg result_int = retype(result, BRW_REGISTER_TYPE_UW);
> +         op[0].type = BRW_REGISTER_TYPE_UW;
> +         result.type = BRW_REGISTER_TYPE_UW;
>

Why are you whacking the type on result and also making a result_int temp?
I guess you just copied that from the 32-bit case?  If we're going to whack
result.type (which is fine), just use result for the rest of it.  With that
fixed,

Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>


> +         bld.AND(result_int, op[0], brw_imm_uw(0x8000u));
> +
> +         inst = bld.OR(result_int, result_int, brw_imm_uw(0x3c00u));
> +         inst->predicate = BRW_PREDICATE_NORMAL;
> +      } else if (type_sz(op[0].type) == 4) {
>           /* AND(val, 0x80000000) gives the sign bit.
>            *
>            * Predicated OR ORs 1.0 (0x3f800000) with the sign bit if val
> is not
> @@ -866,6 +881,7 @@ fs_visitor::nir_emit_alu(const fs_builder &bld,
> nir_alu_instr *instr)
>            * - The sign is encoded in the high 32-bit of each DF
>            * - We need to produce a DF result.
>            */
> +         assert(type_sz(op[0].type) == 8);
>
>           fs_reg zero = vgrf(glsl_type::double_type);
>           bld.MOV(zero, setup_imm_df(bld, 0.0));
> --
> 2.17.1
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
On Thu, 2019-01-17 at 13:55 -0600, Jason Ekstrand wrote:
> On Tue, Jan 15, 2019 at 7:54 AM Iago Toral Quiroga <itoral@igalia.com
> > wrote:
> > v2:
> > 
> >  - make 16-bit be its own separate case (Jason)
> > 
> > 
> > 
> > Reviewed-by: Topi Pohjolainen <topi.pohjolainen@intel.com>
> > 
> > ---
> > 
> >  src/intel/compiler/brw_fs_nir.cpp | 18 +++++++++++++++++-
> > 
> >  1 file changed, 17 insertions(+), 1 deletion(-)
> > 
> > 
> > 
> > diff --git a/src/intel/compiler/brw_fs_nir.cpp
> > b/src/intel/compiler/brw_fs_nir.cpp
> > 
> > index d742f55a957..cf546b8ff09 100644
> > 
> > --- a/src/intel/compiler/brw_fs_nir.cpp
> > 
> > +++ b/src/intel/compiler/brw_fs_nir.cpp
> > 
> > @@ -844,7 +844,22 @@ fs_visitor::nir_emit_alu(const fs_builder
> > &bld, nir_alu_instr *instr)
> > 
> >              : bld.MOV(result, brw_imm_f(1.0f));
> > 
> > 
> > 
> >           set_predicate(BRW_PREDICATE_NORMAL, inst);
> > 
> > -      } else if (type_sz(op[0].type) < 8) {
> > 
> > +      } else if (type_sz(op[0].type) == 2) {
> > 
> > +         /* AND(val, 0x8000) gives the sign bit.
> > 
> > +          *
> > 
> > +          * Predicated OR ORs 1.0 (0x3c00) with the sign bit if
> > val is not zero.
> > 
> > +          */
> > 
> > +         fs_reg zero = retype(brw_imm_uw(0),
> > BRW_REGISTER_TYPE_HF);
> > 
> > +         bld.CMP(bld.null_reg_f(), op[0], zero,
> > BRW_CONDITIONAL_NZ);
> > 
> > +
> > 
> > +         fs_reg result_int = retype(result, BRW_REGISTER_TYPE_UW);
> > 
> > +         op[0].type = BRW_REGISTER_TYPE_UW;
> > 
> > +         result.type = BRW_REGISTER_TYPE_UW;
> 
> Why are you whacking the type on result and also making a result_int
> temp?  I guess you just copied that from the 32-bit case?  

Oh yes, I didn't noticed that.
> If we're going to whack result.type (which is fine), just use result
> for the rest of it.  With that fixed,

Right, while I am on it I guess it makes sense to do this small fix for
the 32-bit case in the same patch unless you prefer that to be a
separate change.
> Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
>  
> > +         bld.AND(result_int, op[0], brw_imm_uw(0x8000u));
> > 
> > +
> > 
> > +         inst = bld.OR(result_int, result_int,
> > brw_imm_uw(0x3c00u));
> > 
> > +         inst->predicate = BRW_PREDICATE_NORMAL;
> > 
> > +      } else if (type_sz(op[0].type) == 4) {
> > 
> >           /* AND(val, 0x80000000) gives the sign bit.
> > 
> >            *
> > 
> >            * Predicated OR ORs 1.0 (0x3f800000) with the sign bit
> > if val is not
> > 
> > @@ -866,6 +881,7 @@ fs_visitor::nir_emit_alu(const fs_builder &bld,
> > nir_alu_instr *instr)
> > 
> >            * - The sign is encoded in the high 32-bit of each DF
> > 
> >            * - We need to produce a DF result.
> > 
> >            */
> > 
> > +         assert(type_sz(op[0].type) == 8);
> > 
> > 
> > 
> >           fs_reg zero = vgrf(glsl_type::double_type);
> > 
> >           bld.MOV(zero, setup_imm_df(bld, 0.0));
> >
On January 18, 209 01:56:05 Iago Toral <itoral@igalia.com> wrote:
> On Thu, 2019-01-17 at 13:55 -0600, Jason Ekstrand wrote:
>> On Tue, Jan 15, 2019 at 7:54 AM Iago Toral Quiroga <itoral@igalia.com> wrote:
>>> v2:
>>> - make 16-bit be its own separate case (Jason)
>>>
>>> Reviewed-by: Topi Pohjolainen <topi.pohjolainen@intel.com>
>>> ---
>>> src/intel/compiler/brw_fs_nir.cpp | 18 +++++++++++++++++-
>>> 1 file changed, 17 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/src/intel/compiler/brw_fs_nir.cpp 
>>> b/src/intel/compiler/brw_fs_nir.cpp
>>> index d742f55a957..cf546b8ff09 100644
>>> --- a/src/intel/compiler/brw_fs_nir.cpp
>>> +++ b/src/intel/compiler/brw_fs_nir.cpp
>>> @@ -844,7 +844,22 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, 
>>> nir_alu_instr *instr)
>>>             : bld.MOV(result, brw_imm_f(1.0f));
>>>
>>>          set_predicate(BRW_PREDICATE_NORMAL, inst);
>>> -      } else if (type_sz(op[0].type) < 8) {
>>> +      } else if (type_sz(op[0].type) == 2) {
>>> +         /* AND(val, 0x8000) gives the sign bit.
>>> +          *
>>> +          * Predicated OR ORs 1.0 (0x3c00) with the sign bit if val is not 
>>> zero.
>>> +          */
>>> +         fs_reg zero = retype(brw_imm_uw(0), BRW_REGISTER_TYPE_HF);
>>> +         bld.CMP(bld.null_reg_f(), op[0], zero, BRW_CONDITIONAL_NZ);
>>> +
>>> +         fs_reg result_int = retype(result, BRW_REGISTER_TYPE_UW);
>>> +         op[0].type = BRW_REGISTER_TYPE_UW;
>>> +         result.type = BRW_REGISTER_TYPE_UW;
>>
>> Why are you whacking the type on result and also making a result_int temp?  
>> I guess you just copied that from the 32-bit case?
>
> Oh yes, I didn't noticed that.
>
>> If we're going to whack result.type (which is fine), just use result for 
>> the rest of it.  With that fixed,
>
> Right, while I am on it I guess it makes sense to do this small fix for the 
> 32-bit case in the same patch unless you prefer that to be a separate change.

That's probably best separate on the off chance something bisects to it.  
You can automatically add my review to the new patch though.

>
>> Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
>>
>>> +         bld.AND(result_int, op[0], brw_imm_uw(0x8000u));
>>> +
>>> +         inst = bld.OR(result_int, result_int, brw_imm_uw(0x3c00u));
>>> +         inst->predicate = BRW_PREDICATE_NORMAL;
>>> +      } else if (type_sz(op[0].type) == 4) {
>>>          /* AND(val, 0x80000000) gives the sign bit.
>>>           *
>>>           * Predicated OR ORs 1.0 (0x3f800000) with the sign bit if val is not
>>> @@ -866,6 +881,7 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, 
>>> nir_alu_instr *instr)
>>>           * - The sign is encoded in the high 32-bit of each DF
>>>           * - We need to produce a DF result.
>>>           */
>>> +         assert(type_sz(op[0].type) == 8);
>>>
>>>          fs_reg zero = vgrf(glsl_type::double_type);
>>>          bld.MOV(zero, setup_imm_df(bld, 0.0));