[v2,51/53] intel/compiler: support half-float in the combine constants pass

Submitted by Iago Toral Quiroga on Dec. 19, 2018, 11:51 a.m.

Details

Message ID 20181219115121.20815-52-itoral@igalia.com
State New
Headers show
Series "intel: VK_KHR_shader_float16_int8 implementation" ( rev: 3 ) in Mesa

Not browsing as part of any series.

Commit Message

Iago Toral Quiroga Dec. 19, 2018, 11:51 a.m.
---
 .../compiler/brw_fs_combine_constants.cpp     | 60 +++++++++++++++----
 1 file changed, 49 insertions(+), 11 deletions(-)

Patch hide | download patch | download mbox

diff --git a/src/intel/compiler/brw_fs_combine_constants.cpp b/src/intel/compiler/brw_fs_combine_constants.cpp
index e0c95d379b8..24307e365ab 100644
--- a/src/intel/compiler/brw_fs_combine_constants.cpp
+++ b/src/intel/compiler/brw_fs_combine_constants.cpp
@@ -36,6 +36,7 @@ 
 
 #include "brw_fs.h"
 #include "brw_cfg.h"
+#include "util/half_float.h"
 
 using namespace brw;
 
@@ -114,8 +115,9 @@  struct imm {
     */
    exec_list *uses;
 
-   /** The immediate value.  We currently only handle floats. */
+   /** The immediate value.  We currently only handle float and half-float. */
    float val;
+   brw_reg_type type;
 
    /**
     * The GRF register and subregister number where we've decided to store the
@@ -145,10 +147,10 @@  struct table {
 };
 
 static struct imm *
-find_imm(struct table *table, float val)
+find_imm(struct table *table, float val, brw_reg_type type)
 {
    for (int i = 0; i < table->len; i++) {
-      if (table->imm[i].val == val) {
+      if (table->imm[i].val == val && table->imm[i].type == type) {
          return &table->imm[i];
       }
    }
@@ -190,6 +192,20 @@  compare(const void *_a, const void *_b)
    return a->first_use_ip - b->first_use_ip;
 }
 
+static bool
+needs_negate(float reg_val, float imm_val, brw_reg_type type)
+{
+   /* reg_val represents the immediate value in the register in its original
+    * bit-size, while imm_val is always a valid 32-bit float value.
+    */
+   if (type == BRW_REGISTER_TYPE_HF) {
+      uint32_t reg_val_ud = *((uint32_t *) &reg_val);
+      reg_val = _mesa_half_to_float(reg_val_ud & 0xffff);
+   }
+
+   return signbit(imm_val) != signbit(reg_val);
+}
+
 bool
 fs_visitor::opt_combine_constants()
 {
@@ -215,12 +231,20 @@  fs_visitor::opt_combine_constants()
 
       for (int i = 0; i < inst->sources; i++) {
          if (inst->src[i].file != IMM ||
-             inst->src[i].type != BRW_REGISTER_TYPE_F)
+             (inst->src[i].type != BRW_REGISTER_TYPE_F &&
+              inst->src[i].type != BRW_REGISTER_TYPE_HF))
             continue;
 
-         float val = !inst->can_do_source_mods(devinfo) ? inst->src[i].f :
-                     fabs(inst->src[i].f);
-         struct imm *imm = find_imm(&table, val);
+         float val;
+         if (inst->src[i].type == BRW_REGISTER_TYPE_F) {
+            val = !inst->can_do_source_mods(devinfo) ? inst->src[i].f :
+                        fabs(inst->src[i].f);
+         } else {
+            val = !inst->can_do_source_mods(devinfo) ?
+               _mesa_half_to_float(inst->src[i].d & 0xffff) :
+               fabs(_mesa_half_to_float(inst->src[i].d & 0xffff));
+         }
+         struct imm *imm = find_imm(&table, val, inst->src[i].type);
 
          if (imm) {
             bblock_t *intersection = cfg_t::intersect(block, imm->block);
@@ -238,6 +262,7 @@  fs_visitor::opt_combine_constants()
             imm->uses = new(const_ctx) exec_list();
             imm->uses->push_tail(link(const_ctx, &inst->src[i]));
             imm->val = val;
+            imm->type = inst->src[i].type;
             imm->uses_by_coissue = could_coissue(devinfo, inst);
             imm->must_promote = must_promote_imm(devinfo, inst);
             imm->first_use_ip = ip;
@@ -278,12 +303,23 @@  fs_visitor::opt_combine_constants()
                       imm->block->last_non_control_flow_inst()->next);
       const fs_builder ibld = bld.at(imm->block, n).exec_all().group(1, 0);
 
-      ibld.MOV(reg, brw_imm_f(imm->val));
+      reg = retype(reg, imm->type);
+      if (imm->type == BRW_REGISTER_TYPE_F) {
+         ibld.MOV(reg, brw_imm_f(imm->val));
+      } else {
+         const uint16_t val_hf = _mesa_float_to_half(imm->val);
+         ibld.MOV(reg, retype(brw_imm_uw(val_hf), BRW_REGISTER_TYPE_HF));
+      }
       imm->nr = reg.nr;
       imm->subreg_offset = reg.offset;
 
+      /* Keep offsets 32-bit aligned since we are mixing 32-bit and 16-bit
+       * constants into the same register
+       *
+       * TODO: try to pack pairs of HF constants into each 32-bit slot
+       */
       reg.offset += sizeof(float);
-      if (reg.offset == 8 * sizeof(float)) {
+      if (reg.offset == REG_SIZE) {
          reg.nr = alloc.allocate(1);
          reg.offset = 0;
       }
@@ -296,11 +332,13 @@  fs_visitor::opt_combine_constants()
          fs_reg *reg = link->reg;
          reg->file = VGRF;
          reg->nr = table.imm[i].nr;
+         reg->type = table.imm[i].type;
          reg->offset = table.imm[i].subreg_offset;
          reg->stride = 0;
-         reg->negate = signbit(reg->f) != signbit(table.imm[i].val);
+         reg->negate = needs_negate(reg->f, table.imm[i].val, table.imm[i].type);
          assert((isnan(reg->f) && isnan(table.imm[i].val)) ||
-                fabsf(reg->f) == fabs(table.imm[i].val));
+                fabsf(reg->f) == fabs(table.imm[i].val) ||
+                table.imm[i].type == BRW_REGISTER_TYPE_HF);
       }
    }
 

Comments

On Wed, Dec 19, 2018 at 12:51:19PM +0100, Iago Toral Quiroga wrote:
> ---
>  .../compiler/brw_fs_combine_constants.cpp     | 60 +++++++++++++++----
>  1 file changed, 49 insertions(+), 11 deletions(-)
> 
> diff --git a/src/intel/compiler/brw_fs_combine_constants.cpp b/src/intel/compiler/brw_fs_combine_constants.cpp
> index e0c95d379b8..24307e365ab 100644
> --- a/src/intel/compiler/brw_fs_combine_constants.cpp
> +++ b/src/intel/compiler/brw_fs_combine_constants.cpp
> @@ -36,6 +36,7 @@
>  
>  #include "brw_fs.h"
>  #include "brw_cfg.h"
> +#include "util/half_float.h"
>  
>  using namespace brw;
>  
> @@ -114,8 +115,9 @@ struct imm {
>      */
>     exec_list *uses;
>  
> -   /** The immediate value.  We currently only handle floats. */
> +   /** The immediate value.  We currently only handle float and half-float. */
>     float val;
> +   brw_reg_type type;
>  
>     /**
>      * The GRF register and subregister number where we've decided to store the
> @@ -145,10 +147,10 @@ struct table {
>  };
>  
>  static struct imm *
> -find_imm(struct table *table, float val)
> +find_imm(struct table *table, float val, brw_reg_type type)
>  {
>     for (int i = 0; i < table->len; i++) {
> -      if (table->imm[i].val == val) {
> +      if (table->imm[i].val == val && table->imm[i].type == type) {
>           return &table->imm[i];
>        }
>     }
> @@ -190,6 +192,20 @@ compare(const void *_a, const void *_b)
>     return a->first_use_ip - b->first_use_ip;
>  }
>  
> +static bool
> +needs_negate(float reg_val, float imm_val, brw_reg_type type)
> +{
> +   /* reg_val represents the immediate value in the register in its original
> +    * bit-size, while imm_val is always a valid 32-bit float value.
> +    */
> +   if (type == BRW_REGISTER_TYPE_HF) {
> +      uint32_t reg_val_ud = *((uint32_t *) &reg_val);

Casting "float" to uint32_t and reading then only 16-bits from it looks a
little ugly. Could we use "uint32_t reg_val" and then below in the caller
use "reg->u" instead of "reg->f"?

> +      reg_val = _mesa_half_to_float(reg_val_ud & 0xffff);
> +   }
> +
> +   return signbit(imm_val) != signbit(reg_val);
> +}
> +
>  bool
>  fs_visitor::opt_combine_constants()
>  {
> @@ -215,12 +231,20 @@ fs_visitor::opt_combine_constants()
>  
>        for (int i = 0; i < inst->sources; i++) {
>           if (inst->src[i].file != IMM ||
> -             inst->src[i].type != BRW_REGISTER_TYPE_F)
> +             (inst->src[i].type != BRW_REGISTER_TYPE_F &&
> +              inst->src[i].type != BRW_REGISTER_TYPE_HF))
>              continue;
>  
> -         float val = !inst->can_do_source_mods(devinfo) ? inst->src[i].f :
> -                     fabs(inst->src[i].f);
> -         struct imm *imm = find_imm(&table, val);
> +         float val;
> +         if (inst->src[i].type == BRW_REGISTER_TYPE_F) {
> +            val = !inst->can_do_source_mods(devinfo) ? inst->src[i].f :
> +                        fabs(inst->src[i].f);
> +         } else {
> +            val = !inst->can_do_source_mods(devinfo) ?
> +               _mesa_half_to_float(inst->src[i].d & 0xffff) :
> +               fabs(_mesa_half_to_float(inst->src[i].d & 0xffff));
> +         }
> +         struct imm *imm = find_imm(&table, val, inst->src[i].type);
>  
>           if (imm) {
>              bblock_t *intersection = cfg_t::intersect(block, imm->block);
> @@ -238,6 +262,7 @@ fs_visitor::opt_combine_constants()
>              imm->uses = new(const_ctx) exec_list();
>              imm->uses->push_tail(link(const_ctx, &inst->src[i]));
>              imm->val = val;
> +            imm->type = inst->src[i].type;
>              imm->uses_by_coissue = could_coissue(devinfo, inst);
>              imm->must_promote = must_promote_imm(devinfo, inst);
>              imm->first_use_ip = ip;
> @@ -278,12 +303,23 @@ fs_visitor::opt_combine_constants()
>                        imm->block->last_non_control_flow_inst()->next);
>        const fs_builder ibld = bld.at(imm->block, n).exec_all().group(1, 0);
>  
> -      ibld.MOV(reg, brw_imm_f(imm->val));
> +      reg = retype(reg, imm->type);
> +      if (imm->type == BRW_REGISTER_TYPE_F) {
> +         ibld.MOV(reg, brw_imm_f(imm->val));
> +      } else {
> +         const uint16_t val_hf = _mesa_float_to_half(imm->val);
> +         ibld.MOV(reg, retype(brw_imm_uw(val_hf), BRW_REGISTER_TYPE_HF));
> +      }
>        imm->nr = reg.nr;
>        imm->subreg_offset = reg.offset;
>  
> +      /* Keep offsets 32-bit aligned since we are mixing 32-bit and 16-bit
> +       * constants into the same register
> +       *
> +       * TODO: try to pack pairs of HF constants into each 32-bit slot
> +       */
>        reg.offset += sizeof(float);
> -      if (reg.offset == 8 * sizeof(float)) {
> +      if (reg.offset == REG_SIZE) {
>           reg.nr = alloc.allocate(1);
>           reg.offset = 0;
>        }
> @@ -296,11 +332,13 @@ fs_visitor::opt_combine_constants()
>           fs_reg *reg = link->reg;
>           reg->file = VGRF;
>           reg->nr = table.imm[i].nr;
> +         reg->type = table.imm[i].type;
>           reg->offset = table.imm[i].subreg_offset;
>           reg->stride = 0;
> -         reg->negate = signbit(reg->f) != signbit(table.imm[i].val);
> +         reg->negate = needs_negate(reg->f, table.imm[i].val, table.imm[i].type);
>           assert((isnan(reg->f) && isnan(table.imm[i].val)) ||
> -                fabsf(reg->f) == fabs(table.imm[i].val));
> +                fabsf(reg->f) == fabs(table.imm[i].val) ||
> +                table.imm[i].type == BRW_REGISTER_TYPE_HF);
>        }
>     }
>  
> -- 
> 2.17.1
> 
> _______________________________________________
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
On Wed, 2019-01-02 at 13:02 +0200, Pohjolainen, Topi wrote:
> On Wed, Dec 19, 2018 at 12:51:19PM +0100, Iago Toral Quiroga wrote:
> > ---
> >  .../compiler/brw_fs_combine_constants.cpp     | 60
> > +++++++++++++++----
> >  1 file changed, 49 insertions(+), 11 deletions(-)
> > 
> > diff --git a/src/intel/compiler/brw_fs_combine_constants.cpp
> > b/src/intel/compiler/brw_fs_combine_constants.cpp
> > index e0c95d379b8..24307e365ab 100644
> > --- a/src/intel/compiler/brw_fs_combine_constants.cpp
> > +++ b/src/intel/compiler/brw_fs_combine_constants.cpp
> > @@ -36,6 +36,7 @@
> >  
> >  #include "brw_fs.h"
> >  #include "brw_cfg.h"
> > +#include "util/half_float.h"
> >  
> >  using namespace brw;
> >  
> > @@ -114,8 +115,9 @@ struct imm {
> >      */
> >     exec_list *uses;
> >  
> > -   /** The immediate value.  We currently only handle floats. */
> > +   /** The immediate value.  We currently only handle float and
> > half-float. */
> >     float val;
> > +   brw_reg_type type;
> >  
> >     /**
> >      * The GRF register and subregister number where we've decided
> > to store the
> > @@ -145,10 +147,10 @@ struct table {
> >  };
> >  
> >  static struct imm *
> > -find_imm(struct table *table, float val)
> > +find_imm(struct table *table, float val, brw_reg_type type)
> >  {
> >     for (int i = 0; i < table->len; i++) {
> > -      if (table->imm[i].val == val) {
> > +      if (table->imm[i].val == val && table->imm[i].type == type)
> > {
> >           return &table->imm[i];
> >        }
> >     }
> > @@ -190,6 +192,20 @@ compare(const void *_a, const void *_b)
> >     return a->first_use_ip - b->first_use_ip;
> >  }
> >  
> > +static bool
> > +needs_negate(float reg_val, float imm_val, brw_reg_type type)
> > +{
> > +   /* reg_val represents the immediate value in the register in
> > its original
> > +    * bit-size, while imm_val is always a valid 32-bit float
> > value.
> > +    */
> > +   if (type == BRW_REGISTER_TYPE_HF) {
> > +      uint32_t reg_val_ud = *((uint32_t *) &reg_val);
> 
> Casting "float" to uint32_t and reading then only 16-bits from it
> looks a
> little ugly. Could we use "uint32_t reg_val" and then below in the
> caller
> use "reg->u" instead of "reg->f"?

No, because the sigbit macro used below works on floating point values,
so if we did that, we would have to do a pointer cast to float before
we call that macro with reg_val.

One thing we can do is to pass the fs_reg instead of reg->f and then
read reg->f or reg->u as we need inside this function. We would be
abusing a bit the interface but we would get rid of the casting that
way... I am not sure if I like it better or worse.

Alternatively, we could just memcpy reg_val into a uint32_t to avoid
the pointer cast. Maybe that looks less ugly.

What do you think?

> > +      reg_val = _mesa_half_to_float(reg_val_ud & 0xffff);
> > +   }
> > +
> > +   return signbit(imm_val) != signbit(reg_val);
> > +}
> > +
> >  bool
> >  fs_visitor::opt_combine_constants()
> >  {
> > @@ -215,12 +231,20 @@ fs_visitor::opt_combine_constants()
> >  
> >        for (int i = 0; i < inst->sources; i++) {
> >           if (inst->src[i].file != IMM ||
> > -             inst->src[i].type != BRW_REGISTER_TYPE_F)
> > +             (inst->src[i].type != BRW_REGISTER_TYPE_F &&
> > +              inst->src[i].type != BRW_REGISTER_TYPE_HF))
> >              continue;
> >  
> > -         float val = !inst->can_do_source_mods(devinfo) ? inst-
> > >src[i].f :
> > -                     fabs(inst->src[i].f);
> > -         struct imm *imm = find_imm(&table, val);
> > +         float val;
> > +         if (inst->src[i].type == BRW_REGISTER_TYPE_F) {
> > +            val = !inst->can_do_source_mods(devinfo) ? inst-
> > >src[i].f :
> > +                        fabs(inst->src[i].f);
> > +         } else {
> > +            val = !inst->can_do_source_mods(devinfo) ?
> > +               _mesa_half_to_float(inst->src[i].d & 0xffff) :
> > +               fabs(_mesa_half_to_float(inst->src[i].d & 0xffff));
> > +         }
> > +         struct imm *imm = find_imm(&table, val, inst-
> > >src[i].type);
> >  
> >           if (imm) {
> >              bblock_t *intersection = cfg_t::intersect(block, imm-
> > >block);
> > @@ -238,6 +262,7 @@ fs_visitor::opt_combine_constants()
> >              imm->uses = new(const_ctx) exec_list();
> >              imm->uses->push_tail(link(const_ctx, &inst->src[i]));
> >              imm->val = val;
> > +            imm->type = inst->src[i].type;
> >              imm->uses_by_coissue = could_coissue(devinfo, inst);
> >              imm->must_promote = must_promote_imm(devinfo, inst);
> >              imm->first_use_ip = ip;
> > @@ -278,12 +303,23 @@ fs_visitor::opt_combine_constants()
> >                        imm->block->last_non_control_flow_inst()-
> > >next);
> >        const fs_builder ibld = bld.at(imm->block,
> > n).exec_all().group(1, 0);
> >  
> > -      ibld.MOV(reg, brw_imm_f(imm->val));
> > +      reg = retype(reg, imm->type);
> > +      if (imm->type == BRW_REGISTER_TYPE_F) {
> > +         ibld.MOV(reg, brw_imm_f(imm->val));
> > +      } else {
> > +         const uint16_t val_hf = _mesa_float_to_half(imm->val);
> > +         ibld.MOV(reg, retype(brw_imm_uw(val_hf),
> > BRW_REGISTER_TYPE_HF));
> > +      }
> >        imm->nr = reg.nr;
> >        imm->subreg_offset = reg.offset;
> >  
> > +      /* Keep offsets 32-bit aligned since we are mixing 32-bit
> > and 16-bit
> > +       * constants into the same register
> > +       *
> > +       * TODO: try to pack pairs of HF constants into each 32-bit
> > slot
> > +       */
> >        reg.offset += sizeof(float);
> > -      if (reg.offset == 8 * sizeof(float)) {
> > +      if (reg.offset == REG_SIZE) {
> >           reg.nr = alloc.allocate(1);
> >           reg.offset = 0;
> >        }
> > @@ -296,11 +332,13 @@ fs_visitor::opt_combine_constants()
> >           fs_reg *reg = link->reg;
> >           reg->file = VGRF;
> >           reg->nr = table.imm[i].nr;
> > +         reg->type = table.imm[i].type;
> >           reg->offset = table.imm[i].subreg_offset;
> >           reg->stride = 0;
> > -         reg->negate = signbit(reg->f) !=
> > signbit(table.imm[i].val);
> > +         reg->negate = needs_negate(reg->f, table.imm[i].val,
> > table.imm[i].type);
> >           assert((isnan(reg->f) && isnan(table.imm[i].val)) ||
> > -                fabsf(reg->f) == fabs(table.imm[i].val));
> > +                fabsf(reg->f) == fabs(table.imm[i].val) ||
> > +                table.imm[i].type == BRW_REGISTER_TYPE_HF);
> >        }
> >     }
> >  
> > -- 
> > 2.17.1
> > 
> > _______________________________________________
> > mesa-dev mailing list
> > mesa-dev@lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
> 
>
On Thu, Jan 03, 2019 at 08:49:48AM +0100, Iago Toral wrote:
> On Wed, 2019-01-02 at 13:02 +0200, Pohjolainen, Topi wrote:
> > On Wed, Dec 19, 2018 at 12:51:19PM +0100, Iago Toral Quiroga wrote:
> > > ---
> > >  .../compiler/brw_fs_combine_constants.cpp     | 60
> > > +++++++++++++++----
> > >  1 file changed, 49 insertions(+), 11 deletions(-)
> > > 
> > > diff --git a/src/intel/compiler/brw_fs_combine_constants.cpp
> > > b/src/intel/compiler/brw_fs_combine_constants.cpp
> > > index e0c95d379b8..24307e365ab 100644
> > > --- a/src/intel/compiler/brw_fs_combine_constants.cpp
> > > +++ b/src/intel/compiler/brw_fs_combine_constants.cpp
> > > @@ -36,6 +36,7 @@
> > >  
> > >  #include "brw_fs.h"
> > >  #include "brw_cfg.h"
> > > +#include "util/half_float.h"
> > >  
> > >  using namespace brw;
> > >  
> > > @@ -114,8 +115,9 @@ struct imm {
> > >      */
> > >     exec_list *uses;
> > >  
> > > -   /** The immediate value.  We currently only handle floats. */
> > > +   /** The immediate value.  We currently only handle float and
> > > half-float. */
> > >     float val;
> > > +   brw_reg_type type;
> > >  
> > >     /**
> > >      * The GRF register and subregister number where we've decided
> > > to store the
> > > @@ -145,10 +147,10 @@ struct table {
> > >  };
> > >  
> > >  static struct imm *
> > > -find_imm(struct table *table, float val)
> > > +find_imm(struct table *table, float val, brw_reg_type type)
> > >  {
> > >     for (int i = 0; i < table->len; i++) {
> > > -      if (table->imm[i].val == val) {
> > > +      if (table->imm[i].val == val && table->imm[i].type == type)
> > > {
> > >           return &table->imm[i];
> > >        }
> > >     }
> > > @@ -190,6 +192,20 @@ compare(const void *_a, const void *_b)
> > >     return a->first_use_ip - b->first_use_ip;
> > >  }
> > >  
> > > +static bool
> > > +needs_negate(float reg_val, float imm_val, brw_reg_type type)
> > > +{
> > > +   /* reg_val represents the immediate value in the register in
> > > its original
> > > +    * bit-size, while imm_val is always a valid 32-bit float
> > > value.
> > > +    */
> > > +   if (type == BRW_REGISTER_TYPE_HF) {
> > > +      uint32_t reg_val_ud = *((uint32_t *) &reg_val);
> > 
> > Casting "float" to uint32_t and reading then only 16-bits from it
> > looks a
> > little ugly. Could we use "uint32_t reg_val" and then below in the
> > caller
> > use "reg->u" instead of "reg->f"?
> 
> No, because the sigbit macro used below works on floating point values,
> so if we did that, we would have to do a pointer cast to float before
> we call that macro with reg_val.

That is true, a cast is needed in any case. I just think using uint32_t is a
little cleaner as the "bag of bits"-type that is then interpreted either as
float or float16.

> 
> One thing we can do is to pass the fs_reg instead of reg->f and then
> read reg->f or reg->u as we need inside this function. We would be
> abusing a bit the interface but we would get rid of the casting that
> way... I am not sure if I like it better or worse.
> 
> Alternatively, we could just memcpy reg_val into a uint32_t to avoid
> the pointer cast. Maybe that looks less ugly.
> 
> What do you think?

I guess my concern is passing in a float and then interpreting it as float16.
Like said above I'm more used to having it as uint in such case where one just
takes the lower 16 bits and casts them. But this is really just nitpicking,
lets go with what you have here.

Reviewed-by: Topi Pohjolainen <topi.pohjolainen@intel.com>

> 
> > > +      reg_val = _mesa_half_to_float(reg_val_ud & 0xffff);
> > > +   }
> > > +
> > > +   return signbit(imm_val) != signbit(reg_val);
> > > +}
> > > +
> > >  bool
> > >  fs_visitor::opt_combine_constants()
> > >  {
> > > @@ -215,12 +231,20 @@ fs_visitor::opt_combine_constants()
> > >  
> > >        for (int i = 0; i < inst->sources; i++) {
> > >           if (inst->src[i].file != IMM ||
> > > -             inst->src[i].type != BRW_REGISTER_TYPE_F)
> > > +             (inst->src[i].type != BRW_REGISTER_TYPE_F &&
> > > +              inst->src[i].type != BRW_REGISTER_TYPE_HF))
> > >              continue;
> > >  
> > > -         float val = !inst->can_do_source_mods(devinfo) ? inst-
> > > >src[i].f :
> > > -                     fabs(inst->src[i].f);
> > > -         struct imm *imm = find_imm(&table, val);
> > > +         float val;
> > > +         if (inst->src[i].type == BRW_REGISTER_TYPE_F) {
> > > +            val = !inst->can_do_source_mods(devinfo) ? inst-
> > > >src[i].f :
> > > +                        fabs(inst->src[i].f);
> > > +         } else {
> > > +            val = !inst->can_do_source_mods(devinfo) ?
> > > +               _mesa_half_to_float(inst->src[i].d & 0xffff) :
> > > +               fabs(_mesa_half_to_float(inst->src[i].d & 0xffff));
> > > +         }
> > > +         struct imm *imm = find_imm(&table, val, inst-
> > > >src[i].type);
> > >  
> > >           if (imm) {
> > >              bblock_t *intersection = cfg_t::intersect(block, imm-
> > > >block);
> > > @@ -238,6 +262,7 @@ fs_visitor::opt_combine_constants()
> > >              imm->uses = new(const_ctx) exec_list();
> > >              imm->uses->push_tail(link(const_ctx, &inst->src[i]));
> > >              imm->val = val;
> > > +            imm->type = inst->src[i].type;
> > >              imm->uses_by_coissue = could_coissue(devinfo, inst);
> > >              imm->must_promote = must_promote_imm(devinfo, inst);
> > >              imm->first_use_ip = ip;
> > > @@ -278,12 +303,23 @@ fs_visitor::opt_combine_constants()
> > >                        imm->block->last_non_control_flow_inst()-
> > > >next);
> > >        const fs_builder ibld = bld.at(imm->block,
> > > n).exec_all().group(1, 0);
> > >  
> > > -      ibld.MOV(reg, brw_imm_f(imm->val));
> > > +      reg = retype(reg, imm->type);
> > > +      if (imm->type == BRW_REGISTER_TYPE_F) {
> > > +         ibld.MOV(reg, brw_imm_f(imm->val));
> > > +      } else {
> > > +         const uint16_t val_hf = _mesa_float_to_half(imm->val);
> > > +         ibld.MOV(reg, retype(brw_imm_uw(val_hf),
> > > BRW_REGISTER_TYPE_HF));
> > > +      }
> > >        imm->nr = reg.nr;
> > >        imm->subreg_offset = reg.offset;
> > >  
> > > +      /* Keep offsets 32-bit aligned since we are mixing 32-bit
> > > and 16-bit
> > > +       * constants into the same register
> > > +       *
> > > +       * TODO: try to pack pairs of HF constants into each 32-bit
> > > slot
> > > +       */
> > >        reg.offset += sizeof(float);
> > > -      if (reg.offset == 8 * sizeof(float)) {
> > > +      if (reg.offset == REG_SIZE) {
> > >           reg.nr = alloc.allocate(1);
> > >           reg.offset = 0;
> > >        }
> > > @@ -296,11 +332,13 @@ fs_visitor::opt_combine_constants()
> > >           fs_reg *reg = link->reg;
> > >           reg->file = VGRF;
> > >           reg->nr = table.imm[i].nr;
> > > +         reg->type = table.imm[i].type;
> > >           reg->offset = table.imm[i].subreg_offset;
> > >           reg->stride = 0;
> > > -         reg->negate = signbit(reg->f) !=
> > > signbit(table.imm[i].val);
> > > +         reg->negate = needs_negate(reg->f, table.imm[i].val,
> > > table.imm[i].type);
> > >           assert((isnan(reg->f) && isnan(table.imm[i].val)) ||
> > > -                fabsf(reg->f) == fabs(table.imm[i].val));
> > > +                fabsf(reg->f) == fabs(table.imm[i].val) ||
> > > +                table.imm[i].type == BRW_REGISTER_TYPE_HF);
> > >        }
> > >     }
> > >  
> > > -- 
> > > 2.17.1
> > > 
> > > _______________________________________________
> > > mesa-dev mailing list
> > > mesa-dev@lists.freedesktop.org
> > > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
> > 
> > 
>