[v3,17/42] intel/compiler: add new half-float register type for 3-src instructions

Submitted by Iago Toral Quiroga on Jan. 15, 2019, 1:53 p.m.

Details

Message ID 20190115135414.2313-18-itoral@igalia.com
State New
Headers show
Series "intel: VK_KHR_shader_float16_int8 implementation" ( rev: 5 4 ) in Mesa

Not browsing as part of any series.

Commit Message

Iago Toral Quiroga Jan. 15, 2019, 1:53 p.m.
This is available since gen8.

v2: restore previously existing assertion.

Reviewed-by: Topi Pohjolainen <topi.pohjolainen@intel.com> (v1)
---
 src/intel/compiler/brw_reg_type.c | 36 +++++++++++++++++++++++++++----
 1 file changed, 32 insertions(+), 4 deletions(-)

Patch hide | download patch | download mbox

diff --git a/src/intel/compiler/brw_reg_type.c b/src/intel/compiler/brw_reg_type.c
index 60240ba1513..09b3ea61d4c 100644
--- a/src/intel/compiler/brw_reg_type.c
+++ b/src/intel/compiler/brw_reg_type.c
@@ -138,6 +138,7 @@  enum hw_3src_reg_type {
    GEN7_3SRC_TYPE_D  = 1,
    GEN7_3SRC_TYPE_UD = 2,
    GEN7_3SRC_TYPE_DF = 3,
+   GEN8_3SRC_TYPE_HF = 4,
 
    /** When ExecutionDatatype is 1: @{ */
    GEN10_ALIGN1_3SRC_REG_TYPE_HF = 0b000,
@@ -166,6 +167,14 @@  static const struct hw_3src_type {
    [BRW_REGISTER_TYPE_D]  = { GEN7_3SRC_TYPE_D  },
    [BRW_REGISTER_TYPE_UD] = { GEN7_3SRC_TYPE_UD },
    [BRW_REGISTER_TYPE_DF] = { GEN7_3SRC_TYPE_DF },
+}, gen8_hw_3src_type[] = {
+   [0 ... BRW_REGISTER_TYPE_LAST] = { INVALID },
+
+   [BRW_REGISTER_TYPE_F]  = { GEN7_3SRC_TYPE_F  },
+   [BRW_REGISTER_TYPE_D]  = { GEN7_3SRC_TYPE_D  },
+   [BRW_REGISTER_TYPE_UD] = { GEN7_3SRC_TYPE_UD },
+   [BRW_REGISTER_TYPE_DF] = { GEN7_3SRC_TYPE_DF },
+   [BRW_REGISTER_TYPE_HF] = { GEN8_3SRC_TYPE_HF },
 }, gen10_hw_3src_align1_type[] = {
 #define E(x) BRW_ALIGN1_3SRC_EXEC_TYPE_##x
    [0 ... BRW_REGISTER_TYPE_LAST] = { INVALID },
@@ -249,6 +258,20 @@  brw_hw_type_to_reg_type(const struct gen_device_info *devinfo,
    unreachable("not reached");
 }
 
+static inline const struct hw_3src_type *
+get_hw_3src_type_map(const struct gen_device_info *devinfo, uint32_t *size)
+{
+   if (devinfo->gen < 8) {
+      if (size)
+         *size = ARRAY_SIZE(gen7_hw_3src_type);
+      return gen7_hw_3src_type;
+   } else {
+      if (size)
+         *size = ARRAY_SIZE(gen8_hw_3src_type);
+      return gen8_hw_3src_type;
+   }
+}
+
 /**
  * Convert a brw_reg_type enumeration value into the hardware representation
  * for a 3-src align16 instruction
@@ -257,9 +280,12 @@  unsigned
 brw_reg_type_to_a16_hw_3src_type(const struct gen_device_info *devinfo,
                                  enum brw_reg_type type)
 {
-   assert(type < ARRAY_SIZE(gen7_hw_3src_type));
-   assert(gen7_hw_3src_type[type].reg_type != (enum hw_3src_reg_type)INVALID);
-   return gen7_hw_3src_type[type].reg_type;
+   uint32_t map_size;
+   const struct hw_3src_type *hw_3src_type_map =
+      get_hw_3src_type_map(devinfo, &map_size);
+   assert(type < map_size);
+   assert(hw_3src_type_map[type].reg_type != (enum hw_3src_reg_type)INVALID);
+   return hw_3src_type_map[type].reg_type;
 }
 
 /**
@@ -283,8 +309,10 @@  enum brw_reg_type
 brw_a16_hw_3src_type_to_reg_type(const struct gen_device_info *devinfo,
                                  unsigned hw_type)
 {
+   const struct hw_3src_type *hw_3src_type_map =
+      get_hw_3src_type_map(devinfo, NULL);
    for (enum brw_reg_type i = 0; i <= BRW_REGISTER_TYPE_LAST; i++) {
-      if (gen7_hw_3src_type[i].reg_type == hw_type) {
+      if (hw_3src_type_map[i].reg_type == hw_type) {
          return i;
       }
    }

Comments

On Tue, Jan 15, 2019 at 5:55 AM Iago Toral Quiroga <itoral@igalia.com> wrote:
>
> This is available since gen8.
>
> v2: restore previously existing assertion.
>
> Reviewed-by: Topi Pohjolainen <topi.pohjolainen@intel.com> (v1)
> ---
>  src/intel/compiler/brw_reg_type.c | 36 +++++++++++++++++++++++++++----
>  1 file changed, 32 insertions(+), 4 deletions(-)
>
> diff --git a/src/intel/compiler/brw_reg_type.c b/src/intel/compiler/brw_reg_type.c
> index 60240ba1513..09b3ea61d4c 100644
> --- a/src/intel/compiler/brw_reg_type.c
> +++ b/src/intel/compiler/brw_reg_type.c
> @@ -138,6 +138,7 @@ enum hw_3src_reg_type {
>     GEN7_3SRC_TYPE_D  = 1,
>     GEN7_3SRC_TYPE_UD = 2,
>     GEN7_3SRC_TYPE_DF = 3,
> +   GEN8_3SRC_TYPE_HF = 4,
>
>     /** When ExecutionDatatype is 1: @{ */
>     GEN10_ALIGN1_3SRC_REG_TYPE_HF = 0b000,
> @@ -166,6 +167,14 @@ static const struct hw_3src_type {
>     [BRW_REGISTER_TYPE_D]  = { GEN7_3SRC_TYPE_D  },
>     [BRW_REGISTER_TYPE_UD] = { GEN7_3SRC_TYPE_UD },
>     [BRW_REGISTER_TYPE_DF] = { GEN7_3SRC_TYPE_DF },
> +}, gen8_hw_3src_type[] = {
> +   [0 ... BRW_REGISTER_TYPE_LAST] = { INVALID },
> +
> +   [BRW_REGISTER_TYPE_F]  = { GEN7_3SRC_TYPE_F  },
> +   [BRW_REGISTER_TYPE_D]  = { GEN7_3SRC_TYPE_D  },
> +   [BRW_REGISTER_TYPE_UD] = { GEN7_3SRC_TYPE_UD },
> +   [BRW_REGISTER_TYPE_DF] = { GEN7_3SRC_TYPE_DF },
> +   [BRW_REGISTER_TYPE_HF] = { GEN8_3SRC_TYPE_HF },
>  }, gen10_hw_3src_align1_type[] = {
>  #define E(x) BRW_ALIGN1_3SRC_EXEC_TYPE_##x
>     [0 ... BRW_REGISTER_TYPE_LAST] = { INVALID },
> @@ -249,6 +258,20 @@ brw_hw_type_to_reg_type(const struct gen_device_info *devinfo,
>     unreachable("not reached");
>  }
>
> +static inline const struct hw_3src_type *
> +get_hw_3src_type_map(const struct gen_device_info *devinfo, uint32_t *size)
> +{
> +   if (devinfo->gen < 8) {
> +      if (size)
> +         *size = ARRAY_SIZE(gen7_hw_3src_type);
> +      return gen7_hw_3src_type;
> +   } else {
> +      if (size)
> +         *size = ARRAY_SIZE(gen8_hw_3src_type);
> +      return gen8_hw_3src_type;
> +   }
> +}

I would rather inline this code and remove the function, like we
already do for example:

   const struct hw_type *table;

   if (devinfo->gen >= 11) {
      assert(type < ARRAY_SIZE(gen11_hw_type));
      table = gen11_hw_type;
   } else {
      assert(type < ARRAY_SIZE(gen4_hw_type));
      table = gen4_hw_type;
   }

But I'm not even sure that separate gen7 vs gen8 tables are required,
since gen8 just adds one additional value. I thought we had some code
that essentially did assert(devinfo->gen >= 8 || type !=
BRW_REGISTER_TYPE_HF), but I don't see it now.

We have checks that Q/UQ/DF are only used when 64-bit hw support is
available, so maybe that's what I'm thinking of.
On Tue, 2019-01-22 at 15:46 -0800, Matt Turner wrote:
> On Tue, Jan 15, 2019 at 5:55 AM Iago Toral Quiroga <itoral@igalia.com
> > wrote:
> > 
> > This is available since gen8.
> > 
> > v2: restore previously existing assertion.
> > 
> > Reviewed-by: Topi Pohjolainen <topi.pohjolainen@intel.com> (v1)
> > ---
> >  src/intel/compiler/brw_reg_type.c | 36
> > +++++++++++++++++++++++++++----
> >  1 file changed, 32 insertions(+), 4 deletions(-)
> > 
> > diff --git a/src/intel/compiler/brw_reg_type.c
> > b/src/intel/compiler/brw_reg_type.c
> > index 60240ba1513..09b3ea61d4c 100644
> > --- a/src/intel/compiler/brw_reg_type.c
> > +++ b/src/intel/compiler/brw_reg_type.c
> > @@ -138,6 +138,7 @@ enum hw_3src_reg_type {
> >     GEN7_3SRC_TYPE_D  = 1,
> >     GEN7_3SRC_TYPE_UD = 2,
> >     GEN7_3SRC_TYPE_DF = 3,
> > +   GEN8_3SRC_TYPE_HF = 4,
> > 
> >     /** When ExecutionDatatype is 1: @{ */
> >     GEN10_ALIGN1_3SRC_REG_TYPE_HF = 0b000,
> > @@ -166,6 +167,14 @@ static const struct hw_3src_type {
> >     [BRW_REGISTER_TYPE_D]  = { GEN7_3SRC_TYPE_D  },
> >     [BRW_REGISTER_TYPE_UD] = { GEN7_3SRC_TYPE_UD },
> >     [BRW_REGISTER_TYPE_DF] = { GEN7_3SRC_TYPE_DF },
> > +}, gen8_hw_3src_type[] = {
> > +   [0 ... BRW_REGISTER_TYPE_LAST] = { INVALID },
> > +
> > +   [BRW_REGISTER_TYPE_F]  = { GEN7_3SRC_TYPE_F  },
> > +   [BRW_REGISTER_TYPE_D]  = { GEN7_3SRC_TYPE_D  },
> > +   [BRW_REGISTER_TYPE_UD] = { GEN7_3SRC_TYPE_UD },
> > +   [BRW_REGISTER_TYPE_DF] = { GEN7_3SRC_TYPE_DF },
> > +   [BRW_REGISTER_TYPE_HF] = { GEN8_3SRC_TYPE_HF },
> >  }, gen10_hw_3src_align1_type[] = {
> >  #define E(x) BRW_ALIGN1_3SRC_EXEC_TYPE_##x
> >     [0 ... BRW_REGISTER_TYPE_LAST] = { INVALID },
> > @@ -249,6 +258,20 @@ brw_hw_type_to_reg_type(const struct
> > gen_device_info *devinfo,
> >     unreachable("not reached");
> >  }
> > 
> > +static inline const struct hw_3src_type *
> > +get_hw_3src_type_map(const struct gen_device_info *devinfo,
> > uint32_t *size)
> > +{
> > +   if (devinfo->gen < 8) {
> > +      if (size)
> > +         *size = ARRAY_SIZE(gen7_hw_3src_type);
> > +      return gen7_hw_3src_type;
> > +   } else {
> > +      if (size)
> > +         *size = ARRAY_SIZE(gen8_hw_3src_type);
> > +      return gen8_hw_3src_type;
> > +   }
> > +}
> 
> I would rather inline this code and remove the function, like we
> already do for example:
> 
>    const struct hw_type *table;
> 
>    if (devinfo->gen >= 11) {
>       assert(type < ARRAY_SIZE(gen11_hw_type));
>       table = gen11_hw_type;
>    } else {
>       assert(type < ARRAY_SIZE(gen4_hw_type));
>       table = gen4_hw_type;
>    }
> 
> But I'm not even sure that separate gen7 vs gen8 tables are required,
> since gen8 just adds one additional value. I thought we had some code
> that essentially did assert(devinfo->gen >= 8 || type !=
> BRW_REGISTER_TYPE_HF), but I don't see it now.

I am liking the idea though, as you say, there is only one value that
changes after all... should I make that change? (and rename the table
to be prefixed by gen8_ instead of gen_7.

> We have checks that Q/UQ/DF are only used when 64-bit hw support is
> available, so maybe that's what I'm thinking of.