[Mesa-dev,1/2] gallium: add TGSI_OPCODE_FMA (v2)

Submitted by Marek Olšák on March 5, 2015, 4:15 p.m.

Details

Message ID 1425572145-3908-1-git-send-email-maraeo@gmail.com
State New
Headers show

Not browsing as part of any series.

Commit Message

Marek Olšák March 5, 2015, 4:15 p.m.
From: Marek Olšák <marek.olsak@amd.com>

Needed by ARB_gpu_shader5.

v2: select DMAD for FMA with double precision; this can later be changed to DFMA
---
 src/gallium/auxiliary/gallivm/lp_bld_limits.h    |  1 +
 src/gallium/auxiliary/tgsi/tgsi_exec.h           |  1 +
 src/gallium/auxiliary/tgsi/tgsi_info.c           |  2 +-
 src/gallium/auxiliary/tgsi/tgsi_util.c           |  1 +
 src/gallium/docs/source/screen.rst               |  1 +
 src/gallium/docs/source/tgsi.rst                 | 15 +++++++++++++++
 src/gallium/drivers/freedreno/freedreno_screen.c |  1 +
 src/gallium/drivers/i915/i915_screen.c           |  1 +
 src/gallium/drivers/nouveau/nv30/nv30_screen.c   |  2 ++
 src/gallium/drivers/nouveau/nv50/nv50_screen.c   |  1 +
 src/gallium/drivers/nouveau/nvc0/nvc0_screen.c   |  1 +
 src/gallium/drivers/r300/r300_screen.c           |  2 ++
 src/gallium/drivers/r600/r600_pipe.c             |  1 +
 src/gallium/drivers/r600/r600_shader.c           |  6 +++---
 src/gallium/drivers/radeonsi/si_pipe.c           |  1 +
 src/gallium/drivers/svga/svga_screen.c           |  2 ++
 src/gallium/drivers/vc4/vc4_screen.c             |  1 +
 src/gallium/include/pipe/p_defines.h             |  1 +
 src/gallium/include/pipe/p_shader_tokens.h       |  2 +-
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp       | 13 +++++++++----
 20 files changed, 47 insertions(+), 9 deletions(-)

Patch hide | download patch | download mbox

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_limits.h b/src/gallium/auxiliary/gallivm/lp_bld_limits.h
index 2962360..c5c51c1 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_limits.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_limits.h
@@ -129,6 +129,7 @@  gallivm_get_shader_param(enum pipe_shader_cap param)
    case PIPE_SHADER_CAP_DOUBLES:
    case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
    case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
+   case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
       return 0;
    }
    /* if we get here, we missed a shader cap above (and should have seen
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h
index 609c81b..0e59b88 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h
@@ -459,6 +459,7 @@  tgsi_exec_get_shader_param(enum pipe_shader_cap param)
    case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
       return 1;
    case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
+   case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
       return 0;
    }
    /* if we get here, we missed a shader cap above (and should have seen
diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c
index 4d838fd..e6e0a60 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_info.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_info.c
@@ -56,7 +56,7 @@  static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] =
    { 1, 3, 0, 0, 0, 0, COMP, "MAD", TGSI_OPCODE_MAD },
    { 1, 2, 0, 0, 0, 0, COMP, "SUB", TGSI_OPCODE_SUB },
    { 1, 3, 0, 0, 0, 0, COMP, "LRP", TGSI_OPCODE_LRP },
-   { 0, 0, 0, 0, 0, 0, NONE, "", 19 },      /* removed */
+   { 1, 3, 0, 0, 0, 0, COMP, "FMA", TGSI_OPCODE_FMA },
    { 1, 1, 0, 0, 0, 0, REPL, "SQRT", TGSI_OPCODE_SQRT },
    { 1, 3, 0, 0, 0, 0, REPL, "DP2A", TGSI_OPCODE_DP2A },
    { 0, 0, 0, 0, 0, 0, NONE, "", 22 },      /* removed */
diff --git a/src/gallium/auxiliary/tgsi/tgsi_util.c b/src/gallium/auxiliary/tgsi/tgsi_util.c
index d572ff0..e5b8427 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_util.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_util.c
@@ -193,6 +193,7 @@  tgsi_util_get_inst_usage_mask(const struct tgsi_full_instruction *inst,
    case TGSI_OPCODE_MAD:
    case TGSI_OPCODE_SUB:
    case TGSI_OPCODE_LRP:
+   case TGSI_OPCODE_FMA:
    case TGSI_OPCODE_FRC:
    case TGSI_OPCODE_CEIL:
    case TGSI_OPCODE_CLAMP:
diff --git a/src/gallium/docs/source/screen.rst b/src/gallium/docs/source/screen.rst
index e0fd1a2..dd7a012 100644
--- a/src/gallium/docs/source/screen.rst
+++ b/src/gallium/docs/source/screen.rst
@@ -336,6 +336,7 @@  to be 0.
   is supported. If it is, DTRUNC/DCEIL/DFLR/DROUND opcodes may be used.
 * ``PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED``: Whether DFRACEXP and
   DLDEXP are supported.
+* ``PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED``: Whether TGSI_OPCODE_FMA is supported.
 
 
 .. _pipe_compute_cap:
diff --git a/src/gallium/docs/source/tgsi.rst b/src/gallium/docs/source/tgsi.rst
index b0a975a..112df57 100644
--- a/src/gallium/docs/source/tgsi.rst
+++ b/src/gallium/docs/source/tgsi.rst
@@ -272,6 +272,21 @@  This instruction replicates its result.
   dst.w = src0.w \times src1.w + (1 - src0.w) \times src2.w
 
 
+.. opcode:: FMA - Fused Multiply-Add
+
+Perform a * b + c with no intermediate rounding step.
+
+.. math::
+
+  dst.x = src0.x \times src1.x + src2.x
+
+  dst.y = src0.y \times src1.y + src2.y
+
+  dst.z = src0.z \times src1.z + src2.z
+
+  dst.w = src0.w \times src1.w + src2.w
+
+
 .. opcode:: DP2A - 2-component Dot Product And Add
 
 .. math::
diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c
index 2973458..d617465 100644
--- a/src/gallium/drivers/freedreno/freedreno_screen.c
+++ b/src/gallium/drivers/freedreno/freedreno_screen.c
@@ -362,6 +362,7 @@  fd_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
 	case PIPE_SHADER_CAP_DOUBLES:
 	case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
 	case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
+	case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
 		return 0;
 	case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
 		return 1;
diff --git a/src/gallium/drivers/i915/i915_screen.c b/src/gallium/drivers/i915/i915_screen.c
index dc76464..50847e2 100644
--- a/src/gallium/drivers/i915/i915_screen.c
+++ b/src/gallium/drivers/i915/i915_screen.c
@@ -158,6 +158,7 @@  i915_get_shader_param(struct pipe_screen *screen, unsigned shader, enum pipe_sha
       case PIPE_SHADER_CAP_DOUBLES:
       case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
       case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
+      case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
          return 0;
       default:
          debug_printf("%s: Unknown cap %u.\n", __FUNCTION__, cap);
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_screen.c b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
index 0fca9e0..eeb7148 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_screen.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
@@ -250,6 +250,7 @@  nv30_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
       case PIPE_SHADER_CAP_DOUBLES:
       case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
       case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
+      case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
          return 0;
       default:
          debug_printf("unknown vertex shader param %d\n", param);
@@ -289,6 +290,7 @@  nv30_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
       case PIPE_SHADER_CAP_DOUBLES:
       case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
       case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
+      case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
          return 0;
       default:
          debug_printf("unknown fragment shader param %d\n", param);
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
index ed07ba4..829dfbc 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
@@ -289,6 +289,7 @@  nv50_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
    case PIPE_SHADER_CAP_DOUBLES:
    case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
    case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
+   case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
       return 0;
    default:
       NOUVEAU_ERR("unknown PIPE_SHADER_CAP %d\n", param);
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
index 686d892..04c34f5 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -295,6 +295,7 @@  nvc0_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
    case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
       return 1;
    case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
+   case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
       return 0;
    case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
       return 16; /* would be 32 in linked (OpenGL-style) mode */
diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c
index fca8001..752d7e5 100644
--- a/src/gallium/drivers/r300/r300_screen.c
+++ b/src/gallium/drivers/r300/r300_screen.c
@@ -287,6 +287,7 @@  static int r300_get_shader_param(struct pipe_screen *pscreen, unsigned shader, e
         case PIPE_SHADER_CAP_DOUBLES:
         case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
         case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
+        case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
             return 0;
         case PIPE_SHADER_CAP_PREFERRED_IR:
             return PIPE_SHADER_IR_TGSI;
@@ -341,6 +342,7 @@  static int r300_get_shader_param(struct pipe_screen *pscreen, unsigned shader, e
         case PIPE_SHADER_CAP_DOUBLES:
         case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
         case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
+        case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
             return 0;
         case PIPE_SHADER_CAP_PREFERRED_IR:
             return PIPE_SHADER_IR_TGSI;
diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
index c8a0e9c..2670e53 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -493,6 +493,7 @@  static int r600_get_shader_param(struct pipe_screen* pscreen, unsigned shader, e
 		return 0;
 	case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
 	case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
+	case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
 		return 0;
 	}
 	return 0;
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index 2ee59c8..54540c3 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -7295,7 +7295,7 @@  static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
 	{TGSI_OPCODE_MAD,	1, ALU_OP3_MULADD, tgsi_op3},
 	{TGSI_OPCODE_SUB,	0, ALU_OP2_ADD, tgsi_op2},
 	{TGSI_OPCODE_LRP,	0, ALU_OP0_NOP, tgsi_lrp},
-	{19,			0, ALU_OP0_NOP, tgsi_unsupported},
+	{TGSI_OPCODE_FMA,	0, ALU_OP0_NOP, tgsi_unsupported},
 	{TGSI_OPCODE_SQRT,	0, ALU_OP1_SQRT_IEEE, tgsi_trans_srcx_replicate},
 	{TGSI_OPCODE_DP2A,	0, ALU_OP0_NOP, tgsi_unsupported},
 	{22,			0, ALU_OP0_NOP, tgsi_unsupported},
@@ -7494,7 +7494,7 @@  static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = {
 	{TGSI_OPCODE_MAD,	1, ALU_OP3_MULADD, tgsi_op3},
 	{TGSI_OPCODE_SUB,	0, ALU_OP2_ADD, tgsi_op2},
 	{TGSI_OPCODE_LRP,	0, ALU_OP0_NOP, tgsi_lrp},
-	{19,			0, ALU_OP0_NOP, tgsi_unsupported},
+	{TGSI_OPCODE_FMA,	0, ALU_OP0_NOP, tgsi_unsupported},
 	{TGSI_OPCODE_SQRT,	0, ALU_OP1_SQRT_IEEE, tgsi_trans_srcx_replicate},
 	{TGSI_OPCODE_DP2A,	0, ALU_OP0_NOP, tgsi_unsupported},
 	{22,			0, ALU_OP0_NOP, tgsi_unsupported},
@@ -7693,7 +7693,7 @@  static struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] = {
 	{TGSI_OPCODE_MAD,	1, ALU_OP3_MULADD, tgsi_op3},
 	{TGSI_OPCODE_SUB,	0, ALU_OP2_ADD, tgsi_op2},
 	{TGSI_OPCODE_LRP,	0, ALU_OP0_NOP, tgsi_lrp},
-	{19,			0, ALU_OP0_NOP, tgsi_unsupported},
+	{TGSI_OPCODE_FMA,	0, ALU_OP0_NOP, tgsi_unsupported},
 	{TGSI_OPCODE_SQRT,	0, ALU_OP1_SQRT_IEEE, cayman_emit_float_instr},
 	{TGSI_OPCODE_DP2A,	0, ALU_OP0_NOP, tgsi_unsupported},
 	{22,			0, ALU_OP0_NOP, tgsi_unsupported},
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index f1a5388..0aacab1 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -425,6 +425,7 @@  static int si_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enu
 	case PIPE_SHADER_CAP_DOUBLES:
 	case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
 	case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
+	case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
 		return 0;
 	}
 	return 0;
diff --git a/src/gallium/drivers/svga/svga_screen.c b/src/gallium/drivers/svga/svga_screen.c
index bac0dbc..7b01d35 100644
--- a/src/gallium/drivers/svga/svga_screen.c
+++ b/src/gallium/drivers/svga/svga_screen.c
@@ -375,6 +375,7 @@  static int svga_get_shader_param(struct pipe_screen *screen, unsigned shader, en
       case PIPE_SHADER_CAP_DOUBLES:
       case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
       case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
+      case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
          return 0;
       }
       /* If we get here, we failed to handle a cap above */
@@ -431,6 +432,7 @@  static int svga_get_shader_param(struct pipe_screen *screen, unsigned shader, en
       case PIPE_SHADER_CAP_DOUBLES:
       case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
       case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
+      case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
          return 0;
       }
       /* If we get here, we failed to handle a cap above */
diff --git a/src/gallium/drivers/vc4/vc4_screen.c b/src/gallium/drivers/vc4/vc4_screen.c
index 7c62847..0be8ec2 100644
--- a/src/gallium/drivers/vc4/vc4_screen.c
+++ b/src/gallium/drivers/vc4/vc4_screen.c
@@ -319,6 +319,7 @@  vc4_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
         case PIPE_SHADER_CAP_DOUBLES:
         case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
         case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
+        case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
                 return 0;
         case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
         case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h
index a8ffe9c..67f48e4 100644
--- a/src/gallium/include/pipe/p_defines.h
+++ b/src/gallium/include/pipe/p_defines.h
@@ -644,6 +644,7 @@  enum pipe_shader_cap
    PIPE_SHADER_CAP_DOUBLES,
    PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED, /* all rounding modes */
    PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED,
+   PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED,
 };
 
 /**
diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h
index 95ac590..a64686a 100644
--- a/src/gallium/include/pipe/p_shader_tokens.h
+++ b/src/gallium/include/pipe/p_shader_tokens.h
@@ -306,7 +306,7 @@  struct tgsi_property_data {
 #define TGSI_OPCODE_MAD                 16
 #define TGSI_OPCODE_SUB                 17
 #define TGSI_OPCODE_LRP                 18
-                                /* gap */
+#define TGSI_OPCODE_FMA                 19
 #define TGSI_OPCODE_SQRT                20
 #define TGSI_OPCODE_DP2A                21
                                 /* gap */
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index bd191d8..0a0ab61 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -332,6 +332,7 @@  public:
    int glsl_version;
    bool native_integers;
    bool have_sqrt;
+   bool have_fma;
 
    variable_storage *find_variable_storage(ir_variable *var);
 
@@ -836,6 +837,7 @@  glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, unsigned op,
       case3fid(ADD, UADD, DADD);
       case3fid(MUL, UMUL, DMUL);
       case3fid(MAD, UMAD, DMAD);
+      case3fid(FMA, UMAD, DMAD);
       case3(DIV, IDIV, UDIV);
       case4d(MAX, IMAX, UMAX, DMAX);
       case4d(MIN, IMIN, UMIN, DMIN);
@@ -2222,10 +2224,11 @@  glsl_to_tgsi_visitor::visit(ir_expression *ir)
       emit(ir, TGSI_OPCODE_IMUL_HI, result_dst, op[0], op[1]);
       break;
    case ir_triop_fma:
-      /* NOTE: Perhaps there should be a special opcode that enforces fused
-       * mul-add. Just use MAD for now.
-       */
-      emit(ir, TGSI_OPCODE_MAD, result_dst, op[0], op[1], op[2]);
+      /* MAD may round the product; use it only when FMA is unsupported. */
+      if (have_fma)
+         emit(ir, TGSI_OPCODE_FMA, result_dst, op[0], op[1], op[2]);
+      else
+         emit(ir, TGSI_OPCODE_MAD, result_dst, op[0], op[1], op[2]);
       break;
    case ir_unop_interpolate_at_centroid:
       emit(ir, TGSI_OPCODE_INTERP_CENTROID, result_dst, op[0]);
@@ -5564,6 +5567,8 @@  get_mesa_program(struct gl_context *ctx,
 
    v->have_sqrt = pscreen->get_shader_param(pscreen, ptarget,
                                             PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED);
+   v->have_fma = pscreen->get_shader_param(pscreen, ptarget,
+                                           PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED);
 
    _mesa_copy_linked_program_data(shader->Stage, shader_program, prog);
    _mesa_generate_parameters_list_for_uniforms(shader_program, shader,

Comments

Series is:

Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>

On Thu, Mar 5, 2015 at 11:15 AM, Marek Olšák <maraeo@gmail.com> wrote:
> From: Marek Olšák <marek.olsak@amd.com>
>
> Needed by ARB_gpu_shader5.
>
> v2: select DMAD for FMA with double precision; it can be later changed to DFMA
> ---
>  src/gallium/auxiliary/gallivm/lp_bld_limits.h    |  1 +
>  src/gallium/auxiliary/tgsi/tgsi_exec.h           |  1 +
>  src/gallium/auxiliary/tgsi/tgsi_info.c           |  2 +-
>  src/gallium/auxiliary/tgsi/tgsi_util.c           |  1 +
>  src/gallium/docs/source/screen.rst               |  1 +
>  src/gallium/docs/source/tgsi.rst                 | 15 +++++++++++++++
>  src/gallium/drivers/freedreno/freedreno_screen.c |  1 +
>  src/gallium/drivers/i915/i915_screen.c           |  1 +
>  src/gallium/drivers/nouveau/nv30/nv30_screen.c   |  2 ++
>  src/gallium/drivers/nouveau/nv50/nv50_screen.c   |  1 +
>  src/gallium/drivers/nouveau/nvc0/nvc0_screen.c   |  1 +
>  src/gallium/drivers/r300/r300_screen.c           |  2 ++
>  src/gallium/drivers/r600/r600_pipe.c             |  1 +
>  src/gallium/drivers/r600/r600_shader.c           |  6 +++---
>  src/gallium/drivers/radeonsi/si_pipe.c           |  1 +
>  src/gallium/drivers/svga/svga_screen.c           |  2 ++
>  src/gallium/drivers/vc4/vc4_screen.c             |  1 +
>  src/gallium/include/pipe/p_defines.h             |  1 +
>  src/gallium/include/pipe/p_shader_tokens.h       |  2 +-
>  src/mesa/state_tracker/st_glsl_to_tgsi.cpp       | 13 +++++++++----
>  20 files changed, 47 insertions(+), 9 deletions(-)
>
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_limits.h b/src/gallium/auxiliary/gallivm/lp_bld_limits.h
> index 2962360..c5c51c1 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_limits.h
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_limits.h
> @@ -129,6 +129,7 @@ gallivm_get_shader_param(enum pipe_shader_cap param)
>     case PIPE_SHADER_CAP_DOUBLES:
>     case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
>     case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
> +   case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
>        return 0;
>     }
>     /* if we get here, we missed a shader cap above (and should have seen
> diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h
> index 609c81b..0e59b88 100644
> --- a/src/gallium/auxiliary/tgsi/tgsi_exec.h
> +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h
> @@ -459,6 +459,7 @@ tgsi_exec_get_shader_param(enum pipe_shader_cap param)
>     case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
>        return 1;
>     case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
> +   case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
>        return 0;
>     }
>     /* if we get here, we missed a shader cap above (and should have seen
> diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c
> index 4d838fd..e6e0a60 100644
> --- a/src/gallium/auxiliary/tgsi/tgsi_info.c
> +++ b/src/gallium/auxiliary/tgsi/tgsi_info.c
> @@ -56,7 +56,7 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] =
>     { 1, 3, 0, 0, 0, 0, COMP, "MAD", TGSI_OPCODE_MAD },
>     { 1, 2, 0, 0, 0, 0, COMP, "SUB", TGSI_OPCODE_SUB },
>     { 1, 3, 0, 0, 0, 0, COMP, "LRP", TGSI_OPCODE_LRP },
> -   { 0, 0, 0, 0, 0, 0, NONE, "", 19 },      /* removed */
> +   { 1, 3, 0, 0, 0, 0, COMP, "FMA", TGSI_OPCODE_FMA },
>     { 1, 1, 0, 0, 0, 0, REPL, "SQRT", TGSI_OPCODE_SQRT },
>     { 1, 3, 0, 0, 0, 0, REPL, "DP2A", TGSI_OPCODE_DP2A },
>     { 0, 0, 0, 0, 0, 0, NONE, "", 22 },      /* removed */
> diff --git a/src/gallium/auxiliary/tgsi/tgsi_util.c b/src/gallium/auxiliary/tgsi/tgsi_util.c
> index d572ff0..e5b8427 100644
> --- a/src/gallium/auxiliary/tgsi/tgsi_util.c
> +++ b/src/gallium/auxiliary/tgsi/tgsi_util.c
> @@ -193,6 +193,7 @@ tgsi_util_get_inst_usage_mask(const struct tgsi_full_instruction *inst,
>     case TGSI_OPCODE_MAD:
>     case TGSI_OPCODE_SUB:
>     case TGSI_OPCODE_LRP:
> +   case TGSI_OPCODE_FMA:
>     case TGSI_OPCODE_FRC:
>     case TGSI_OPCODE_CEIL:
>     case TGSI_OPCODE_CLAMP:
> diff --git a/src/gallium/docs/source/screen.rst b/src/gallium/docs/source/screen.rst
> index e0fd1a2..dd7a012 100644
> --- a/src/gallium/docs/source/screen.rst
> +++ b/src/gallium/docs/source/screen.rst
> @@ -336,6 +336,7 @@ to be 0.
>    is supported. If it is, DTRUNC/DCEIL/DFLR/DROUND opcodes may be used.
>  * ``PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED``: Whether DFRACEXP and
>    DLDEXP are supported.
> +* ``PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED``: Whether TGSI_OPCODE_FMA is supported.
>
>
>  .. _pipe_compute_cap:
> diff --git a/src/gallium/docs/source/tgsi.rst b/src/gallium/docs/source/tgsi.rst
> index b0a975a..112df57 100644
> --- a/src/gallium/docs/source/tgsi.rst
> +++ b/src/gallium/docs/source/tgsi.rst
> @@ -272,6 +272,21 @@ This instruction replicates its result.
>    dst.w = src0.w \times src1.w + (1 - src0.w) \times src2.w
>
>
> +.. opcode:: FMA - Fused Multiply-Add
> +
> +Perform a * b + c with no intermediate rounding step.
> +
> +.. math::
> +
> +  dst.x = src0.x \times src1.x + src2.x
> +
> +  dst.y = src0.y \times src1.y + src2.y
> +
> +  dst.z = src0.z \times src1.z + src2.z
> +
> +  dst.w = src0.w \times src1.w + src2.w
> +
> +
>  .. opcode:: DP2A - 2-component Dot Product And Add
>
>  .. math::
> diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c
> index 2973458..d617465 100644
> --- a/src/gallium/drivers/freedreno/freedreno_screen.c
> +++ b/src/gallium/drivers/freedreno/freedreno_screen.c
> @@ -362,6 +362,7 @@ fd_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
>         case PIPE_SHADER_CAP_DOUBLES:
>         case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
>         case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
> +       case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
>                 return 0;
>         case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
>                 return 1;
> diff --git a/src/gallium/drivers/i915/i915_screen.c b/src/gallium/drivers/i915/i915_screen.c
> index dc76464..50847e2 100644
> --- a/src/gallium/drivers/i915/i915_screen.c
> +++ b/src/gallium/drivers/i915/i915_screen.c
> @@ -158,6 +158,7 @@ i915_get_shader_param(struct pipe_screen *screen, unsigned shader, enum pipe_sha
>        case PIPE_SHADER_CAP_DOUBLES:
>        case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
>        case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
> +      case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
>           return 0;
>        default:
>           debug_printf("%s: Unknown cap %u.\n", __FUNCTION__, cap);
> diff --git a/src/gallium/drivers/nouveau/nv30/nv30_screen.c b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
> index 0fca9e0..eeb7148 100644
> --- a/src/gallium/drivers/nouveau/nv30/nv30_screen.c
> +++ b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
> @@ -250,6 +250,7 @@ nv30_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
>        case PIPE_SHADER_CAP_DOUBLES:
>        case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
>        case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
> +      case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
>           return 0;
>        default:
>           debug_printf("unknown vertex shader param %d\n", param);
> @@ -289,6 +290,7 @@ nv30_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
>        case PIPE_SHADER_CAP_DOUBLES:
>        case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
>        case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
> +      case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
>           return 0;
>        default:
>           debug_printf("unknown fragment shader param %d\n", param);
> diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
> index ed07ba4..829dfbc 100644
> --- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c
> +++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
> @@ -289,6 +289,7 @@ nv50_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
>     case PIPE_SHADER_CAP_DOUBLES:
>     case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
>     case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
> +   case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
>        return 0;
>     default:
>        NOUVEAU_ERR("unknown PIPE_SHADER_CAP %d\n", param);
> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
> index 686d892..04c34f5 100644
> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
> @@ -295,6 +295,7 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
>     case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
>        return 1;
>     case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
> +   case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
>        return 0;
>     case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
>        return 16; /* would be 32 in linked (OpenGL-style) mode */
> diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c
> index fca8001..752d7e5 100644
> --- a/src/gallium/drivers/r300/r300_screen.c
> +++ b/src/gallium/drivers/r300/r300_screen.c
> @@ -287,6 +287,7 @@ static int r300_get_shader_param(struct pipe_screen *pscreen, unsigned shader, e
>          case PIPE_SHADER_CAP_DOUBLES:
>          case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
>          case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
> +        case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
>              return 0;
>          case PIPE_SHADER_CAP_PREFERRED_IR:
>              return PIPE_SHADER_IR_TGSI;
> @@ -341,6 +342,7 @@ static int r300_get_shader_param(struct pipe_screen *pscreen, unsigned shader, e
>          case PIPE_SHADER_CAP_DOUBLES:
>          case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
>          case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
> +        case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
>              return 0;
>          case PIPE_SHADER_CAP_PREFERRED_IR:
>              return PIPE_SHADER_IR_TGSI;
> diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
> index c8a0e9c..2670e53 100644
> --- a/src/gallium/drivers/r600/r600_pipe.c
> +++ b/src/gallium/drivers/r600/r600_pipe.c
> @@ -493,6 +493,7 @@ static int r600_get_shader_param(struct pipe_screen* pscreen, unsigned shader, e
>                 return 0;
>         case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
>         case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
> +       case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
>                 return 0;
>         }
>         return 0;
> diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
> index 2ee59c8..54540c3 100644
> --- a/src/gallium/drivers/r600/r600_shader.c
> +++ b/src/gallium/drivers/r600/r600_shader.c
> @@ -7295,7 +7295,7 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
>         {TGSI_OPCODE_MAD,       1, ALU_OP3_MULADD, tgsi_op3},
>         {TGSI_OPCODE_SUB,       0, ALU_OP2_ADD, tgsi_op2},
>         {TGSI_OPCODE_LRP,       0, ALU_OP0_NOP, tgsi_lrp},
> -       {19,                    0, ALU_OP0_NOP, tgsi_unsupported},
> +       {TGSI_OPCODE_FMA,       0, ALU_OP0_NOP, tgsi_unsupported},
>         {TGSI_OPCODE_SQRT,      0, ALU_OP1_SQRT_IEEE, tgsi_trans_srcx_replicate},
>         {TGSI_OPCODE_DP2A,      0, ALU_OP0_NOP, tgsi_unsupported},
>         {22,                    0, ALU_OP0_NOP, tgsi_unsupported},
> @@ -7494,7 +7494,7 @@ static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = {
>         {TGSI_OPCODE_MAD,       1, ALU_OP3_MULADD, tgsi_op3},
>         {TGSI_OPCODE_SUB,       0, ALU_OP2_ADD, tgsi_op2},
>         {TGSI_OPCODE_LRP,       0, ALU_OP0_NOP, tgsi_lrp},
> -       {19,                    0, ALU_OP0_NOP, tgsi_unsupported},
> +       {TGSI_OPCODE_FMA,       0, ALU_OP0_NOP, tgsi_unsupported},
>         {TGSI_OPCODE_SQRT,      0, ALU_OP1_SQRT_IEEE, tgsi_trans_srcx_replicate},
>         {TGSI_OPCODE_DP2A,      0, ALU_OP0_NOP, tgsi_unsupported},
>         {22,                    0, ALU_OP0_NOP, tgsi_unsupported},
> @@ -7693,7 +7693,7 @@ static struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] = {
>         {TGSI_OPCODE_MAD,       1, ALU_OP3_MULADD, tgsi_op3},
>         {TGSI_OPCODE_SUB,       0, ALU_OP2_ADD, tgsi_op2},
>         {TGSI_OPCODE_LRP,       0, ALU_OP0_NOP, tgsi_lrp},
> -       {19,                    0, ALU_OP0_NOP, tgsi_unsupported},
> +       {TGSI_OPCODE_FMA,       0, ALU_OP0_NOP, tgsi_unsupported},
>         {TGSI_OPCODE_SQRT,      0, ALU_OP1_SQRT_IEEE, cayman_emit_float_instr},
>         {TGSI_OPCODE_DP2A,      0, ALU_OP0_NOP, tgsi_unsupported},
>         {22,                    0, ALU_OP0_NOP, tgsi_unsupported},
> diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
> index f1a5388..0aacab1 100644
> --- a/src/gallium/drivers/radeonsi/si_pipe.c
> +++ b/src/gallium/drivers/radeonsi/si_pipe.c
> @@ -425,6 +425,7 @@ static int si_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enu
>         case PIPE_SHADER_CAP_DOUBLES:
>         case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
>         case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
> +       case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
>                 return 0;
>         }
>         return 0;
> diff --git a/src/gallium/drivers/svga/svga_screen.c b/src/gallium/drivers/svga/svga_screen.c
> index bac0dbc..7b01d35 100644
> --- a/src/gallium/drivers/svga/svga_screen.c
> +++ b/src/gallium/drivers/svga/svga_screen.c
> @@ -375,6 +375,7 @@ static int svga_get_shader_param(struct pipe_screen *screen, unsigned shader, en
>        case PIPE_SHADER_CAP_DOUBLES:
>        case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
>        case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
> +      case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
>           return 0;
>        }
>        /* If we get here, we failed to handle a cap above */
> @@ -431,6 +432,7 @@ static int svga_get_shader_param(struct pipe_screen *screen, unsigned shader, en
>        case PIPE_SHADER_CAP_DOUBLES:
>        case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
>        case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
> +      case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
>           return 0;
>        }
>        /* If we get here, we failed to handle a cap above */
> diff --git a/src/gallium/drivers/vc4/vc4_screen.c b/src/gallium/drivers/vc4/vc4_screen.c
> index 7c62847..0be8ec2 100644
> --- a/src/gallium/drivers/vc4/vc4_screen.c
> +++ b/src/gallium/drivers/vc4/vc4_screen.c
> @@ -319,6 +319,7 @@ vc4_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
>          case PIPE_SHADER_CAP_DOUBLES:
>          case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
>          case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
> +        case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
>                  return 0;
>          case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
>          case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
> diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h
> index a8ffe9c..67f48e4 100644
> --- a/src/gallium/include/pipe/p_defines.h
> +++ b/src/gallium/include/pipe/p_defines.h
> @@ -644,6 +644,7 @@ enum pipe_shader_cap
>     PIPE_SHADER_CAP_DOUBLES,
>     PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED, /* all rounding modes */
>     PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED,
> +   PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED,
>  };
>
>  /**
> diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h
> index 95ac590..a64686a 100644
> --- a/src/gallium/include/pipe/p_shader_tokens.h
> +++ b/src/gallium/include/pipe/p_shader_tokens.h
> @@ -306,7 +306,7 @@ struct tgsi_property_data {
>  #define TGSI_OPCODE_MAD                 16
>  #define TGSI_OPCODE_SUB                 17
>  #define TGSI_OPCODE_LRP                 18
> -                                /* gap */
> +#define TGSI_OPCODE_FMA                 19
>  #define TGSI_OPCODE_SQRT                20
>  #define TGSI_OPCODE_DP2A                21
>                                  /* gap */
> diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> index bd191d8..0a0ab61 100644
> --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> @@ -332,6 +332,7 @@ public:
>     int glsl_version;
>     bool native_integers;
>     bool have_sqrt;
> +   bool have_fma;
>
>     variable_storage *find_variable_storage(ir_variable *var);
>
> @@ -836,6 +837,7 @@ glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, unsigned op,
>        case3fid(ADD, UADD, DADD);
>        case3fid(MUL, UMUL, DMUL);
>        case3fid(MAD, UMAD, DMAD);
> +      case3fid(FMA, UMAD, DMAD);
>        case3(DIV, IDIV, UDIV);
>        case4d(MAX, IMAX, UMAX, DMAX);
>        case4d(MIN, IMIN, UMIN, DMIN);
> @@ -2222,10 +2224,11 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
>        emit(ir, TGSI_OPCODE_IMUL_HI, result_dst, op[0], op[1]);
>        break;
>     case ir_triop_fma:
> -      /* NOTE: Perhaps there should be a special opcode that enforces fused
> -       * mul-add. Just use MAD for now.
> -       */
> -      emit(ir, TGSI_OPCODE_MAD, result_dst, op[0], op[1], op[2]);
> +      /* In theory, MAD is incorrect here. */
> +      if (have_fma)
> +         emit(ir, TGSI_OPCODE_FMA, result_dst, op[0], op[1], op[2]);
> +      else
> +         emit(ir, TGSI_OPCODE_MAD, result_dst, op[0], op[1], op[2]);
>        break;
>     case ir_unop_interpolate_at_centroid:
>        emit(ir, TGSI_OPCODE_INTERP_CENTROID, result_dst, op[0]);
> @@ -5564,6 +5567,8 @@ get_mesa_program(struct gl_context *ctx,
>
>     v->have_sqrt = pscreen->get_shader_param(pscreen, ptarget,
>                                              PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED);
> +   v->have_fma = pscreen->get_shader_param(pscreen, ptarget,
> +                                           PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED);
>
>     _mesa_copy_linked_program_data(shader->Stage, shader_program, prog);
>     _mesa_generate_parameters_list_for_uniforms(shader_program, shader,
> --
> 2.1.0
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev