ac/nir: implement emit_{imul, umul}_2x32_64 opcodes

Submitted by Samuel Pitoiset on March 5, 2019, 9:33 a.m.

Details

Message ID 20190305093305.23265-1-samuel.pitoiset@gmail.com
State New
Headers show
Series "ac/nir: implement emit_{imul, umul}_2x32_64 opcodes" ( rev: 1 ) in Mesa

Not browsing as part of any series.

Commit Message

Samuel Pitoiset March 5, 2019, 9:33 a.m.
Fixes: 58bcebd987b ("spirv: Allow [i/u]mulExtended to use new nir opcode")
Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
---
 src/amd/common/ac_nir_to_llvm.c | 36 +++++++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)

Patch hide | download patch | download mbox

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index af7a95137c2..74ae690e845 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -423,6 +423,32 @@  static LLVMValueRef emit_imul_high(struct ac_llvm_context *ctx,
 	return result;
 }
 
+/* nir_op_umul_2x32_64: returns {mul(src0, src1), emit_umul_high(src0, src1)} as one i64. */
+static LLVMValueRef emit_umul_2x32_64(struct ac_llvm_context *ctx,
+				      LLVMValueRef src0, LLVMValueRef src1)
+{
+	LLVMValueRef result[2];
+
+	result[0] = LLVMBuildMul(ctx->builder, src0, src1, "");
+	result[1] = emit_umul_high(ctx, src0, src1);
+	/* Gather both values into one vector, then reinterpret the pair as i64. */
+	LLVMValueRef tmp = ac_build_gather_values(ctx, result, 2);
+	return LLVMBuildBitCast(ctx->builder, tmp, ctx->i64, "");
+}
+
+/* nir_op_imul_2x32_64: returns {mul(src0, src1), emit_imul_high(src0, src1)} as one i64. */
+static LLVMValueRef emit_imul_2x32_64(struct ac_llvm_context *ctx,
+				      LLVMValueRef src0, LLVMValueRef src1)
+{
+	LLVMValueRef result[2];
+
+	result[0] = LLVMBuildMul(ctx->builder, src0, src1, "");
+	result[1] = emit_imul_high(ctx, src0, src1);
+	/* Gather both values into one vector, then reinterpret the pair as i64. */
+	LLVMValueRef tmp = ac_build_gather_values(ctx, result, 2);
+	return LLVMBuildBitCast(ctx->builder, tmp, ctx->i64, "");
+}
+
 static LLVMValueRef emit_bitfield_extract(struct ac_llvm_context *ctx,
 					  bool is_signed,
 					  const LLVMValueRef srcs[3])
@@ -977,6 +1003,16 @@  static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
 		src[1] = ac_to_integer(&ctx->ac, src[1]);
 		result = emit_imul_high(&ctx->ac, src[0], src[1]);
 		break;
+	case nir_op_umul_2x32_64:
+		src[0] = ac_to_integer(&ctx->ac, src[0]);
+		src[1] = ac_to_integer(&ctx->ac, src[1]);
+		result = emit_umul_2x32_64(&ctx->ac, src[0], src[1]);
+		break;
+	case nir_op_imul_2x32_64:
+		src[0] = ac_to_integer(&ctx->ac, src[0]);
+		src[1] = ac_to_integer(&ctx->ac, src[1]);
+		result = emit_imul_2x32_64(&ctx->ac, src[0], src[1]);
+		break;
 	case nir_op_pack_half_2x16:
 		result = emit_pack_half_2x16(&ctx->ac, src[0]);
 		break;

Comments

On Tue, Mar 5, 2019 at 10:30 AM Samuel Pitoiset
<samuel.pitoiset@gmail.com> wrote:
>
> Fixes: 58bcebd987b ("spirv: Allow [i/u]mulExtended to use new nir opcode")
> Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
> ---
>  src/amd/common/ac_nir_to_llvm.c | 36 +++++++++++++++++++++++++++++++++
>  1 file changed, 36 insertions(+)
>
> diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
> index af7a95137c2..74ae690e845 100644
> --- a/src/amd/common/ac_nir_to_llvm.c
> +++ b/src/amd/common/ac_nir_to_llvm.c
> @@ -423,6 +423,32 @@ static LLVMValueRef emit_imul_high(struct ac_llvm_context *ctx,
>         return result;
>  }
>
> +static LLVMValueRef emit_umul_2x32_64(struct ac_llvm_context *ctx,
> +                                     LLVMValueRef src0, LLVMValueRef src1)
> +{
> +       LLVMValueRef result[2];
> +
> +       result[0] = LLVMBuildMul(ctx->builder, src0, src1, "");
> +       result[1] = emit_umul_high(ctx, src0, src1);
> +
> +       LLVMValueRef tmp = LLVMGetUndef(ctx->v2i32);

This tmp assignment is dead?
> +       tmp = ac_build_gather_values(ctx, result, 2);
> +       return LLVMBuildBitCast(ctx->builder, tmp, ctx->i64, "");
> +}
> +
> +static LLVMValueRef emit_imul_2x32_64(struct ac_llvm_context *ctx,
> +                                     LLVMValueRef src0, LLVMValueRef src1)
> +{
> +       LLVMValueRef result[2];
> +
> +       result[0] = LLVMBuildMul(ctx->builder, src0, src1, "");
> +       result[1] = emit_imul_high(ctx, src0, src1);

If we do this lowering, why not just set options->lower_mul_2x32_64?

does it result in better code from LLVM if we convert both args to 64
bit and do a 64-bit mul?

> +
> +       LLVMValueRef tmp = LLVMGetUndef(ctx->v2i32);

This tmp assignment is dead?
> +       tmp = ac_build_gather_values(ctx, result, 2);
> +       return LLVMBuildBitCast(ctx->builder, tmp, ctx->i64, "");
> +}
> +
>  static LLVMValueRef emit_bitfield_extract(struct ac_llvm_context *ctx,
>                                           bool is_signed,
>                                           const LLVMValueRef srcs[3])
> @@ -977,6 +1003,16 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
>                 src[1] = ac_to_integer(&ctx->ac, src[1]);
>                 result = emit_imul_high(&ctx->ac, src[0], src[1]);
>                 break;
> +       case nir_op_umul_2x32_64:
> +               src[0] = ac_to_integer(&ctx->ac, src[0]);
> +               src[1] = ac_to_integer(&ctx->ac, src[1]);
> +               result = emit_umul_2x32_64(&ctx->ac, src[0], src[1]);
> +               break;
> +       case nir_op_imul_2x32_64:
> +               src[0] = ac_to_integer(&ctx->ac, src[0]);
> +               src[1] = ac_to_integer(&ctx->ac, src[1]);
> +               result = emit_imul_2x32_64(&ctx->ac, src[0], src[1]);
> +               break;
>         case nir_op_pack_half_2x16:
>                 result = emit_pack_half_2x16(&ctx->ac, src[0]);
>                 break;
> --
> 2.21.0
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
On 3/5/19 2:01 PM, Bas Nieuwenhuizen wrote:
> On Tue, Mar 5, 2019 at 10:30 AM Samuel Pitoiset
> <samuel.pitoiset@gmail.com> wrote:
>> Fixes: 58bcebd987b ("spirv: Allow [i/u]mulExtended to use new nir opcode")
>> Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
>> ---
>>   src/amd/common/ac_nir_to_llvm.c | 36 +++++++++++++++++++++++++++++++++
>>   1 file changed, 36 insertions(+)
>>
>> diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
>> index af7a95137c2..74ae690e845 100644
>> --- a/src/amd/common/ac_nir_to_llvm.c
>> +++ b/src/amd/common/ac_nir_to_llvm.c
>> @@ -423,6 +423,32 @@ static LLVMValueRef emit_imul_high(struct ac_llvm_context *ctx,
>>          return result;
>>   }
>>
>> +static LLVMValueRef emit_umul_2x32_64(struct ac_llvm_context *ctx,
>> +                                     LLVMValueRef src0, LLVMValueRef src1)
>> +{
>> +       LLVMValueRef result[2];
>> +
>> +       result[0] = LLVMBuildMul(ctx->builder, src0, src1, "");
>> +       result[1] = emit_umul_high(ctx, src0, src1);
>> +
>> +       LLVMValueRef tmp = LLVMGetUndef(ctx->v2i32);
> This tmp assignment is dead?
I will send a v2 with that removed.
>> +       tmp = ac_build_gather_values(ctx, result, 2);
>> +       return LLVMBuildBitCast(ctx->builder, tmp, ctx->i64, "");
>> +}
>> +
>> +static LLVMValueRef emit_imul_2x32_64(struct ac_llvm_context *ctx,
>> +                                     LLVMValueRef src0, LLVMValueRef src1)
>> +{
>> +       LLVMValueRef result[2];
>> +
>> +       result[0] = LLVMBuildMul(ctx->builder, src0, src1, "");
>> +       result[1] = emit_imul_high(ctx, src0, src1);
> If we do this lowering, why not just set options->lower_mul_2x32_64?
>
> does it result in better code from LLVM if we convert both args to 64
> bit and do a 64-bit mul?
No LLVM differences.
>
>> +
>> +       LLVMValueRef tmp = LLVMGetUndef(ctx->v2i32);
> This tmp assignment is dead?
>> +       tmp = ac_build_gather_values(ctx, result, 2);
>> +       return LLVMBuildBitCast(ctx->builder, tmp, ctx->i64, "");
>> +}
>> +
>>   static LLVMValueRef emit_bitfield_extract(struct ac_llvm_context *ctx,
>>                                            bool is_signed,
>>                                            const LLVMValueRef srcs[3])
>> @@ -977,6 +1003,16 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
>>                  src[1] = ac_to_integer(&ctx->ac, src[1]);
>>                  result = emit_imul_high(&ctx->ac, src[0], src[1]);
>>                  break;
>> +       case nir_op_umul_2x32_64:
>> +               src[0] = ac_to_integer(&ctx->ac, src[0]);
>> +               src[1] = ac_to_integer(&ctx->ac, src[1]);
>> +               result = emit_umul_2x32_64(&ctx->ac, src[0], src[1]);
>> +               break;
>> +       case nir_op_imul_2x32_64:
>> +               src[0] = ac_to_integer(&ctx->ac, src[0]);
>> +               src[1] = ac_to_integer(&ctx->ac, src[1]);
>> +               result = emit_imul_2x32_64(&ctx->ac, src[0], src[1]);
>> +               break;
>>          case nir_op_pack_half_2x16:
>>                  result = emit_pack_half_2x16(&ctx->ac, src[0]);
>>                  break;
>> --
>> 2.21.0
>>
>> _______________________________________________
>> mesa-dev mailing list
>> mesa-dev@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev