ac: use amdgpu-flat-work-group-size

Submitted by Marek Olšák on May 31, 2019, 8:08 p.m.

Details

Message ID 20190531200817.15959-1-maraeo@gmail.com
State New
Headers show
Series "ac: use amdgpu-flat-work-group-size" ( rev: 1 ) in Mesa

Not browsing as part of any series.

Commit Message

Marek Olšák May 31, 2019, 8:08 p.m.
From: Marek Olšák <marek.olsak@amd.com>

---
 src/amd/common/ac_llvm_util.c            | 10 ++++++++++
 src/amd/common/ac_llvm_util.h            |  1 +
 src/amd/vulkan/radv_nir_to_llvm.c        |  7 ++-----
 src/gallium/drivers/radeonsi/si_shader.c |  7 ++-----
 4 files changed, 15 insertions(+), 10 deletions(-)

Patch hide | download patch | download mbox

diff --git a/src/amd/common/ac_llvm_util.c b/src/amd/common/ac_llvm_util.c
index 5b701603ebb..c8a8bf146fe 100644
--- a/src/amd/common/ac_llvm_util.c
+++ b/src/amd/common/ac_llvm_util.c
@@ -262,20 +262,30 @@  ac_dump_module(LLVMModuleRef module)
 void
 ac_llvm_add_target_dep_function_attr(LLVMValueRef F,
 				     const char *name, unsigned value)
 {
 	char str[16];
 
 	snprintf(str, sizeof(str), "0x%x", value);
 	LLVMAddTargetDependentFunctionAttr(F, name, str);
 }
 
+void ac_llvm_set_workgroup_size(LLVMValueRef F, unsigned size)
+{
+	if (!size)
+		return;
+
+	char str[32];
+	snprintf(str, sizeof(str), "%u,%u", size, size);
+	LLVMAddTargetDependentFunctionAttr(F, "amdgpu-flat-work-group-size", str);
+}
+
 unsigned
 ac_count_scratch_private_memory(LLVMValueRef function)
 {
 	unsigned private_mem_vgprs = 0;
 
 	/* Process all LLVM instructions. */
 	LLVMBasicBlockRef bb = LLVMGetFirstBasicBlock(function);
 	while (bb) {
 		LLVMValueRef next = LLVMGetFirstInstruction(bb);
 
diff --git a/src/amd/common/ac_llvm_util.h b/src/amd/common/ac_llvm_util.h
index ca00540da80..18102be5207 100644
--- a/src/amd/common/ac_llvm_util.h
+++ b/src/amd/common/ac_llvm_util.h
@@ -102,20 +102,21 @@  void ac_dump_module(LLVMModuleRef module);
 LLVMValueRef ac_llvm_get_called_value(LLVMValueRef call);
 bool ac_llvm_is_function(LLVMValueRef v);
 LLVMModuleRef ac_create_module(LLVMTargetMachineRef tm, LLVMContextRef ctx);
 
 LLVMBuilderRef ac_create_builder(LLVMContextRef ctx,
 				 enum ac_float_mode float_mode);
 
 void
 ac_llvm_add_target_dep_function_attr(LLVMValueRef F,
 				     const char *name, unsigned value);
+void ac_llvm_set_workgroup_size(LLVMValueRef F, unsigned size);
 
 static inline unsigned
 ac_get_load_intr_attribs(bool can_speculate)
 {
 	/* READNONE means writes can't affect it, while READONLY means that
 	 * writes can affect it. */
 	return can_speculate ? AC_FUNC_ATTR_READNONE :
 			       AC_FUNC_ATTR_READONLY;
 }
 
diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c
index 341f6388f32..6f102647ba8 100644
--- a/src/amd/vulkan/radv_nir_to_llvm.c
+++ b/src/amd/vulkan/radv_nir_to_llvm.c
@@ -511,25 +511,22 @@  create_llvm_function(LLVMContextRef ctx, LLVMModuleRef module,
 			ac_add_attr_dereferenceable(P, UINT64_MAX);
 		}
 	}
 
 	if (options->address32_hi) {
 		ac_llvm_add_target_dep_function_attr(main_function,
 						     "amdgpu-32bit-address-high-bits",
 						     options->address32_hi);
 	}
 
-	if (max_workgroup_size) {
-		ac_llvm_add_target_dep_function_attr(main_function,
-						     "amdgpu-max-work-group-size",
-						     max_workgroup_size);
-	}
+	ac_llvm_set_workgroup_size(main_function, max_workgroup_size);
+
 	if (options->unsafe_math) {
 		/* These were copied from some LLVM test. */
 		LLVMAddTargetDependentFunctionAttr(main_function,
 						   "less-precise-fpmad",
 						   "true");
 		LLVMAddTargetDependentFunctionAttr(main_function,
 						   "no-infs-fp-math",
 						   "true");
 		LLVMAddTargetDependentFunctionAttr(main_function,
 						   "no-nans-fp-math",
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index d2927d0254b..1ba6b8b6033 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -4276,25 +4276,22 @@  void si_create_function(struct si_shader_context *ctx,
 		if (fninfo->assign[i])
 			*fninfo->assign[i] = LLVMGetParam(ctx->main_fn, i);
 	}
 
 	if (ctx->screen->info.address32_hi) {
 		ac_llvm_add_target_dep_function_attr(ctx->main_fn,
 						     "amdgpu-32bit-address-high-bits",
 						     ctx->screen->info.address32_hi);
 	}
 
-	if (max_workgroup_size) {
-		ac_llvm_add_target_dep_function_attr(ctx->main_fn,
-						     "amdgpu-max-work-group-size",
-						     max_workgroup_size);
-	}
+	ac_llvm_set_workgroup_size(ctx->main_fn, max_workgroup_size);
+
 	LLVMAddTargetDependentFunctionAttr(ctx->main_fn,
 					   "no-signed-zeros-fp-math",
 					   "true");
 
 	if (ctx->screen->debug_flags & DBG(UNSAFE_MATH)) {
 		/* These were copied from some LLVM test. */
 		LLVMAddTargetDependentFunctionAttr(ctx->main_fn,
 						   "less-precise-fpmad",
 						   "true");
 		LLVMAddTargetDependentFunctionAttr(ctx->main_fn,

Comments

Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>

Thanks!

On Fri, May 31, 2019 at 10:08 PM Marek Olšák <maraeo@gmail.com> wrote:
>
> From: Marek Olšák <marek.olsak@amd.com>
>
> ---
>  src/amd/common/ac_llvm_util.c            | 10 ++++++++++
>  src/amd/common/ac_llvm_util.h            |  1 +
>  src/amd/vulkan/radv_nir_to_llvm.c        |  7 ++-----
>  src/gallium/drivers/radeonsi/si_shader.c |  7 ++-----
>  4 files changed, 15 insertions(+), 10 deletions(-)
>
> diff --git a/src/amd/common/ac_llvm_util.c b/src/amd/common/ac_llvm_util.c
> index 5b701603ebb..c8a8bf146fe 100644
> --- a/src/amd/common/ac_llvm_util.c
> +++ b/src/amd/common/ac_llvm_util.c
> @@ -262,20 +262,30 @@ ac_dump_module(LLVMModuleRef module)
>  void
>  ac_llvm_add_target_dep_function_attr(LLVMValueRef F,
>                                      const char *name, unsigned value)
>  {
>         char str[16];
>
>         snprintf(str, sizeof(str), "0x%x", value);
>         LLVMAddTargetDependentFunctionAttr(F, name, str);
>  }
>
> +void ac_llvm_set_workgroup_size(LLVMValueRef F, unsigned size)
> +{
> +       if (!size)
> +               return;
> +
> +       char str[32];
> +       snprintf(str, sizeof(str), "%u,%u", size, size);
> +       LLVMAddTargetDependentFunctionAttr(F, "amdgpu-flat-work-group-size", str);
> +}
> +
>  unsigned
>  ac_count_scratch_private_memory(LLVMValueRef function)
>  {
>         unsigned private_mem_vgprs = 0;
>
>         /* Process all LLVM instructions. */
>         LLVMBasicBlockRef bb = LLVMGetFirstBasicBlock(function);
>         while (bb) {
>                 LLVMValueRef next = LLVMGetFirstInstruction(bb);
>
> diff --git a/src/amd/common/ac_llvm_util.h b/src/amd/common/ac_llvm_util.h
> index ca00540da80..18102be5207 100644
> --- a/src/amd/common/ac_llvm_util.h
> +++ b/src/amd/common/ac_llvm_util.h
> @@ -102,20 +102,21 @@ void ac_dump_module(LLVMModuleRef module);
>  LLVMValueRef ac_llvm_get_called_value(LLVMValueRef call);
>  bool ac_llvm_is_function(LLVMValueRef v);
>  LLVMModuleRef ac_create_module(LLVMTargetMachineRef tm, LLVMContextRef ctx);
>
>  LLVMBuilderRef ac_create_builder(LLVMContextRef ctx,
>                                  enum ac_float_mode float_mode);
>
>  void
>  ac_llvm_add_target_dep_function_attr(LLVMValueRef F,
>                                      const char *name, unsigned value);
> +void ac_llvm_set_workgroup_size(LLVMValueRef F, unsigned size);
>
>  static inline unsigned
>  ac_get_load_intr_attribs(bool can_speculate)
>  {
>         /* READNONE means writes can't affect it, while READONLY means that
>          * writes can affect it. */
>         return can_speculate ? AC_FUNC_ATTR_READNONE :
>                                AC_FUNC_ATTR_READONLY;
>  }
>
> diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c
> index 341f6388f32..6f102647ba8 100644
> --- a/src/amd/vulkan/radv_nir_to_llvm.c
> +++ b/src/amd/vulkan/radv_nir_to_llvm.c
> @@ -511,25 +511,22 @@ create_llvm_function(LLVMContextRef ctx, LLVMModuleRef module,
>                         ac_add_attr_dereferenceable(P, UINT64_MAX);
>                 }
>         }
>
>         if (options->address32_hi) {
>                 ac_llvm_add_target_dep_function_attr(main_function,
>                                                      "amdgpu-32bit-address-high-bits",
>                                                      options->address32_hi);
>         }
>
> -       if (max_workgroup_size) {
> -               ac_llvm_add_target_dep_function_attr(main_function,
> -                                                    "amdgpu-max-work-group-size",
> -                                                    max_workgroup_size);
> -       }
> +       ac_llvm_set_workgroup_size(main_function, max_workgroup_size);
> +
>         if (options->unsafe_math) {
>                 /* These were copied from some LLVM test. */
>                 LLVMAddTargetDependentFunctionAttr(main_function,
>                                                    "less-precise-fpmad",
>                                                    "true");
>                 LLVMAddTargetDependentFunctionAttr(main_function,
>                                                    "no-infs-fp-math",
>                                                    "true");
>                 LLVMAddTargetDependentFunctionAttr(main_function,
>                                                    "no-nans-fp-math",
> diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
> index d2927d0254b..1ba6b8b6033 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.c
> +++ b/src/gallium/drivers/radeonsi/si_shader.c
> @@ -4276,25 +4276,22 @@ void si_create_function(struct si_shader_context *ctx,
>                 if (fninfo->assign[i])
>                         *fninfo->assign[i] = LLVMGetParam(ctx->main_fn, i);
>         }
>
>         if (ctx->screen->info.address32_hi) {
>                 ac_llvm_add_target_dep_function_attr(ctx->main_fn,
>                                                      "amdgpu-32bit-address-high-bits",
>                                                      ctx->screen->info.address32_hi);
>         }
>
> -       if (max_workgroup_size) {
> -               ac_llvm_add_target_dep_function_attr(ctx->main_fn,
> -                                                    "amdgpu-max-work-group-size",
> -                                                    max_workgroup_size);
> -       }
> +       ac_llvm_set_workgroup_size(ctx->main_fn, max_workgroup_size);
> +
>         LLVMAddTargetDependentFunctionAttr(ctx->main_fn,
>                                            "no-signed-zeros-fp-math",
>                                            "true");
>
>         if (ctx->screen->debug_flags & DBG(UNSAFE_MATH)) {
>                 /* These were copied from some LLVM test. */
>                 LLVMAddTargetDependentFunctionAttr(ctx->main_fn,
>                                                    "less-precise-fpmad",
>                                                    "true");
>                 LLVMAddTargetDependentFunctionAttr(ctx->main_fn,
> --
> 2.17.1
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev