[RFC,V2] radv: add initial support for VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT

Submitted by Timothy Arceri on May 8, 2018, 5:58 a.m.

Details

Message ID 20180508055821.4047-1-tarceri@itsqueeze.com
State Accepted
Series "radv: add initial support for VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT"
Commit ce188813bfe63068119cbf3d0f76e1ea3d27b722
Headers show

Commit Message

Timothy Arceri May 8, 2018, 5:58 a.m.
When VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT is set, we skip the NIR
linking optimisations and run the NIR optimisation loop only once,
similar to the GLSLOptimizeConservatively constant used by some GL
drivers.

We need to run over the opts at least once to avoid errors in LLVM
(e.g. dead vars it can't handle) and also to reduce the time spent
compiling the IR in LLVM.

With this change, the Blacksmith Unity demos compilation times
go from 329760 ms to 299881 ms when using Wine and DXVK.

V2: add bit to radv_pipeline_key

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=106246
---
 src/amd/vulkan/radv_pipeline.c | 26 +++++++++++++++++---------
 src/amd/vulkan/radv_private.h  |  1 +
 src/amd/vulkan/radv_shader.c   | 12 +++++++-----
 src/amd/vulkan/radv_shader.h   |  5 +++--
 4 files changed, 28 insertions(+), 16 deletions(-)

Patch hide | download patch | download mbox

diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index b4e4f3211e2..d443f8271e9 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -1724,13 +1724,13 @@  radv_link_shaders(struct radv_pipeline *pipeline, nir_shader **shaders)
 				ac_lower_indirect_derefs(ordered_shaders[i],
 				                         pipeline->device->physical_device->rad_info.chip_class);
 			}
-			radv_optimize_nir(ordered_shaders[i]);
+			radv_optimize_nir(ordered_shaders[i], false);
 
 			if (nir_lower_global_vars_to_local(ordered_shaders[i - 1])) {
 				ac_lower_indirect_derefs(ordered_shaders[i - 1],
 				                         pipeline->device->physical_device->rad_info.chip_class);
 			}
-			radv_optimize_nir(ordered_shaders[i - 1]);
+			radv_optimize_nir(ordered_shaders[i - 1], false);
 		}
 	}
 }
@@ -1750,6 +1750,9 @@  radv_generate_graphics_pipeline_key(struct radv_pipeline *pipeline,
 	struct radv_pipeline_key key;
 	memset(&key, 0, sizeof(key));
 
+	if (pCreateInfo->flags & VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT)
+		key.optimisations_disabled = 1;
+
 	key.has_multiview_view_index = has_view_index;
 
 	uint32_t binding_input_rate = 0;
@@ -1878,7 +1881,8 @@  void radv_create_shaders(struct radv_pipeline *pipeline,
                          struct radv_device *device,
                          struct radv_pipeline_cache *cache,
                          struct radv_pipeline_key key,
-                         const VkPipelineShaderStageCreateInfo **pStages)
+                         const VkPipelineShaderStageCreateInfo **pStages,
+                         const VkPipelineCreateFlags flags)
 {
 	struct radv_shader_module fs_m = {0};
 	struct radv_shader_module *modules[MESA_SHADER_STAGES] = { 0, };
@@ -1944,7 +1948,8 @@  void radv_create_shaders(struct radv_pipeline *pipeline,
 
 		nir[i] = radv_shader_compile_to_nir(device, modules[i],
 						    stage ? stage->pName : "main", i,
-						    stage ? stage->pSpecializationInfo : NULL);
+						    stage ? stage->pSpecializationInfo : NULL,
+						    flags);
 		pipeline->active_stages |= mesa_to_vk_shader_stage(i);
 
 		/* We don't want to alter meta shaders IR directly so clone it
@@ -1963,8 +1968,10 @@  void radv_create_shaders(struct radv_pipeline *pipeline,
 			if (i != last)
 				mask = mask | nir_var_shader_out;
 
-			nir_lower_io_to_scalar_early(nir[i], mask);
-			radv_optimize_nir(nir[i]);
+			if (!(flags & VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT)) {
+				nir_lower_io_to_scalar_early(nir[i], mask);
+				radv_optimize_nir(nir[i], false);
+			}
 		}
 	}
 
@@ -1973,7 +1980,8 @@  void radv_create_shaders(struct radv_pipeline *pipeline,
 		merge_tess_info(&nir[MESA_SHADER_TESS_EVAL]->info, &nir[MESA_SHADER_TESS_CTRL]->info);
 	}
 
-	radv_link_shaders(pipeline, nir);
+	if (!(flags & VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT))
+		radv_link_shaders(pipeline, nir);
 
 	for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
 		if (modules[i] && radv_can_dump_shader(device, modules[i]))
@@ -3349,7 +3357,7 @@  radv_pipeline_init(struct radv_pipeline *pipeline,
 
 	radv_create_shaders(pipeline, device, cache, 
 	                    radv_generate_graphics_pipeline_key(pipeline, pCreateInfo, &blend, has_view_index),
-	                    pStages);
+	                    pStages, pCreateInfo->flags);
 
 	pipeline->graphics.spi_baryc_cntl = S_0286E0_FRONT_FACE_ALL_BITS(1);
 	radv_pipeline_init_multisample_state(pipeline, &blend, pCreateInfo);
@@ -3582,7 +3590,7 @@  static VkResult radv_compute_pipeline_create(
 	assert(pipeline->layout);
 
 	pStages[MESA_SHADER_COMPUTE] = &pCreateInfo->stage;
-	radv_create_shaders(pipeline, device, cache, (struct radv_pipeline_key) {0}, pStages);
+	radv_create_shaders(pipeline, device, cache, (struct radv_pipeline_key) {0}, pStages, pCreateInfo->flags);
 
 	pipeline->user_data_0[MESA_SHADER_COMPUTE] = radv_pipeline_stage_to_user_data_0(pipeline, MESA_SHADER_COMPUTE, device->physical_device->rad_info.chip_class);
 	pipeline->need_indirect_descriptor_sets |= pipeline->shaders[MESA_SHADER_COMPUTE]->info.need_indirect_descriptor_sets;
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index 5d67271961b..4805acab280 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -360,6 +360,7 @@  struct radv_pipeline_key {
 	uint8_t log2_num_samples;
 	uint32_t multisample : 1;
 	uint32_t has_multiview_view_index : 1;
+	uint32_t optimisations_disabled : 1;
 };
 
 void
diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index 014ed78c228..d6d70b971cd 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -117,7 +117,7 @@  void radv_DestroyShaderModule(
 }
 
 void
-radv_optimize_nir(struct nir_shader *shader)
+radv_optimize_nir(struct nir_shader *shader, bool optimize_conservatively)
 {
         bool progress;
 
@@ -149,7 +149,7 @@  radv_optimize_nir(struct nir_shader *shader)
                 if (shader->options->max_unroll_iterations) {
                         NIR_PASS(progress, shader, nir_opt_loop_unroll, 0);
                 }
-        } while (progress);
+        } while (progress && !optimize_conservatively);
 
         NIR_PASS(progress, shader, nir_opt_shrink_load);
         NIR_PASS(progress, shader, nir_opt_move_load_ubo);
@@ -160,7 +160,8 @@  radv_shader_compile_to_nir(struct radv_device *device,
 			   struct radv_shader_module *module,
 			   const char *entrypoint_name,
 			   gl_shader_stage stage,
-			   const VkSpecializationInfo *spec_info)
+			   const VkSpecializationInfo *spec_info,
+			   const VkPipelineCreateFlags flags)
 {
 	if (strcmp(entrypoint_name, "main") != 0) {
 		radv_finishme("Multiple shaders per module not really supported");
@@ -293,7 +294,8 @@  radv_shader_compile_to_nir(struct radv_device *device,
 			.lower_vote_eq_to_ballot = 1,
 		});
 
-	radv_optimize_nir(nir);
+	if (!(flags & VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT))
+		radv_optimize_nir(nir, false);
 
 	/* Indirect lowering must be called after the radv_optimize_nir() loop
 	 * has been called at least once. Otherwise indirect lowering can
@@ -301,7 +303,7 @@  radv_shader_compile_to_nir(struct radv_device *device,
 	 * considered too large for unrolling.
 	 */
 	ac_lower_indirect_derefs(nir, device->physical_device->rad_info.chip_class);
-	radv_optimize_nir(nir);
+	radv_optimize_nir(nir, flags & VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT);
 
 	return nir;
 }
diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h
index 6588b787724..182b69849c0 100644
--- a/src/amd/vulkan/radv_shader.h
+++ b/src/amd/vulkan/radv_shader.h
@@ -282,14 +282,15 @@  struct radv_shader_slab {
 };
 
 void
-radv_optimize_nir(struct nir_shader *shader);
+radv_optimize_nir(struct nir_shader *shader, bool optimize_conservatively);
 
 nir_shader *
 radv_shader_compile_to_nir(struct radv_device *device,
 			   struct radv_shader_module *module,
 			   const char *entrypoint_name,
 			   gl_shader_stage stage,
-			   const VkSpecializationInfo *spec_info);
+			   const VkSpecializationInfo *spec_info,
+			   const VkPipelineCreateFlags flags);
 
 void *
 radv_alloc_shader_memory(struct radv_device *device,

Comments

Bas Nieuwenhuizen May 12, 2018, 10:17 p.m.
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>

if you also have a spec to go with the new bit. (and add it to the headers)

On Tue, May 8, 2018 at 7:58 AM, Timothy Arceri <tarceri@itsqueeze.com> wrote:
> When VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT is set we skip NIR
> linking optimisations and only run over the NIR optimisation loop
> once similar to the GLSLOptimizeConservatively constant used by
> some GL drivers.
>
> We need to run over the opts at least once to avoid errors in LLVM
> (e.g. dead vars it can't handle) and also to reduce the time spent
> compiling the IR in LLVM.
>
> With this change the Blacksmith Unity demos compilation times
> go from 329760 ms -> 299881 ms when using Wine and DXVK.
>
> V2: add bit to radv_pipeline_key
>
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=106246
> ---
>  src/amd/vulkan/radv_pipeline.c | 26 +++++++++++++++++---------
>  src/amd/vulkan/radv_private.h  |  1 +
>  src/amd/vulkan/radv_shader.c   | 12 +++++++-----
>  src/amd/vulkan/radv_shader.h   |  5 +++--
>  4 files changed, 28 insertions(+), 16 deletions(-)
>
> diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
> index b4e4f3211e2..d443f8271e9 100644
> --- a/src/amd/vulkan/radv_pipeline.c
> +++ b/src/amd/vulkan/radv_pipeline.c
> @@ -1724,13 +1724,13 @@ radv_link_shaders(struct radv_pipeline *pipeline, nir_shader **shaders)
>                                 ac_lower_indirect_derefs(ordered_shaders[i],
>                                                          pipeline->device->physical_device->rad_info.chip_class);
>                         }
> -                       radv_optimize_nir(ordered_shaders[i]);
> +                       radv_optimize_nir(ordered_shaders[i], false);
>
>                         if (nir_lower_global_vars_to_local(ordered_shaders[i - 1])) {
>                                 ac_lower_indirect_derefs(ordered_shaders[i - 1],
>                                                          pipeline->device->physical_device->rad_info.chip_class);
>                         }
> -                       radv_optimize_nir(ordered_shaders[i - 1]);
> +                       radv_optimize_nir(ordered_shaders[i - 1], false);
>                 }
>         }
>  }
> @@ -1750,6 +1750,9 @@ radv_generate_graphics_pipeline_key(struct radv_pipeline *pipeline,
>         struct radv_pipeline_key key;
>         memset(&key, 0, sizeof(key));
>
> +       if (pCreateInfo->flags & VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT)
> +               key.optimisations_disabled = 1;
> +
>         key.has_multiview_view_index = has_view_index;
>
>         uint32_t binding_input_rate = 0;
> @@ -1878,7 +1881,8 @@ void radv_create_shaders(struct radv_pipeline *pipeline,
>                           struct radv_device *device,
>                           struct radv_pipeline_cache *cache,
>                           struct radv_pipeline_key key,
> -                         const VkPipelineShaderStageCreateInfo **pStages)
> +                         const VkPipelineShaderStageCreateInfo **pStages,
> +                         const VkPipelineCreateFlags flags)
>  {
>         struct radv_shader_module fs_m = {0};
>         struct radv_shader_module *modules[MESA_SHADER_STAGES] = { 0, };
> @@ -1944,7 +1948,8 @@ void radv_create_shaders(struct radv_pipeline *pipeline,
>
>                 nir[i] = radv_shader_compile_to_nir(device, modules[i],
>                                                     stage ? stage->pName : "main", i,
> -                                                   stage ? stage->pSpecializationInfo : NULL);
> +                                                   stage ? stage->pSpecializationInfo : NULL,
> +                                                   flags);
>                 pipeline->active_stages |= mesa_to_vk_shader_stage(i);
>
>                 /* We don't want to alter meta shaders IR directly so clone it
> @@ -1963,8 +1968,10 @@ void radv_create_shaders(struct radv_pipeline *pipeline,
>                         if (i != last)
>                                 mask = mask | nir_var_shader_out;
>
> -                       nir_lower_io_to_scalar_early(nir[i], mask);
> -                       radv_optimize_nir(nir[i]);
> +                       if (!(flags & VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT)) {
> +                               nir_lower_io_to_scalar_early(nir[i], mask);
> +                               radv_optimize_nir(nir[i], false);
> +                       }
>                 }
>         }
>
> @@ -1973,7 +1980,8 @@ void radv_create_shaders(struct radv_pipeline *pipeline,
>                 merge_tess_info(&nir[MESA_SHADER_TESS_EVAL]->info, &nir[MESA_SHADER_TESS_CTRL]->info);
>         }
>
> -       radv_link_shaders(pipeline, nir);
> +       if (!(flags & VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT))
> +               radv_link_shaders(pipeline, nir);
>
>         for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
>                 if (modules[i] && radv_can_dump_shader(device, modules[i]))
> @@ -3349,7 +3357,7 @@ radv_pipeline_init(struct radv_pipeline *pipeline,
>
>         radv_create_shaders(pipeline, device, cache,
>                             radv_generate_graphics_pipeline_key(pipeline, pCreateInfo, &blend, has_view_index),
> -                           pStages);
> +                           pStages, pCreateInfo->flags);
>
>         pipeline->graphics.spi_baryc_cntl = S_0286E0_FRONT_FACE_ALL_BITS(1);
>         radv_pipeline_init_multisample_state(pipeline, &blend, pCreateInfo);
> @@ -3582,7 +3590,7 @@ static VkResult radv_compute_pipeline_create(
>         assert(pipeline->layout);
>
>         pStages[MESA_SHADER_COMPUTE] = &pCreateInfo->stage;
> -       radv_create_shaders(pipeline, device, cache, (struct radv_pipeline_key) {0}, pStages);
> +       radv_create_shaders(pipeline, device, cache, (struct radv_pipeline_key) {0}, pStages, pCreateInfo->flags);
>
>         pipeline->user_data_0[MESA_SHADER_COMPUTE] = radv_pipeline_stage_to_user_data_0(pipeline, MESA_SHADER_COMPUTE, device->physical_device->rad_info.chip_class);
>         pipeline->need_indirect_descriptor_sets |= pipeline->shaders[MESA_SHADER_COMPUTE]->info.need_indirect_descriptor_sets;
> diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
> index 5d67271961b..4805acab280 100644
> --- a/src/amd/vulkan/radv_private.h
> +++ b/src/amd/vulkan/radv_private.h
> @@ -360,6 +360,7 @@ struct radv_pipeline_key {
>         uint8_t log2_num_samples;
>         uint32_t multisample : 1;
>         uint32_t has_multiview_view_index : 1;
> +       uint32_t optimisations_disabled : 1;
>  };
>
>  void
> diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
> index 014ed78c228..d6d70b971cd 100644
> --- a/src/amd/vulkan/radv_shader.c
> +++ b/src/amd/vulkan/radv_shader.c
> @@ -117,7 +117,7 @@ void radv_DestroyShaderModule(
>  }
>
>  void
> -radv_optimize_nir(struct nir_shader *shader)
> +radv_optimize_nir(struct nir_shader *shader, bool optimize_conservatively)
>  {
>          bool progress;
>
> @@ -149,7 +149,7 @@ radv_optimize_nir(struct nir_shader *shader)
>                  if (shader->options->max_unroll_iterations) {
>                          NIR_PASS(progress, shader, nir_opt_loop_unroll, 0);
>                  }
> -        } while (progress);
> +        } while (progress && !optimize_conservatively);
>
>          NIR_PASS(progress, shader, nir_opt_shrink_load);
>          NIR_PASS(progress, shader, nir_opt_move_load_ubo);
> @@ -160,7 +160,8 @@ radv_shader_compile_to_nir(struct radv_device *device,
>                            struct radv_shader_module *module,
>                            const char *entrypoint_name,
>                            gl_shader_stage stage,
> -                          const VkSpecializationInfo *spec_info)
> +                          const VkSpecializationInfo *spec_info,
> +                          const VkPipelineCreateFlags flags)
>  {
>         if (strcmp(entrypoint_name, "main") != 0) {
>                 radv_finishme("Multiple shaders per module not really supported");
> @@ -293,7 +294,8 @@ radv_shader_compile_to_nir(struct radv_device *device,
>                         .lower_vote_eq_to_ballot = 1,
>                 });
>
> -       radv_optimize_nir(nir);
> +       if (!(flags & VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT))
> +               radv_optimize_nir(nir, false);
>
>         /* Indirect lowering must be called after the radv_optimize_nir() loop
>          * has been called at least once. Otherwise indirect lowering can
> @@ -301,7 +303,7 @@ radv_shader_compile_to_nir(struct radv_device *device,
>          * considered too large for unrolling.
>          */
>         ac_lower_indirect_derefs(nir, device->physical_device->rad_info.chip_class);
> -       radv_optimize_nir(nir);
> +       radv_optimize_nir(nir, flags & VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT);
>
>         return nir;
>  }
> diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h
> index 6588b787724..182b69849c0 100644
> --- a/src/amd/vulkan/radv_shader.h
> +++ b/src/amd/vulkan/radv_shader.h
> @@ -282,14 +282,15 @@ struct radv_shader_slab {
>  };
>
>  void
> -radv_optimize_nir(struct nir_shader *shader);
> +radv_optimize_nir(struct nir_shader *shader, bool optimize_conservatively);
>
>  nir_shader *
>  radv_shader_compile_to_nir(struct radv_device *device,
>                            struct radv_shader_module *module,
>                            const char *entrypoint_name,
>                            gl_shader_stage stage,
> -                          const VkSpecializationInfo *spec_info);
> +                          const VkSpecializationInfo *spec_info,
> +                          const VkPipelineCreateFlags flags);
>
>  void *
>  radv_alloc_shader_memory(struct radv_device *device,
> --
> 2.17.0
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Timothy Arceri May 12, 2018, 10:46 p.m.
On 13/05/18 08:17, Bas Nieuwenhuizen wrote:
> Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
> 
> if you also have a spec to go with the new bit. (and add it to the headers)

VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT is already part of the spec. 
I think you are getting confused with the previous discussion for a 
threaded shader compiles bit.

> 
> On Tue, May 8, 2018 at 7:58 AM, Timothy Arceri <tarceri@itsqueeze.com> wrote:
>> When VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT is set we skip NIR
>> linking optimisations and only run over the NIR optimisation loop
>> once similar to the GLSLOptimizeConservatively constant used by
>> some GL drivers.
>>
>> We need to run over the opts at least once to avoid errors in LLVM
>> (e.g. dead vars it can't handle) and also to reduce the time spent
>> compiling the IR in LLVM.
>>
>> With this change the Blacksmith Unity demos compilation times
>> go from 329760 ms -> 299881 ms when using Wine and DXVK.
>>
>> V2: add bit to radv_pipeline_key
>>
>> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=106246
>> ---
>>   src/amd/vulkan/radv_pipeline.c | 26 +++++++++++++++++---------
>>   src/amd/vulkan/radv_private.h  |  1 +
>>   src/amd/vulkan/radv_shader.c   | 12 +++++++-----
>>   src/amd/vulkan/radv_shader.h   |  5 +++--
>>   4 files changed, 28 insertions(+), 16 deletions(-)
>>
>> diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
>> index b4e4f3211e2..d443f8271e9 100644
>> --- a/src/amd/vulkan/radv_pipeline.c
>> +++ b/src/amd/vulkan/radv_pipeline.c
>> @@ -1724,13 +1724,13 @@ radv_link_shaders(struct radv_pipeline *pipeline, nir_shader **shaders)
>>                                  ac_lower_indirect_derefs(ordered_shaders[i],
>>                                                           pipeline->device->physical_device->rad_info.chip_class);
>>                          }
>> -                       radv_optimize_nir(ordered_shaders[i]);
>> +                       radv_optimize_nir(ordered_shaders[i], false);
>>
>>                          if (nir_lower_global_vars_to_local(ordered_shaders[i - 1])) {
>>                                  ac_lower_indirect_derefs(ordered_shaders[i - 1],
>>                                                           pipeline->device->physical_device->rad_info.chip_class);
>>                          }
>> -                       radv_optimize_nir(ordered_shaders[i - 1]);
>> +                       radv_optimize_nir(ordered_shaders[i - 1], false);
>>                  }
>>          }
>>   }
>> @@ -1750,6 +1750,9 @@ radv_generate_graphics_pipeline_key(struct radv_pipeline *pipeline,
>>          struct radv_pipeline_key key;
>>          memset(&key, 0, sizeof(key));
>>
>> +       if (pCreateInfo->flags & VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT)
>> +               key.optimisations_disabled = 1;
>> +
>>          key.has_multiview_view_index = has_view_index;
>>
>>          uint32_t binding_input_rate = 0;
>> @@ -1878,7 +1881,8 @@ void radv_create_shaders(struct radv_pipeline *pipeline,
>>                            struct radv_device *device,
>>                            struct radv_pipeline_cache *cache,
>>                            struct radv_pipeline_key key,
>> -                         const VkPipelineShaderStageCreateInfo **pStages)
>> +                         const VkPipelineShaderStageCreateInfo **pStages,
>> +                         const VkPipelineCreateFlags flags)
>>   {
>>          struct radv_shader_module fs_m = {0};
>>          struct radv_shader_module *modules[MESA_SHADER_STAGES] = { 0, };
>> @@ -1944,7 +1948,8 @@ void radv_create_shaders(struct radv_pipeline *pipeline,
>>
>>                  nir[i] = radv_shader_compile_to_nir(device, modules[i],
>>                                                      stage ? stage->pName : "main", i,
>> -                                                   stage ? stage->pSpecializationInfo : NULL);
>> +                                                   stage ? stage->pSpecializationInfo : NULL,
>> +                                                   flags);
>>                  pipeline->active_stages |= mesa_to_vk_shader_stage(i);
>>
>>                  /* We don't want to alter meta shaders IR directly so clone it
>> @@ -1963,8 +1968,10 @@ void radv_create_shaders(struct radv_pipeline *pipeline,
>>                          if (i != last)
>>                                  mask = mask | nir_var_shader_out;
>>
>> -                       nir_lower_io_to_scalar_early(nir[i], mask);
>> -                       radv_optimize_nir(nir[i]);
>> +                       if (!(flags & VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT)) {
>> +                               nir_lower_io_to_scalar_early(nir[i], mask);
>> +                               radv_optimize_nir(nir[i], false);
>> +                       }
>>                  }
>>          }
>>
>> @@ -1973,7 +1980,8 @@ void radv_create_shaders(struct radv_pipeline *pipeline,
>>                  merge_tess_info(&nir[MESA_SHADER_TESS_EVAL]->info, &nir[MESA_SHADER_TESS_CTRL]->info);
>>          }
>>
>> -       radv_link_shaders(pipeline, nir);
>> +       if (!(flags & VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT))
>> +               radv_link_shaders(pipeline, nir);
>>
>>          for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
>>                  if (modules[i] && radv_can_dump_shader(device, modules[i]))
>> @@ -3349,7 +3357,7 @@ radv_pipeline_init(struct radv_pipeline *pipeline,
>>
>>          radv_create_shaders(pipeline, device, cache,
>>                              radv_generate_graphics_pipeline_key(pipeline, pCreateInfo, &blend, has_view_index),
>> -                           pStages);
>> +                           pStages, pCreateInfo->flags);
>>
>>          pipeline->graphics.spi_baryc_cntl = S_0286E0_FRONT_FACE_ALL_BITS(1);
>>          radv_pipeline_init_multisample_state(pipeline, &blend, pCreateInfo);
>> @@ -3582,7 +3590,7 @@ static VkResult radv_compute_pipeline_create(
>>          assert(pipeline->layout);
>>
>>          pStages[MESA_SHADER_COMPUTE] = &pCreateInfo->stage;
>> -       radv_create_shaders(pipeline, device, cache, (struct radv_pipeline_key) {0}, pStages);
>> +       radv_create_shaders(pipeline, device, cache, (struct radv_pipeline_key) {0}, pStages, pCreateInfo->flags);
>>
>>          pipeline->user_data_0[MESA_SHADER_COMPUTE] = radv_pipeline_stage_to_user_data_0(pipeline, MESA_SHADER_COMPUTE, device->physical_device->rad_info.chip_class);
>>          pipeline->need_indirect_descriptor_sets |= pipeline->shaders[MESA_SHADER_COMPUTE]->info.need_indirect_descriptor_sets;
>> diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
>> index 5d67271961b..4805acab280 100644
>> --- a/src/amd/vulkan/radv_private.h
>> +++ b/src/amd/vulkan/radv_private.h
>> @@ -360,6 +360,7 @@ struct radv_pipeline_key {
>>          uint8_t log2_num_samples;
>>          uint32_t multisample : 1;
>>          uint32_t has_multiview_view_index : 1;
>> +       uint32_t optimisations_disabled : 1;
>>   };
>>
>>   void
>> diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
>> index 014ed78c228..d6d70b971cd 100644
>> --- a/src/amd/vulkan/radv_shader.c
>> +++ b/src/amd/vulkan/radv_shader.c
>> @@ -117,7 +117,7 @@ void radv_DestroyShaderModule(
>>   }
>>
>>   void
>> -radv_optimize_nir(struct nir_shader *shader)
>> +radv_optimize_nir(struct nir_shader *shader, bool optimize_conservatively)
>>   {
>>           bool progress;
>>
>> @@ -149,7 +149,7 @@ radv_optimize_nir(struct nir_shader *shader)
>>                   if (shader->options->max_unroll_iterations) {
>>                           NIR_PASS(progress, shader, nir_opt_loop_unroll, 0);
>>                   }
>> -        } while (progress);
>> +        } while (progress && !optimize_conservatively);
>>
>>           NIR_PASS(progress, shader, nir_opt_shrink_load);
>>           NIR_PASS(progress, shader, nir_opt_move_load_ubo);
>> @@ -160,7 +160,8 @@ radv_shader_compile_to_nir(struct radv_device *device,
>>                             struct radv_shader_module *module,
>>                             const char *entrypoint_name,
>>                             gl_shader_stage stage,
>> -                          const VkSpecializationInfo *spec_info)
>> +                          const VkSpecializationInfo *spec_info,
>> +                          const VkPipelineCreateFlags flags)
>>   {
>>          if (strcmp(entrypoint_name, "main") != 0) {
>>                  radv_finishme("Multiple shaders per module not really supported");
>> @@ -293,7 +294,8 @@ radv_shader_compile_to_nir(struct radv_device *device,
>>                          .lower_vote_eq_to_ballot = 1,
>>                  });
>>
>> -       radv_optimize_nir(nir);
>> +       if (!(flags & VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT))
>> +               radv_optimize_nir(nir, false);
>>
>>          /* Indirect lowering must be called after the radv_optimize_nir() loop
>>           * has been called at least once. Otherwise indirect lowering can
>> @@ -301,7 +303,7 @@ radv_shader_compile_to_nir(struct radv_device *device,
>>           * considered too large for unrolling.
>>           */
>>          ac_lower_indirect_derefs(nir, device->physical_device->rad_info.chip_class);
>> -       radv_optimize_nir(nir);
>> +       radv_optimize_nir(nir, flags & VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT);
>>
>>          return nir;
>>   }
>> diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h
>> index 6588b787724..182b69849c0 100644
>> --- a/src/amd/vulkan/radv_shader.h
>> +++ b/src/amd/vulkan/radv_shader.h
>> @@ -282,14 +282,15 @@ struct radv_shader_slab {
>>   };
>>
>>   void
>> -radv_optimize_nir(struct nir_shader *shader);
>> +radv_optimize_nir(struct nir_shader *shader, bool optimize_conservatively);
>>
>>   nir_shader *
>>   radv_shader_compile_to_nir(struct radv_device *device,
>>                             struct radv_shader_module *module,
>>                             const char *entrypoint_name,
>>                             gl_shader_stage stage,
>> -                          const VkSpecializationInfo *spec_info);
>> +                          const VkSpecializationInfo *spec_info,
>> +                          const VkPipelineCreateFlags flags);
>>
>>   void *
>>   radv_alloc_shader_memory(struct radv_device *device,
>> --
>> 2.17.0
>>
>> _______________________________________________
>> mesa-dev mailing list
>> mesa-dev@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Bas Nieuwenhuizen May 12, 2018, 10:49 p.m.
On Sun, May 13, 2018 at 12:46 AM, Timothy Arceri <tarceri@itsqueeze.com> wrote:
> On 13/05/18 08:17, Bas Nieuwenhuizen wrote:
>>
>> Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
>>
>> if you also have a spec to go with the new bit. (and add it to the
>> headers)
>
>
> VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT is already part of the spec. I
> think you are getting confused with the previous discussion for a threaded
> shader compiles bit.

Looks like I was indeed.

>
>
>>
>> On Tue, May 8, 2018 at 7:58 AM, Timothy Arceri <tarceri@itsqueeze.com>
>> wrote:
>>>
>>> When VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT is set we skip NIR
>>> linking optimisations and only run over the NIR optimisation loop
>>> once similar to the GLSLOptimizeConservatively constant used by
>>> some GL drivers.
>>>
>>> We need to run over the opts at least once to avoid errors in LLVM
>>> (e.g. dead vars it can't handle) and also to reduce the time spent
>>> compiling the IR in LLVM.
>>>
>>> With this change the Blacksmith Unity demos compilation times
>>> go from 329760 ms -> 299881 ms when using Wine and DXVK.
>>>
>>> V2: add bit to radv_pipeline_key
>>>
>>> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=106246
>>> ---
>>>   src/amd/vulkan/radv_pipeline.c | 26 +++++++++++++++++---------
>>>   src/amd/vulkan/radv_private.h  |  1 +
>>>   src/amd/vulkan/radv_shader.c   | 12 +++++++-----
>>>   src/amd/vulkan/radv_shader.h   |  5 +++--
>>>   4 files changed, 28 insertions(+), 16 deletions(-)
>>>
>>> diff --git a/src/amd/vulkan/radv_pipeline.c
>>> b/src/amd/vulkan/radv_pipeline.c
>>> index b4e4f3211e2..d443f8271e9 100644
>>> --- a/src/amd/vulkan/radv_pipeline.c
>>> +++ b/src/amd/vulkan/radv_pipeline.c
>>> @@ -1724,13 +1724,13 @@ radv_link_shaders(struct radv_pipeline *pipeline,
>>> nir_shader **shaders)
>>>
>>> ac_lower_indirect_derefs(ordered_shaders[i],
>>>
>>> pipeline->device->physical_device->rad_info.chip_class);
>>>                          }
>>> -                       radv_optimize_nir(ordered_shaders[i]);
>>> +                       radv_optimize_nir(ordered_shaders[i], false);
>>>
>>>                          if
>>> (nir_lower_global_vars_to_local(ordered_shaders[i - 1])) {
>>>
>>> ac_lower_indirect_derefs(ordered_shaders[i - 1],
>>>
>>> pipeline->device->physical_device->rad_info.chip_class);
>>>                          }
>>> -                       radv_optimize_nir(ordered_shaders[i - 1]);
>>> +                       radv_optimize_nir(ordered_shaders[i - 1], false);
>>>                  }
>>>          }
>>>   }
>>> @@ -1750,6 +1750,9 @@ radv_generate_graphics_pipeline_key(struct
>>> radv_pipeline *pipeline,
>>>          struct radv_pipeline_key key;
>>>          memset(&key, 0, sizeof(key));
>>>
>>> +       if (pCreateInfo->flags &
>>> VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT)
>>> +               key.optimisations_disabled = 1;
>>> +
>>>          key.has_multiview_view_index = has_view_index;
>>>
>>>          uint32_t binding_input_rate = 0;
>>> @@ -1878,7 +1881,8 @@ void radv_create_shaders(struct radv_pipeline
>>> *pipeline,
>>>                            struct radv_device *device,
>>>                            struct radv_pipeline_cache *cache,
>>>                            struct radv_pipeline_key key,
>>> -                         const VkPipelineShaderStageCreateInfo
>>> **pStages)
>>> +                         const VkPipelineShaderStageCreateInfo
>>> **pStages,
>>> +                         const VkPipelineCreateFlags flags)
>>>   {
>>>          struct radv_shader_module fs_m = {0};
>>>          struct radv_shader_module *modules[MESA_SHADER_STAGES] = { 0, };
>>> @@ -1944,7 +1948,8 @@ void radv_create_shaders(struct radv_pipeline
>>> *pipeline,
>>>
>>>                  nir[i] = radv_shader_compile_to_nir(device, modules[i],
>>>                                                      stage ? stage->pName
>>> : "main", i,
>>> -                                                   stage ?
>>> stage->pSpecializationInfo : NULL);
>>> +                                                   stage ?
>>> stage->pSpecializationInfo : NULL,
>>> +                                                   flags);
>>>                  pipeline->active_stages |= mesa_to_vk_shader_stage(i);
>>>
>>>                  /* We don't want to alter meta shaders IR directly so
>>> clone it
>>> @@ -1963,8 +1968,10 @@ void radv_create_shaders(struct radv_pipeline
>>> *pipeline,
>>>                          if (i != last)
>>>                                  mask = mask | nir_var_shader_out;
>>>
>>> -                       nir_lower_io_to_scalar_early(nir[i], mask);
>>> -                       radv_optimize_nir(nir[i]);
>>> +                       if (!(flags &
>>> VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT)) {
>>> +                               nir_lower_io_to_scalar_early(nir[i],
>>> mask);
>>> +                               radv_optimize_nir(nir[i], false);
>>> +                       }
>>>                  }
>>>          }
>>>
>>> @@ -1973,7 +1980,8 @@ void radv_create_shaders(struct radv_pipeline
>>> *pipeline,
>>>                  merge_tess_info(&nir[MESA_SHADER_TESS_EVAL]->info,
>>> &nir[MESA_SHADER_TESS_CTRL]->info);
>>>          }
>>>
>>> -       radv_link_shaders(pipeline, nir);
>>> +       if (!(flags & VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT))
>>> +               radv_link_shaders(pipeline, nir);
>>>
>>>          for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
>>>                  if (modules[i] && radv_can_dump_shader(device,
>>> modules[i]))
>>> @@ -3349,7 +3357,7 @@ radv_pipeline_init(struct radv_pipeline *pipeline,
>>>
>>>          radv_create_shaders(pipeline, device, cache,
>>>
>>> radv_generate_graphics_pipeline_key(pipeline, pCreateInfo, &blend,
>>> has_view_index),
>>> -                           pStages);
>>> +                           pStages, pCreateInfo->flags);
>>>
>>>          pipeline->graphics.spi_baryc_cntl =
>>> S_0286E0_FRONT_FACE_ALL_BITS(1);
>>>          radv_pipeline_init_multisample_state(pipeline, &blend,
>>> pCreateInfo);
>>> @@ -3582,7 +3590,7 @@ static VkResult radv_compute_pipeline_create(
>>>          assert(pipeline->layout);
>>>
>>>          pStages[MESA_SHADER_COMPUTE] = &pCreateInfo->stage;
>>> -       radv_create_shaders(pipeline, device, cache, (struct
>>> radv_pipeline_key) {0}, pStages);
>>> +       radv_create_shaders(pipeline, device, cache, (struct
>>> radv_pipeline_key) {0}, pStages, pCreateInfo->flags);
>>>
>>>          pipeline->user_data_0[MESA_SHADER_COMPUTE] =
>>> radv_pipeline_stage_to_user_data_0(pipeline, MESA_SHADER_COMPUTE,
>>> device->physical_device->rad_info.chip_class);
>>>          pipeline->need_indirect_descriptor_sets |=
>>> pipeline->shaders[MESA_SHADER_COMPUTE]->info.need_indirect_descriptor_sets;
>>> diff --git a/src/amd/vulkan/radv_private.h
>>> b/src/amd/vulkan/radv_private.h
>>> index 5d67271961b..4805acab280 100644
>>> --- a/src/amd/vulkan/radv_private.h
>>> +++ b/src/amd/vulkan/radv_private.h
>>> @@ -360,6 +360,7 @@ struct radv_pipeline_key {
>>>          uint8_t log2_num_samples;
>>>          uint32_t multisample : 1;
>>>          uint32_t has_multiview_view_index : 1;
>>> +       uint32_t optimisations_disabled : 1;
>>>   };
>>>
>>>   void
>>> diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
>>> index 014ed78c228..d6d70b971cd 100644
>>> --- a/src/amd/vulkan/radv_shader.c
>>> +++ b/src/amd/vulkan/radv_shader.c
>>> @@ -117,7 +117,7 @@ void radv_DestroyShaderModule(
>>>   }
>>>
>>>   void
>>> -radv_optimize_nir(struct nir_shader *shader)
>>> +radv_optimize_nir(struct nir_shader *shader, bool
>>> optimize_conservatively)
>>>   {
>>>           bool progress;
>>>
>>> @@ -149,7 +149,7 @@ radv_optimize_nir(struct nir_shader *shader)
>>>                   if (shader->options->max_unroll_iterations) {
>>>                           NIR_PASS(progress, shader, nir_opt_loop_unroll,
>>> 0);
>>>                   }
>>> -        } while (progress);
>>> +        } while (progress && !optimize_conservatively);
>>>
>>>           NIR_PASS(progress, shader, nir_opt_shrink_load);
>>>           NIR_PASS(progress, shader, nir_opt_move_load_ubo);
>>> @@ -160,7 +160,8 @@ radv_shader_compile_to_nir(struct radv_device
>>> *device,
>>>                             struct radv_shader_module *module,
>>>                             const char *entrypoint_name,
>>>                             gl_shader_stage stage,
>>> -                          const VkSpecializationInfo *spec_info)
>>> +                          const VkSpecializationInfo *spec_info,
>>> +                          const VkPipelineCreateFlags flags)
>>>   {
>>>          if (strcmp(entrypoint_name, "main") != 0) {
>>>                  radv_finishme("Multiple shaders per module not really
>>> supported");
>>> @@ -293,7 +294,8 @@ radv_shader_compile_to_nir(struct radv_device
>>> *device,
>>>                          .lower_vote_eq_to_ballot = 1,
>>>                  });
>>>
>>> -       radv_optimize_nir(nir);
>>> +       if (!(flags & VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT))
>>> +               radv_optimize_nir(nir, false);
>>>
>>>          /* Indirect lowering must be called after the
>>> radv_optimize_nir() loop
>>>           * has been called at least once. Otherwise indirect lowering
>>> can
>>> @@ -301,7 +303,7 @@ radv_shader_compile_to_nir(struct radv_device
>>> *device,
>>>           * considered too large for unrolling.
>>>           */
>>>          ac_lower_indirect_derefs(nir,
>>> device->physical_device->rad_info.chip_class);
>>> -       radv_optimize_nir(nir);
>>> +       radv_optimize_nir(nir, flags &
>>> VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT);
>>>
>>>          return nir;
>>>   }
>>> diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h
>>> index 6588b787724..182b69849c0 100644
>>> --- a/src/amd/vulkan/radv_shader.h
>>> +++ b/src/amd/vulkan/radv_shader.h
>>> @@ -282,14 +282,15 @@ struct radv_shader_slab {
>>>   };
>>>
>>>   void
>>> -radv_optimize_nir(struct nir_shader *shader);
>>> +radv_optimize_nir(struct nir_shader *shader, bool
>>> optimize_conservatively);
>>>
>>>   nir_shader *
>>>   radv_shader_compile_to_nir(struct radv_device *device,
>>>                             struct radv_shader_module *module,
>>>                             const char *entrypoint_name,
>>>                             gl_shader_stage stage,
>>> -                          const VkSpecializationInfo *spec_info);
>>> +                          const VkSpecializationInfo *spec_info,
>>> +                          const VkPipelineCreateFlags flags);
>>>
>>>   void *
>>>   radv_alloc_shader_memory(struct radv_device *device,
>>> --
>>> 2.17.0
>>>
>>> _______________________________________________
>>> mesa-dev mailing list
>>> mesa-dev@lists.freedesktop.org
>>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev