[2/2] radv: Implement VK_AMD_shader_info

Submitted by Alex Smith on Oct. 25, 2017, 11:18 a.m.

Details

Message ID 20171025111855.19705-2-asmith@feralinteractive.com
State New
Headers show
Series "Series without cover letter" ( rev: 1 ) in Mesa

Not browsing as part of any series.

Commit Message

Alex Smith Oct. 25, 2017, 11:18 a.m.
This allows an app to query shader statistics and get a disassembly of
a shader. RenderDoc git has support for it, so this allows you to view
shader disassembly from a capture.

When this extension is enabled on a device (or when tracing), we now
disable pipeline caching, since we don't get the shader debug info when
we retrieve cached shaders.

Signed-off-by: Alex Smith <asmith@feralinteractive.com>
---
 src/amd/vulkan/radv_device.c         |   9 ++
 src/amd/vulkan/radv_extensions.py    |   1 +
 src/amd/vulkan/radv_pipeline.c       |   2 +-
 src/amd/vulkan/radv_pipeline_cache.c |  11 ++-
 src/amd/vulkan/radv_private.h        |   3 +
 src/amd/vulkan/radv_shader.c         | 163 ++++++++++++++++++++++++++++-------
 6 files changed, 154 insertions(+), 35 deletions(-)

Patch hide | download patch | download mbox

diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index c4e25222ea..5603551680 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -943,10 +943,15 @@  VkResult radv_CreateDevice(
 	VkResult result;
 	struct radv_device *device;
 
+	bool keep_shader_info = false;
+
 	for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
 		const char *ext_name = pCreateInfo->ppEnabledExtensionNames[i];
 		if (!radv_physical_device_extension_supported(physical_device, ext_name))
 			return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
+
+		if (strcmp(ext_name, VK_AMD_SHADER_INFO_EXTENSION_NAME) == 0)
+			keep_shader_info = true;
 	}
 
 	/* Check enabled features */
@@ -1040,10 +1045,14 @@  VkResult radv_CreateDevice(
 		device->physical_device->rad_info.max_se >= 2;
 
 	if (getenv("RADV_TRACE_FILE")) {
+		keep_shader_info = true;
+
 		if (!radv_init_trace(device))
 			goto fail;
 	}
 
+	device->keep_shader_info = keep_shader_info;
+
 	result = radv_device_init_meta(device);
 	if (result != VK_SUCCESS)
 		goto fail;
diff --git a/src/amd/vulkan/radv_extensions.py b/src/amd/vulkan/radv_extensions.py
index dfeb2880fc..eeb679d65a 100644
--- a/src/amd/vulkan/radv_extensions.py
+++ b/src/amd/vulkan/radv_extensions.py
@@ -81,6 +81,7 @@  EXTENSIONS = [
     Extension('VK_EXT_global_priority',                   1, 'device->rad_info.has_ctx_priority'),
     Extension('VK_AMD_draw_indirect_count',               1, True),
     Extension('VK_AMD_rasterization_order',               1, 'device->rad_info.chip_class >= VI && device->rad_info.max_se >= 2'),
+    Extension('VK_AMD_shader_info',                       1, True),
 ]
 
 class VkVersion:
diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index d6b33a5327..2df03a83cf 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -1874,7 +1874,7 @@  void radv_create_shaders(struct radv_pipeline *pipeline,
 			if (device->instance->debug_flags & RADV_DEBUG_DUMP_SHADERS)
 				nir_print_shader(nir[i], stderr);
 
-			if (!pipeline->device->trace_bo)
+			if (!pipeline->device->keep_shader_info)
 				ralloc_free(nir[i]);
 		}
 	}
diff --git a/src/amd/vulkan/radv_pipeline_cache.c b/src/amd/vulkan/radv_pipeline_cache.c
index 9ba9a3b61b..46198799a7 100644
--- a/src/amd/vulkan/radv_pipeline_cache.c
+++ b/src/amd/vulkan/radv_pipeline_cache.c
@@ -62,9 +62,11 @@  radv_pipeline_cache_init(struct radv_pipeline_cache *cache,
 	cache->hash_table = malloc(byte_size);
 
 	/* We don't consider allocation failure fatal, we just start with a 0-sized
-	 * cache. */
+	 * cache. Disable caching when we want to keep shader debug info, since
+	 * we don't get the debug info on cached shaders. */
 	if (cache->hash_table == NULL ||
-	    (device->instance->debug_flags & RADV_DEBUG_NO_CACHE))
+	    (device->instance->debug_flags & RADV_DEBUG_NO_CACHE) ||
+	    device->keep_shader_info)
 		cache->table_size = 0;
 	else
 		memset(cache->hash_table, 0, byte_size);
@@ -186,8 +188,11 @@  radv_create_shader_variants_from_pipeline_cache(struct radv_device *device,
 	entry = radv_pipeline_cache_search_unlocked(cache, sha1);
 
 	if (!entry) {
+		/* Again, don't cache when we want debug info, since this isn't
+		 * present in the cache. */
 		if (!device->physical_device->disk_cache ||
-		    (device->instance->debug_flags & RADV_DEBUG_NO_CACHE)) {
+		    (device->instance->debug_flags & RADV_DEBUG_NO_CACHE) ||
+		    device->keep_shader_info) {
 			pthread_mutex_unlock(&cache->mutex);
 			return false;
 		}
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index a4e52b2530..169df5f37b 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -552,6 +552,9 @@  struct radv_device {
 	struct radeon_winsys_bo                      *trace_bo;
 	uint32_t                                     *trace_id_ptr;
 
+	/* Whether to keep shader debug info, for tracing or VK_AMD_shader_info */
+	bool                                         keep_shader_info;
+
 	struct radv_physical_device                  *physical_device;
 
 	/* Backup in-memory cache to be used if the app doesn't provide one */
diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index 5903917068..7f2f0fd750 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -46,6 +46,8 @@ 
 #include "util/debug.h"
 #include "ac_exp_param.h"
 
+#include "util/string_buffer.h"
+
 static const struct nir_shader_compiler_options nir_options = {
 	.vertex_id_zero_based = true,
 	.lower_scmp = true,
@@ -471,7 +473,7 @@  shader_variant_create(struct radv_device *device,
 	free(binary.relocs);
 	variant->ref_count = 1;
 
-	if (device->trace_bo) {
+	if (device->keep_shader_info) {
 		variant->disasm_string = binary.disasm_string;
 		if (!gs_copy_shader && !module->nir) {
 			variant->nir = *shaders;
@@ -593,11 +595,20 @@  radv_get_shader_name(struct radv_shader_variant *var, gl_shader_stage stage)
 	};
 }
 
-void
-radv_shader_dump_stats(struct radv_device *device,
-		       struct radv_shader_variant *variant,
-		       gl_shader_stage stage,
-		       FILE *file)
+static uint32_t
+get_total_sgprs(struct radv_device *device)
+{
+	if (device->physical_device->rad_info.chip_class >= VI)
+		return 800;
+	else
+		return 512;
+}
+
+static void
+generate_shader_stats(struct radv_device *device,
+		      struct radv_shader_variant *variant,
+		      gl_shader_stage stage,
+		      struct _mesa_string_buffer *buf)
 {
 	unsigned lds_increment = device->physical_device->rad_info.chip_class >= CIK ? 512 : 256;
 	struct ac_shader_config *conf;
@@ -623,12 +634,8 @@  radv_shader_dump_stats(struct radv_device *device,
 				     lds_increment);
 	}
 
-	if (conf->num_sgprs) {
-		if (device->physical_device->rad_info.chip_class >= VI)
-			max_simd_waves = MIN2(max_simd_waves, 800 / conf->num_sgprs);
-		else
-			max_simd_waves = MIN2(max_simd_waves, 512 / conf->num_sgprs);
-	}
+	if (conf->num_sgprs)
+		max_simd_waves = MIN2(max_simd_waves, get_total_sgprs(device) / conf->num_sgprs);
 
 	if (conf->num_vgprs)
 		max_simd_waves = MIN2(max_simd_waves, 256 / conf->num_vgprs);
@@ -639,27 +646,121 @@  radv_shader_dump_stats(struct radv_device *device,
 	if (lds_per_wave)
 		max_simd_waves = MIN2(max_simd_waves, 16384 / lds_per_wave);
 
+	if (stage == MESA_SHADER_FRAGMENT) {
+		_mesa_string_buffer_printf(buf, "*** SHADER CONFIG ***\n"
+					   "SPI_PS_INPUT_ADDR = 0x%04x\n"
+					   "SPI_PS_INPUT_ENA  = 0x%04x\n",
+					   conf->spi_ps_input_addr, conf->spi_ps_input_ena);
+	}
+
+	_mesa_string_buffer_printf(buf, "*** SHADER STATS ***\n"
+				   "SGPRS: %d\n"
+				   "VGPRS: %d\n"
+				   "Spilled SGPRs: %d\n"
+				   "Spilled VGPRs: %d\n"
+				   "Code Size: %d bytes\n"
+				   "LDS: %d blocks\n"
+				   "Scratch: %d bytes per wave\n"
+				   "Max Waves: %d\n"
+				   "********************\n\n\n",
+				   conf->num_sgprs, conf->num_vgprs,
+				   conf->spilled_sgprs, conf->spilled_vgprs, variant->code_size,
+				   conf->lds_size, conf->scratch_bytes_per_wave,
+				   max_simd_waves);
+}
+
+void
+radv_shader_dump_stats(struct radv_device *device,
+		       struct radv_shader_variant *variant,
+		       gl_shader_stage stage,
+		       FILE *file)
+{
+	struct _mesa_string_buffer *buf = _mesa_string_buffer_create(NULL, 256);
+
+	generate_shader_stats(device, variant, stage, buf);
+
 	fprintf(file, "\n%s:\n", radv_get_shader_name(variant, stage));
+	fprintf(file, buf->buf);
 
-	if (stage == MESA_SHADER_FRAGMENT) {
-		fprintf(file, "*** SHADER CONFIG ***\n"
-			"SPI_PS_INPUT_ADDR = 0x%04x\n"
-			"SPI_PS_INPUT_ENA  = 0x%04x\n",
-			conf->spi_ps_input_addr, conf->spi_ps_input_ena);
+	_mesa_string_buffer_destroy(buf);
+}
+
+VkResult
+radv_GetShaderInfoAMD(VkDevice _device,
+		      VkPipeline _pipeline,
+		      VkShaderStageFlagBits shaderStage,
+		      VkShaderInfoTypeAMD infoType,
+		      size_t* pInfoSize,
+		      void* pInfo)
+{
+	RADV_FROM_HANDLE(radv_device, device, _device);
+	RADV_FROM_HANDLE(radv_pipeline, pipeline, _pipeline);
+	gl_shader_stage stage = vk_to_mesa_shader_stage(shaderStage);
+	struct radv_shader_variant *variant = pipeline->shaders[stage];
+	struct _mesa_string_buffer *buf;
+	VkResult result = VK_SUCCESS;
+
+	/* Spec doesn't indicate what to do if the stage is invalid, so just
+	 * return no info for this. */
+	if (!variant)
+		return VK_ERROR_FEATURE_NOT_PRESENT;
+
+	switch (infoType) {
+	case VK_SHADER_INFO_TYPE_STATISTICS_AMD:
+		if (!pInfo) {
+			*pInfoSize = sizeof(VkShaderStatisticsInfoAMD);
+		} else {
+			struct ac_shader_config *conf = &variant->config;
+
+			VkShaderStatisticsInfoAMD statistics = {};
+			statistics.shaderStageMask = shaderStage;
+			statistics.resourceUsage.numUsedVgprs = conf->num_vgprs + conf->spilled_vgprs;
+			statistics.resourceUsage.numUsedSgprs = conf->num_sgprs + conf->spilled_sgprs;
+			statistics.resourceUsage.ldsSizePerLocalWorkGroup = 16384;
+			statistics.resourceUsage.ldsUsageSizeInBytes = conf->lds_size;
+			statistics.resourceUsage.scratchMemUsageInBytes = conf->scratch_bytes_per_wave;
+			statistics.numPhysicalVgprs = statistics.numAvailableVgprs = 256;
+			statistics.numPhysicalSgprs = statistics.numAvailableSgprs = get_total_sgprs(device);
+			statistics.computeWorkGroupSize[0] = variant->nir->info.cs.local_size[0];
+			statistics.computeWorkGroupSize[1] = variant->nir->info.cs.local_size[1];
+			statistics.computeWorkGroupSize[2] = variant->nir->info.cs.local_size[2];
+
+			size_t size = *pInfoSize;
+			*pInfoSize = sizeof(statistics);
+
+			memcpy(pInfo, &statistics, MIN2(size, *pInfoSize));
+
+			if (size < *pInfoSize)
+				result = VK_INCOMPLETE;
+		}
+
+		break;
+	case VK_SHADER_INFO_TYPE_DISASSEMBLY_AMD:
+		buf = _mesa_string_buffer_create(NULL, 1024);
+
+		_mesa_string_buffer_printf(buf, "%s:\n", radv_get_shader_name(variant, stage));
+		_mesa_string_buffer_printf(buf, "%s\n\n", variant->disasm_string);
+		generate_shader_stats(device, variant, stage, buf);
+
+		if (!pInfo) {
+			*pInfoSize = buf->length;
+		} else {
+			size_t size = *pInfoSize;
+			*pInfoSize = buf->length;
+
+			memcpy(pInfo, buf->buf, MIN2(size, buf->length));
+
+			if (size < buf->length)
+				result = VK_INCOMPLETE;
+		}
+
+		_mesa_string_buffer_destroy(buf);
+		break;
+	default:
+		/* VK_SHADER_INFO_TYPE_BINARY_AMD unimplemented for now. */
+		result = VK_ERROR_FEATURE_NOT_PRESENT;
+		break;
 	}
 
-	fprintf(file, "*** SHADER STATS ***\n"
-		"SGPRS: %d\n"
-		"VGPRS: %d\n"
-		"Spilled SGPRs: %d\n"
-		"Spilled VGPRs: %d\n"
-		"Code Size: %d bytes\n"
-		"LDS: %d blocks\n"
-		"Scratch: %d bytes per wave\n"
-		"Max Waves: %d\n"
-		"********************\n\n\n",
-		conf->num_sgprs, conf->num_vgprs,
-		conf->spilled_sgprs, conf->spilled_vgprs, variant->code_size,
-		conf->lds_size, conf->scratch_bytes_per_wave,
-		max_simd_waves);
+	return result;
 }

Comments

I have something similar on my local tree (started on monday).

Though, I don't like the way we expose the number of VGPRS/SGPRS because 
we can't really figure out the number of spilled ones.

On 10/25/2017 01:18 PM, Alex Smith wrote:
> This allows an app to query shader statistics and get a disassembly of
> a shader. RenderDoc git has support for it, so this allows you to view
> shader disassembly from a capture.
> 
> When this extension is enabled on a device (or when tracing), we now
> disable pipeline caching, since we don't get the shader debug info when
> we retrieve cached shaders.
> 
> Signed-off-by: Alex Smith <asmith@feralinteractive.com>
> ---
>   src/amd/vulkan/radv_device.c         |   9 ++
>   src/amd/vulkan/radv_extensions.py    |   1 +
>   src/amd/vulkan/radv_pipeline.c       |   2 +-
>   src/amd/vulkan/radv_pipeline_cache.c |  11 ++-
>   src/amd/vulkan/radv_private.h        |   3 +
>   src/amd/vulkan/radv_shader.c         | 163 ++++++++++++++++++++++++++++-------
>   6 files changed, 154 insertions(+), 35 deletions(-)
> 
> diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
> index c4e25222ea..5603551680 100644
> --- a/src/amd/vulkan/radv_device.c
> +++ b/src/amd/vulkan/radv_device.c
> @@ -943,10 +943,15 @@ VkResult radv_CreateDevice(
>   	VkResult result;
>   	struct radv_device *device;
>   
> +	bool keep_shader_info = false;
> +
>   	for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
>   		const char *ext_name = pCreateInfo->ppEnabledExtensionNames[i];
>   		if (!radv_physical_device_extension_supported(physical_device, ext_name))
>   			return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
> +
> +		if (strcmp(ext_name, VK_AMD_SHADER_INFO_EXTENSION_NAME) == 0)
> +			keep_shader_info = true;
>   	}
>   
>   	/* Check enabled features */
> @@ -1040,10 +1045,14 @@ VkResult radv_CreateDevice(
>   		device->physical_device->rad_info.max_se >= 2;
>   
>   	if (getenv("RADV_TRACE_FILE")) {
> +		keep_shader_info = true;
> +
>   		if (!radv_init_trace(device))
>   			goto fail;
>   	}
>   
> +	device->keep_shader_info = keep_shader_info;
> +
>   	result = radv_device_init_meta(device);
>   	if (result != VK_SUCCESS)
>   		goto fail;
> diff --git a/src/amd/vulkan/radv_extensions.py b/src/amd/vulkan/radv_extensions.py
> index dfeb2880fc..eeb679d65a 100644
> --- a/src/amd/vulkan/radv_extensions.py
> +++ b/src/amd/vulkan/radv_extensions.py
> @@ -81,6 +81,7 @@ EXTENSIONS = [
>       Extension('VK_EXT_global_priority',                   1, 'device->rad_info.has_ctx_priority'),
>       Extension('VK_AMD_draw_indirect_count',               1, True),
>       Extension('VK_AMD_rasterization_order',               1, 'device->rad_info.chip_class >= VI && device->rad_info.max_se >= 2'),
> +    Extension('VK_AMD_shader_info',                       1, True),
>   ]
>   
>   class VkVersion:
> diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
> index d6b33a5327..2df03a83cf 100644
> --- a/src/amd/vulkan/radv_pipeline.c
> +++ b/src/amd/vulkan/radv_pipeline.c
> @@ -1874,7 +1874,7 @@ void radv_create_shaders(struct radv_pipeline *pipeline,
>   			if (device->instance->debug_flags & RADV_DEBUG_DUMP_SHADERS)
>   				nir_print_shader(nir[i], stderr);
>   
> -			if (!pipeline->device->trace_bo)
> +			if (!pipeline->device->keep_shader_info)
>   				ralloc_free(nir[i]);
>   		}
>   	}
> diff --git a/src/amd/vulkan/radv_pipeline_cache.c b/src/amd/vulkan/radv_pipeline_cache.c
> index 9ba9a3b61b..46198799a7 100644
> --- a/src/amd/vulkan/radv_pipeline_cache.c
> +++ b/src/amd/vulkan/radv_pipeline_cache.c
> @@ -62,9 +62,11 @@ radv_pipeline_cache_init(struct radv_pipeline_cache *cache,
>   	cache->hash_table = malloc(byte_size);
>   
>   	/* We don't consider allocation failure fatal, we just start with a 0-sized
> -	 * cache. */
> +	 * cache. Disable caching when we want to keep shader debug info, since
> +	 * we don't get the debug info on cached shaders. */
>   	if (cache->hash_table == NULL ||
> -	    (device->instance->debug_flags & RADV_DEBUG_NO_CACHE))
> +	    (device->instance->debug_flags & RADV_DEBUG_NO_CACHE) ||
> +	    device->keep_shader_info)
>   		cache->table_size = 0;
>   	else
>   		memset(cache->hash_table, 0, byte_size);
> @@ -186,8 +188,11 @@ radv_create_shader_variants_from_pipeline_cache(struct radv_device *device,
>   	entry = radv_pipeline_cache_search_unlocked(cache, sha1);
>   
>   	if (!entry) {
> +		/* Again, don't cache when we want debug info, since this isn't
> +		 * present in the cache. */
>   		if (!device->physical_device->disk_cache ||
> -		    (device->instance->debug_flags & RADV_DEBUG_NO_CACHE)) {
> +		    (device->instance->debug_flags & RADV_DEBUG_NO_CACHE) ||
> +		    device->keep_shader_info) {
>   			pthread_mutex_unlock(&cache->mutex);
>   			return false;
>   		}
> diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
> index a4e52b2530..169df5f37b 100644
> --- a/src/amd/vulkan/radv_private.h
> +++ b/src/amd/vulkan/radv_private.h
> @@ -552,6 +552,9 @@ struct radv_device {
>   	struct radeon_winsys_bo                      *trace_bo;
>   	uint32_t                                     *trace_id_ptr;
>   
> +	/* Whether to keep shader debug info, for tracing or VK_AMD_shader_info */
> +	bool                                         keep_shader_info;
> +
>   	struct radv_physical_device                  *physical_device;
>   
>   	/* Backup in-memory cache to be used if the app doesn't provide one */
> diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
> index 5903917068..7f2f0fd750 100644
> --- a/src/amd/vulkan/radv_shader.c
> +++ b/src/amd/vulkan/radv_shader.c
> @@ -46,6 +46,8 @@
>   #include "util/debug.h"
>   #include "ac_exp_param.h"
>   
> +#include "util/string_buffer.h"
> +
>   static const struct nir_shader_compiler_options nir_options = {
>   	.vertex_id_zero_based = true,
>   	.lower_scmp = true,
> @@ -471,7 +473,7 @@ shader_variant_create(struct radv_device *device,
>   	free(binary.relocs);
>   	variant->ref_count = 1;
>   
> -	if (device->trace_bo) {
> +	if (device->keep_shader_info) {
>   		variant->disasm_string = binary.disasm_string;
>   		if (!gs_copy_shader && !module->nir) {
>   			variant->nir = *shaders;
> @@ -593,11 +595,20 @@ radv_get_shader_name(struct radv_shader_variant *var, gl_shader_stage stage)
>   	};
>   }
>   
> -void
> -radv_shader_dump_stats(struct radv_device *device,
> -		       struct radv_shader_variant *variant,
> -		       gl_shader_stage stage,
> -		       FILE *file)
> +static uint32_t
> +get_total_sgprs(struct radv_device *device)
> +{
> +	if (device->physical_device->rad_info.chip_class >= VI)
> +		return 800;
> +	else
> +		return 512;
> +}
> +
> +static void
> +generate_shader_stats(struct radv_device *device,
> +		      struct radv_shader_variant *variant,
> +		      gl_shader_stage stage,
> +		      struct _mesa_string_buffer *buf)
>   {
>   	unsigned lds_increment = device->physical_device->rad_info.chip_class >= CIK ? 512 : 256;
>   	struct ac_shader_config *conf;
> @@ -623,12 +634,8 @@ radv_shader_dump_stats(struct radv_device *device,
>   				     lds_increment);
>   	}
>   
> -	if (conf->num_sgprs) {
> -		if (device->physical_device->rad_info.chip_class >= VI)
> -			max_simd_waves = MIN2(max_simd_waves, 800 / conf->num_sgprs);
> -		else
> -			max_simd_waves = MIN2(max_simd_waves, 512 / conf->num_sgprs);
> -	}
> +	if (conf->num_sgprs)
> +		max_simd_waves = MIN2(max_simd_waves, get_total_sgprs(device) / conf->num_sgprs);
>   
>   	if (conf->num_vgprs)
>   		max_simd_waves = MIN2(max_simd_waves, 256 / conf->num_vgprs);
> @@ -639,27 +646,121 @@ radv_shader_dump_stats(struct radv_device *device,
>   	if (lds_per_wave)
>   		max_simd_waves = MIN2(max_simd_waves, 16384 / lds_per_wave);
>   
> +	if (stage == MESA_SHADER_FRAGMENT) {
> +		_mesa_string_buffer_printf(buf, "*** SHADER CONFIG ***\n"
> +					   "SPI_PS_INPUT_ADDR = 0x%04x\n"
> +					   "SPI_PS_INPUT_ENA  = 0x%04x\n",
> +					   conf->spi_ps_input_addr, conf->spi_ps_input_ena);
> +	}
> +
> +	_mesa_string_buffer_printf(buf, "*** SHADER STATS ***\n"
> +				   "SGPRS: %d\n"
> +				   "VGPRS: %d\n"
> +				   "Spilled SGPRs: %d\n"
> +				   "Spilled VGPRs: %d\n"
> +				   "Code Size: %d bytes\n"
> +				   "LDS: %d blocks\n"
> +				   "Scratch: %d bytes per wave\n"
> +				   "Max Waves: %d\n"
> +				   "********************\n\n\n",
> +				   conf->num_sgprs, conf->num_vgprs,
> +				   conf->spilled_sgprs, conf->spilled_vgprs, variant->code_size,
> +				   conf->lds_size, conf->scratch_bytes_per_wave,
> +				   max_simd_waves);
> +}
> +
> +void
> +radv_shader_dump_stats(struct radv_device *device,
> +		       struct radv_shader_variant *variant,
> +		       gl_shader_stage stage,
> +		       FILE *file)
> +{
> +	struct _mesa_string_buffer *buf = _mesa_string_buffer_create(NULL, 256);
> +
> +	generate_shader_stats(device, variant, stage, buf);
> +
>   	fprintf(file, "\n%s:\n", radv_get_shader_name(variant, stage));
> +	fprintf(file, buf->buf);
>   
> -	if (stage == MESA_SHADER_FRAGMENT) {
> -		fprintf(file, "*** SHADER CONFIG ***\n"
> -			"SPI_PS_INPUT_ADDR = 0x%04x\n"
> -			"SPI_PS_INPUT_ENA  = 0x%04x\n",
> -			conf->spi_ps_input_addr, conf->spi_ps_input_ena);
> +	_mesa_string_buffer_destroy(buf);
> +}
> +
> +VkResult
> +radv_GetShaderInfoAMD(VkDevice _device,
> +		      VkPipeline _pipeline,
> +		      VkShaderStageFlagBits shaderStage,
> +		      VkShaderInfoTypeAMD infoType,
> +		      size_t* pInfoSize,
> +		      void* pInfo)
> +{
> +	RADV_FROM_HANDLE(radv_device, device, _device);
> +	RADV_FROM_HANDLE(radv_pipeline, pipeline, _pipeline);
> +	gl_shader_stage stage = vk_to_mesa_shader_stage(shaderStage);
> +	struct radv_shader_variant *variant = pipeline->shaders[stage];
> +	struct _mesa_string_buffer *buf;
> +	VkResult result = VK_SUCCESS;
> +
> +	/* Spec doesn't indicate what to do if the stage is invalid, so just
> +	 * return no info for this. */
> +	if (!variant)
> +		return VK_ERROR_FEATURE_NOT_PRESENT;
> +
> +	switch (infoType) {
> +	case VK_SHADER_INFO_TYPE_STATISTICS_AMD:
> +		if (!pInfo) {
> +			*pInfoSize = sizeof(VkShaderStatisticsInfoAMD);
> +		} else {
> +			struct ac_shader_config *conf = &variant->config;
> +
> +			VkShaderStatisticsInfoAMD statistics = {};
> +			statistics.shaderStageMask = shaderStage;
> +			statistics.resourceUsage.numUsedVgprs = conf->num_vgprs + conf->spilled_vgprs;
> +			statistics.resourceUsage.numUsedSgprs = conf->num_sgprs + conf->spilled_sgprs;
> +			statistics.resourceUsage.ldsSizePerLocalWorkGroup = 16384;
> +			statistics.resourceUsage.ldsUsageSizeInBytes = conf->lds_size;
> +			statistics.resourceUsage.scratchMemUsageInBytes = conf->scratch_bytes_per_wave;
> +			statistics.numPhysicalVgprs = statistics.numAvailableVgprs = 256;
> +			statistics.numPhysicalSgprs = statistics.numAvailableSgprs = get_total_sgprs(device);
> +			statistics.computeWorkGroupSize[0] = variant->nir->info.cs.local_size[0];
> +			statistics.computeWorkGroupSize[1] = variant->nir->info.cs.local_size[1];
> +			statistics.computeWorkGroupSize[2] = variant->nir->info.cs.local_size[2];
> +
> +			size_t size = *pInfoSize;
> +			*pInfoSize = sizeof(statistics);
> +
> +			memcpy(pInfo, &statistics, MIN2(size, *pInfoSize));
> +
> +			if (size < *pInfoSize)
> +				result = VK_INCOMPLETE;
> +		}
> +
> +		break;
> +	case VK_SHADER_INFO_TYPE_DISASSEMBLY_AMD:
> +		buf = _mesa_string_buffer_create(NULL, 1024);
> +
> +		_mesa_string_buffer_printf(buf, "%s:\n", radv_get_shader_name(variant, stage));
> +		_mesa_string_buffer_printf(buf, "%s\n\n", variant->disasm_string);
> +		generate_shader_stats(device, variant, stage, buf);
> +
> +		if (!pInfo) {
> +			*pInfoSize = buf->length;
> +		} else {
> +			size_t size = *pInfoSize;
> +			*pInfoSize = buf->length;
> +
> +			memcpy(pInfo, buf->buf, MIN2(size, buf->length));
> +
> +			if (size < buf->length)
> +				result = VK_INCOMPLETE;
> +		}
> +
> +		_mesa_string_buffer_destroy(buf);
> +		break;
> +	default:
> +		/* VK_SHADER_INFO_TYPE_BINARY_AMD unimplemented for now. */
> +		result = VK_ERROR_FEATURE_NOT_PRESENT;
> +		break;
>   	}
>   
> -	fprintf(file, "*** SHADER STATS ***\n"
> -		"SGPRS: %d\n"
> -		"VGPRS: %d\n"
> -		"Spilled SGPRs: %d\n"
> -		"Spilled VGPRs: %d\n"
> -		"Code Size: %d bytes\n"
> -		"LDS: %d blocks\n"
> -		"Scratch: %d bytes per wave\n"
> -		"Max Waves: %d\n"
> -		"********************\n\n\n",
> -		conf->num_sgprs, conf->num_vgprs,
> -		conf->spilled_sgprs, conf->spilled_vgprs, variant->code_size,
> -		conf->lds_size, conf->scratch_bytes_per_wave,
> -		max_simd_waves);
> +	return result;
>   }
>
On 25 October 2017 at 12:46, Samuel Pitoiset <samuel.pitoiset@gmail.com>
wrote:

> I have something similar on my local tree (started on monday).
>
> Though, I don't like the way we expose the number of VGPRS/SGPRS because
> we can't really figure out the number of spilled ones.


My assumption was that if we've spilled then we've used all available
registers, so if numUsed{V,S}gprs is greater than the number available,
then you'd know that the number spilled is the difference between the two.
Can we have spilling when num_{v,s}gprs is less than the number available?

Alex


>
>
> On 10/25/2017 01:18 PM, Alex Smith wrote:
>
>> This allows an app to query shader statistics and get a disassembly of
>> a shader. RenderDoc git has support for it, so this allows you to view
>> shader disassembly from a capture.
>>
>> When this extension is enabled on a device (or when tracing), we now
>> disable pipeline caching, since we don't get the shader debug info when
>> we retrieve cached shaders.
>>
>> Signed-off-by: Alex Smith <asmith@feralinteractive.com>
>> ---
>>   src/amd/vulkan/radv_device.c         |   9 ++
>>   src/amd/vulkan/radv_extensions.py    |   1 +
>>   src/amd/vulkan/radv_pipeline.c       |   2 +-
>>   src/amd/vulkan/radv_pipeline_cache.c |  11 ++-
>>   src/amd/vulkan/radv_private.h        |   3 +
>>   src/amd/vulkan/radv_shader.c         | 163
>> ++++++++++++++++++++++++++++-------
>>   6 files changed, 154 insertions(+), 35 deletions(-)
>>
>> diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
>> index c4e25222ea..5603551680 100644
>> --- a/src/amd/vulkan/radv_device.c
>> +++ b/src/amd/vulkan/radv_device.c
>> @@ -943,10 +943,15 @@ VkResult radv_CreateDevice(
>>         VkResult result;
>>         struct radv_device *device;
>>   +     bool keep_shader_info = false;
>> +
>>         for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount;
>> i++) {
>>                 const char *ext_name = pCreateInfo->ppEnabledExtensio
>> nNames[i];
>>                 if (!radv_physical_device_extension_supported(physical_device,
>> ext_name))
>>                         return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
>> +
>> +               if (strcmp(ext_name, VK_AMD_SHADER_INFO_EXTENSION_NAME)
>> == 0)
>> +                       keep_shader_info = true;
>>         }
>>         /* Check enabled features */
>> @@ -1040,10 +1045,14 @@ VkResult radv_CreateDevice(
>>                 device->physical_device->rad_info.max_se >= 2;
>>         if (getenv("RADV_TRACE_FILE")) {
>> +               keep_shader_info = true;
>> +
>>                 if (!radv_init_trace(device))
>>                         goto fail;
>>         }
>>   +     device->keep_shader_info = keep_shader_info;
>> +
>>         result = radv_device_init_meta(device);
>>         if (result != VK_SUCCESS)
>>                 goto fail;
>> diff --git a/src/amd/vulkan/radv_extensions.py
>> b/src/amd/vulkan/radv_extensions.py
>> index dfeb2880fc..eeb679d65a 100644
>> --- a/src/amd/vulkan/radv_extensions.py
>> +++ b/src/amd/vulkan/radv_extensions.py
>> @@ -81,6 +81,7 @@ EXTENSIONS = [
>>       Extension('VK_EXT_global_priority',                   1,
>> 'device->rad_info.has_ctx_priority'),
>>       Extension('VK_AMD_draw_indirect_count',               1, True),
>>       Extension('VK_AMD_rasterization_order',               1,
>> 'device->rad_info.chip_class >= VI && device->rad_info.max_se >= 2'),
>> +    Extension('VK_AMD_shader_info',                       1, True),
>>   ]
>>     class VkVersion:
>> diff --git a/src/amd/vulkan/radv_pipeline.c
>> b/src/amd/vulkan/radv_pipeline.c
>> index d6b33a5327..2df03a83cf 100644
>> --- a/src/amd/vulkan/radv_pipeline.c
>> +++ b/src/amd/vulkan/radv_pipeline.c
>> @@ -1874,7 +1874,7 @@ void radv_create_shaders(struct radv_pipeline
>> *pipeline,
>>                         if (device->instance->debug_flags &
>> RADV_DEBUG_DUMP_SHADERS)
>>                                 nir_print_shader(nir[i], stderr);
>>   -                     if (!pipeline->device->trace_bo)
>> +                       if (!pipeline->device->keep_shader_info)
>>                                 ralloc_free(nir[i]);
>>                 }
>>         }
>> diff --git a/src/amd/vulkan/radv_pipeline_cache.c
>> b/src/amd/vulkan/radv_pipeline_cache.c
>> index 9ba9a3b61b..46198799a7 100644
>> --- a/src/amd/vulkan/radv_pipeline_cache.c
>> +++ b/src/amd/vulkan/radv_pipeline_cache.c
>> @@ -62,9 +62,11 @@ radv_pipeline_cache_init(struct radv_pipeline_cache
>> *cache,
>>         cache->hash_table = malloc(byte_size);
>>         /* We don't consider allocation failure fatal, we just start with
>> a 0-sized
>> -        * cache. */
>> +        * cache. Disable caching when we want to keep shader debug info,
>> since
>> +        * we don't get the debug info on cached shaders. */
>>         if (cache->hash_table == NULL ||
>> -           (device->instance->debug_flags & RADV_DEBUG_NO_CACHE))
>> +           (device->instance->debug_flags & RADV_DEBUG_NO_CACHE) ||
>> +           device->keep_shader_info)
>>                 cache->table_size = 0;
>>         else
>>                 memset(cache->hash_table, 0, byte_size);
>> @@ -186,8 +188,11 @@ radv_create_shader_variants_from_pipeline_cache(struct
>> radv_device *device,
>>         entry = radv_pipeline_cache_search_unlocked(cache, sha1);
>>         if (!entry) {
>> +               /* Again, don't cache when we want debug info, since this
>> isn't
>> +                * present in the cache. */
>>                 if (!device->physical_device->disk_cache ||
>> -                   (device->instance->debug_flags &
>> RADV_DEBUG_NO_CACHE)) {
>> +                   (device->instance->debug_flags &
>> RADV_DEBUG_NO_CACHE) ||
>> +                   device->keep_shader_info) {
>>                         pthread_mutex_unlock(&cache->mutex);
>>                         return false;
>>                 }
>> diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.
>> h
>> index a4e52b2530..169df5f37b 100644
>> --- a/src/amd/vulkan/radv_private.h
>> +++ b/src/amd/vulkan/radv_private.h
>> @@ -552,6 +552,9 @@ struct radv_device {
>>         struct radeon_winsys_bo                      *trace_bo;
>>         uint32_t                                     *trace_id_ptr;
>>   +     /* Whether to keep shader debug info, for tracing or
>> VK_AMD_shader_info */
>> +       bool                                         keep_shader_info;
>> +
>>         struct radv_physical_device                  *physical_device;
>>         /* Backup in-memory cache to be used if the app doesn't provide
>> one */
>> diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
>> index 5903917068..7f2f0fd750 100644
>> --- a/src/amd/vulkan/radv_shader.c
>> +++ b/src/amd/vulkan/radv_shader.c
>> @@ -46,6 +46,8 @@
>>   #include "util/debug.h"
>>   #include "ac_exp_param.h"
>>   +#include "util/string_buffer.h"
>> +
>>   static const struct nir_shader_compiler_options nir_options = {
>>         .vertex_id_zero_based = true,
>>         .lower_scmp = true,
>> @@ -471,7 +473,7 @@ shader_variant_create(struct radv_device *device,
>>         free(binary.relocs);
>>         variant->ref_count = 1;
>>   -     if (device->trace_bo) {
>> +       if (device->keep_shader_info) {
>>                 variant->disasm_string = binary.disasm_string;
>>                 if (!gs_copy_shader && !module->nir) {
>>                         variant->nir = *shaders;
>> @@ -593,11 +595,20 @@ radv_get_shader_name(struct radv_shader_variant
>> *var, gl_shader_stage stage)
>>         };
>>   }
>>   -void
>> -radv_shader_dump_stats(struct radv_device *device,
>> -                      struct radv_shader_variant *variant,
>> -                      gl_shader_stage stage,
>> -                      FILE *file)
>> +static uint32_t
>> +get_total_sgprs(struct radv_device *device)
>> +{
>> +       if (device->physical_device->rad_info.chip_class >= VI)
>> +               return 800;
>> +       else
>> +               return 512;
>> +}
>> +
>> +static void
>> +generate_shader_stats(struct radv_device *device,
>> +                     struct radv_shader_variant *variant,
>> +                     gl_shader_stage stage,
>> +                     struct _mesa_string_buffer *buf)
>>   {
>>         unsigned lds_increment = device->physical_device->rad_info.chip_class
>> >= CIK ? 512 : 256;
>>         struct ac_shader_config *conf;
>> @@ -623,12 +634,8 @@ radv_shader_dump_stats(struct radv_device *device,
>>                                      lds_increment);
>>         }
>>   -     if (conf->num_sgprs) {
>> -               if (device->physical_device->rad_info.chip_class >= VI)
>> -                       max_simd_waves = MIN2(max_simd_waves, 800 /
>> conf->num_sgprs);
>> -               else
>> -                       max_simd_waves = MIN2(max_simd_waves, 512 /
>> conf->num_sgprs);
>> -       }
>> +       if (conf->num_sgprs)
>> +               max_simd_waves = MIN2(max_simd_waves,
>> get_total_sgprs(device) / conf->num_sgprs);
>>         if (conf->num_vgprs)
>>                 max_simd_waves = MIN2(max_simd_waves, 256 /
>> conf->num_vgprs);
>> @@ -639,27 +646,121 @@ radv_shader_dump_stats(struct radv_device *device,
>>         if (lds_per_wave)
>>                 max_simd_waves = MIN2(max_simd_waves, 16384 /
>> lds_per_wave);
>>   +     if (stage == MESA_SHADER_FRAGMENT) {
>> +               _mesa_string_buffer_printf(buf, "*** SHADER CONFIG ***\n"
>> +                                          "SPI_PS_INPUT_ADDR = 0x%04x\n"
>> +                                          "SPI_PS_INPUT_ENA  = 0x%04x\n",
>> +                                          conf->spi_ps_input_addr,
>> conf->spi_ps_input_ena);
>> +       }
>> +
>> +       _mesa_string_buffer_printf(buf, "*** SHADER STATS ***\n"
>> +                                  "SGPRS: %d\n"
>> +                                  "VGPRS: %d\n"
>> +                                  "Spilled SGPRs: %d\n"
>> +                                  "Spilled VGPRs: %d\n"
>> +                                  "Code Size: %d bytes\n"
>> +                                  "LDS: %d blocks\n"
>> +                                  "Scratch: %d bytes per wave\n"
>> +                                  "Max Waves: %d\n"
>> +                                  "********************\n\n\n",
>> +                                  conf->num_sgprs, conf->num_vgprs,
>> +                                  conf->spilled_sgprs,
>> conf->spilled_vgprs, variant->code_size,
>> +                                  conf->lds_size,
>> conf->scratch_bytes_per_wave,
>> +                                  max_simd_waves);
>> +}
>> +
>> +void
>> +radv_shader_dump_stats(struct radv_device *device,
>> +                      struct radv_shader_variant *variant,
>> +                      gl_shader_stage stage,
>> +                      FILE *file)
>> +{
>> +       struct _mesa_string_buffer *buf = _mesa_string_buffer_create(NULL,
>> 256);
>> +
>> +       generate_shader_stats(device, variant, stage, buf);
>> +
>>         fprintf(file, "\n%s:\n", radv_get_shader_name(variant, stage));
>> +       fprintf(file, buf->buf);
>>   -     if (stage == MESA_SHADER_FRAGMENT) {
>> -               fprintf(file, "*** SHADER CONFIG ***\n"
>> -                       "SPI_PS_INPUT_ADDR = 0x%04x\n"
>> -                       "SPI_PS_INPUT_ENA  = 0x%04x\n",
>> -                       conf->spi_ps_input_addr, conf->spi_ps_input_ena);
>> +       _mesa_string_buffer_destroy(buf);
>> +}
>> +
>> +VkResult
>> +radv_GetShaderInfoAMD(VkDevice _device,
>> +                     VkPipeline _pipeline,
>> +                     VkShaderStageFlagBits shaderStage,
>> +                     VkShaderInfoTypeAMD infoType,
>> +                     size_t* pInfoSize,
>> +                     void* pInfo)
>> +{
>> +       RADV_FROM_HANDLE(radv_device, device, _device);
>> +       RADV_FROM_HANDLE(radv_pipeline, pipeline, _pipeline);
>> +       gl_shader_stage stage = vk_to_mesa_shader_stage(shaderStage);
>> +       struct radv_shader_variant *variant = pipeline->shaders[stage];
>> +       struct _mesa_string_buffer *buf;
>> +       VkResult result = VK_SUCCESS;
>> +
>> +       /* Spec doesn't indicate what to do if the stage is invalid, so
>> just
>> +        * return no info for this. */
>> +       if (!variant)
>> +               return VK_ERROR_FEATURE_NOT_PRESENT;
>> +
>> +       switch (infoType) {
>> +       case VK_SHADER_INFO_TYPE_STATISTICS_AMD:
>> +               if (!pInfo) {
>> +                       *pInfoSize = sizeof(VkShaderStatisticsInfoAMD);
>> +               } else {
>> +                       struct ac_shader_config *conf = &variant->config;
>> +
>> +                       VkShaderStatisticsInfoAMD statistics = {};
>> +                       statistics.shaderStageMask = shaderStage;
>> +                       statistics.resourceUsage.numUsedVgprs =
>> conf->num_vgprs + conf->spilled_vgprs;
>> +                       statistics.resourceUsage.numUsedSgprs =
>> conf->num_sgprs + conf->spilled_sgprs;
>> +                       statistics.resourceUsage.ldsSizePerLocalWorkGroup
>> = 16384;
>> +                       statistics.resourceUsage.ldsUsageSizeInBytes =
>> conf->lds_size;
>> +                       statistics.resourceUsage.scratchMemUsageInBytes
>> = conf->scratch_bytes_per_wave;
>> +                       statistics.numPhysicalVgprs =
>> statistics.numAvailableVgprs = 256;
>> +                       statistics.numPhysicalSgprs =
>> statistics.numAvailableSgprs = get_total_sgprs(device);
>> +                       statistics.computeWorkGroupSize[0] =
>> variant->nir->info.cs.local_size[0];
>> +                       statistics.computeWorkGroupSize[1] =
>> variant->nir->info.cs.local_size[1];
>> +                       statistics.computeWorkGroupSize[2] =
>> variant->nir->info.cs.local_size[2];
>> +
>> +                       size_t size = *pInfoSize;
>> +                       *pInfoSize = sizeof(statistics);
>> +
>> +                       memcpy(pInfo, &statistics, MIN2(size,
>> *pInfoSize));
>> +
>> +                       if (size < *pInfoSize)
>> +                               result = VK_INCOMPLETE;
>> +               }
>> +
>> +               break;
>> +       case VK_SHADER_INFO_TYPE_DISASSEMBLY_AMD:
>> +               buf = _mesa_string_buffer_create(NULL, 1024);
>> +
>> +               _mesa_string_buffer_printf(buf, "%s:\n",
>> radv_get_shader_name(variant, stage));
>> +               _mesa_string_buffer_printf(buf, "%s\n\n",
>> variant->disasm_string);
>> +               generate_shader_stats(device, variant, stage, buf);
>> +
>> +               if (!pInfo) {
>> +                       *pInfoSize = buf->length;
>> +               } else {
>> +                       size_t size = *pInfoSize;
>> +                       *pInfoSize = buf->length;
>> +
>> +                       memcpy(pInfo, buf->buf, MIN2(size, buf->length));
>> +
>> +                       if (size < buf->length)
>> +                               result = VK_INCOMPLETE;
>> +               }
>> +
>> +               _mesa_string_buffer_destroy(buf);
>> +               break;
>> +       default:
>> +               /* VK_SHADER_INFO_TYPE_BINARY_AMD unimplemented for now.
>> */
>> +               result = VK_ERROR_FEATURE_NOT_PRESENT;
>> +               break;
>>         }
>>   -     fprintf(file, "*** SHADER STATS ***\n"
>> -               "SGPRS: %d\n"
>> -               "VGPRS: %d\n"
>> -               "Spilled SGPRs: %d\n"
>> -               "Spilled VGPRs: %d\n"
>> -               "Code Size: %d bytes\n"
>> -               "LDS: %d blocks\n"
>> -               "Scratch: %d bytes per wave\n"
>> -               "Max Waves: %d\n"
>> -               "********************\n\n\n",
>> -               conf->num_sgprs, conf->num_vgprs,
>> -               conf->spilled_sgprs, conf->spilled_vgprs,
>> variant->code_size,
>> -               conf->lds_size, conf->scratch_bytes_per_wave,
>> -               max_simd_waves);
>> +       return result;
>>   }
>>
>>
On 10/25/2017 02:20 PM, Alex Smith wrote:
> On 25 October 2017 at 12:46, Samuel Pitoiset <samuel.pitoiset@gmail.com 
> <mailto:samuel.pitoiset@gmail.com>> wrote:
> 
>     I have something similar on my local tree (started on monday).
> 
>     Though, I don't like the way we expose the number of VGPRS/SGPRS
>     because we can't really figure out the number of spilled ones.
> 
> 
> My assumption was that if we've spilled then we've used all available 
> registers, so if numUsed{V,S}gprs is greater than the number available, 
> then you'd know that the number spilled is the difference between the 
> two. Can we have spilling when num_{v,s}gprs is less than the number 
> available?

Assuming the number of waves per CU is 4, I would go with:

num_available_vgprs = num_physical_vgprs (ie. 256) / max_simd_waves 
(aligned down to 4).

(or we can just set num_available_vgprs to conf->num_vgprs and return 
num_used_vgprs = conf->num_vgprs + conf->num_spilled_sgprs).

That way, if num_used_vgprs is greater than num_available_vgprs we know 
that we are spilling some vgprs.

For the number of available SGPRs, I think we can just hardcode the 
value to 104 for now.

Also with this, we can easily re-compute the maximum number of waves.

> 
> Alex
> 
> 
> 
>     On 10/25/2017 01:18 PM, Alex Smith wrote:
> 
>         This allows an app to query shader statistics and get a
>         disassembly of
>         a shader. RenderDoc git has support for it, so this allows you
>         to view
>         shader disassembly from a capture.
> 
>         When this extension is enabled on a device (or when tracing), we now
>         disable pipeline caching, since we don't get the shader debug
>         info when
>         we retrieve cached shaders.
> 
>         Signed-off-by: Alex Smith <asmith@feralinteractive.com
>         <mailto:asmith@feralinteractive.com>>
>         ---
>            src/amd/vulkan/radv_device.c         |   9 ++
>            src/amd/vulkan/radv_extensions.py    |   1 +
>            src/amd/vulkan/radv_pipeline.c       |   2 +-
>            src/amd/vulkan/radv_pipeline_cache.c |  11 ++-
>            src/amd/vulkan/radv_private.h        |   3 +
>            src/amd/vulkan/radv_shader.c         | 163
>         ++++++++++++++++++++++++++++-------
>            6 files changed, 154 insertions(+), 35 deletions(-)
> 
>         diff --git a/src/amd/vulkan/radv_device.c
>         b/src/amd/vulkan/radv_device.c
>         index c4e25222ea..5603551680 100644
>         --- a/src/amd/vulkan/radv_device.c
>         +++ b/src/amd/vulkan/radv_device.c
>         @@ -943,10 +943,15 @@ VkResult radv_CreateDevice(
>                  VkResult result;
>                  struct radv_device *device;
>            +     bool keep_shader_info = false;
>         +
>                  for (uint32_t i = 0; i <
>         pCreateInfo->enabledExtensionCount; i++) {
>                          const char *ext_name =
>         pCreateInfo->ppEnabledExtensionNames[i];
>                          if
>         (!radv_physical_device_extension_supported(physical_device,
>         ext_name))
>                                  return
>         vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
>         +
>         +               if (strcmp(ext_name,
>         VK_AMD_SHADER_INFO_EXTENSION_NAME) == 0)
>         +                       keep_shader_info = true;
>                  }
>                  /* Check enabled features */
>         @@ -1040,10 +1045,14 @@ VkResult radv_CreateDevice(
>                          device->physical_device->rad_info.max_se >= 2;
>                  if (getenv("RADV_TRACE_FILE")) {
>         +               keep_shader_info = true;
>         +
>                          if (!radv_init_trace(device))
>                                  goto fail;
>                  }
>            +     device->keep_shader_info = keep_shader_info;
>         +
>                  result = radv_device_init_meta(device);
>                  if (result != VK_SUCCESS)
>                          goto fail;
>         diff --git a/src/amd/vulkan/radv_extensions.py
>         b/src/amd/vulkan/radv_extensions.py
>         index dfeb2880fc..eeb679d65a 100644
>         --- a/src/amd/vulkan/radv_extensions.py
>         +++ b/src/amd/vulkan/radv_extensions.py
>         @@ -81,6 +81,7 @@ EXTENSIONS = [
>                Extension('VK_EXT_global_priority',                   1,
>         'device->rad_info.has_ctx_priority'),
>                Extension('VK_AMD_draw_indirect_count',               1,
>         True),
>                Extension('VK_AMD_rasterization_order',               1,
>         'device->rad_info.chip_class >= VI && device->rad_info.max_se >=
>         2'),
>         +    Extension('VK_AMD_shader_info',                       1, True),
>            ]
>              class VkVersion:
>         diff --git a/src/amd/vulkan/radv_pipeline.c
>         b/src/amd/vulkan/radv_pipeline.c
>         index d6b33a5327..2df03a83cf 100644
>         --- a/src/amd/vulkan/radv_pipeline.c
>         +++ b/src/amd/vulkan/radv_pipeline.c
>         @@ -1874,7 +1874,7 @@ void radv_create_shaders(struct
>         radv_pipeline *pipeline,
>                                  if (device->instance->debug_flags &
>         RADV_DEBUG_DUMP_SHADERS)
>                                          nir_print_shader(nir[i], stderr);
>            -                     if (!pipeline->device->trace_bo)
>         +                       if (!pipeline->device->keep_shader_info)
>                                          ralloc_free(nir[i]);
>                          }
>                  }
>         diff --git a/src/amd/vulkan/radv_pipeline_cache.c
>         b/src/amd/vulkan/radv_pipeline_cache.c
>         index 9ba9a3b61b..46198799a7 100644
>         --- a/src/amd/vulkan/radv_pipeline_cache.c
>         +++ b/src/amd/vulkan/radv_pipeline_cache.c
>         @@ -62,9 +62,11 @@ radv_pipeline_cache_init(struct
>         radv_pipeline_cache *cache,
>                  cache->hash_table = malloc(byte_size);
>                  /* We don't consider allocation failure fatal, we just
>         start with a 0-sized
>         -        * cache. */
>         +        * cache. Disable caching when we want to keep shader
>         debug info, since
>         +        * we don't get the debug info on cached shaders. */
>                  if (cache->hash_table == NULL ||
>         -           (device->instance->debug_flags & RADV_DEBUG_NO_CACHE))
>         +           (device->instance->debug_flags & RADV_DEBUG_NO_CACHE) ||
>         +           device->keep_shader_info)
>                          cache->table_size = 0;
>                  else
>                          memset(cache->hash_table, 0, byte_size);
>         @@ -186,8 +188,11 @@
>         radv_create_shader_variants_from_pipeline_cache(struct
>         radv_device *device,
>                  entry = radv_pipeline_cache_search_unlocked(cache, sha1);
>                  if (!entry) {
>         +               /* Again, don't cache when we want debug info,
>         since this isn't
>         +                * present in the cache. */
>                          if (!device->physical_device->disk_cache ||
>         -                   (device->instance->debug_flags &
>         RADV_DEBUG_NO_CACHE)) {
>         +                   (device->instance->debug_flags &
>         RADV_DEBUG_NO_CACHE) ||
>         +                   device->keep_shader_info) {
>                                  pthread_mutex_unlock(&cache->mutex);
>                                  return false;
>                          }
>         diff --git a/src/amd/vulkan/radv_private.h
>         b/src/amd/vulkan/radv_private.h
>         index a4e52b2530..169df5f37b 100644
>         --- a/src/amd/vulkan/radv_private.h
>         +++ b/src/amd/vulkan/radv_private.h
>         @@ -552,6 +552,9 @@ struct radv_device {
>                  struct radeon_winsys_bo                      *trace_bo;
>                  uint32_t                                     *trace_id_ptr;
>            +     /* Whether to keep shader debug info, for tracing or
>         VK_AMD_shader_info */
>         +       bool                                       
>           keep_shader_info;
>         +
>                  struct radv_physical_device                 
>         *physical_device;
>                  /* Backup in-memory cache to be used if the app doesn't
>         provide one */
>         diff --git a/src/amd/vulkan/radv_shader.c
>         b/src/amd/vulkan/radv_shader.c
>         index 5903917068..7f2f0fd750 100644
>         --- a/src/amd/vulkan/radv_shader.c
>         +++ b/src/amd/vulkan/radv_shader.c
>         @@ -46,6 +46,8 @@
>            #include "util/debug.h"
>            #include "ac_exp_param.h"
>            +#include "util/string_buffer.h"
>         +
>            static const struct nir_shader_compiler_options nir_options = {
>                  .vertex_id_zero_based = true,
>                  .lower_scmp = true,
>         @@ -471,7 +473,7 @@ shader_variant_create(struct radv_device
>         *device,
>                  free(binary.relocs);
>                  variant->ref_count = 1;
>            -     if (device->trace_bo) {
>         +       if (device->keep_shader_info) {
>                          variant->disasm_string = binary.disasm_string;
>                          if (!gs_copy_shader && !module->nir) {
>                                  variant->nir = *shaders;
>         @@ -593,11 +595,20 @@ radv_get_shader_name(struct
>         radv_shader_variant *var, gl_shader_stage stage)
>                  };
>            }
>            -void
>         -radv_shader_dump_stats(struct radv_device *device,
>         -                      struct radv_shader_variant *variant,
>         -                      gl_shader_stage stage,
>         -                      FILE *file)
>         +static uint32_t
>         +get_total_sgprs(struct radv_device *device)
>         +{
>         +       if (device->physical_device->rad_info.chip_class >= VI)
>         +               return 800;
>         +       else
>         +               return 512;
>         +}
>         +
>         +static void
>         +generate_shader_stats(struct radv_device *device,
>         +                     struct radv_shader_variant *variant,
>         +                     gl_shader_stage stage,
>         +                     struct _mesa_string_buffer *buf)
>            {
>                  unsigned lds_increment =
>         device->physical_device->rad_info.chip_class >= CIK ? 512 : 256;
>                  struct ac_shader_config *conf;
>         @@ -623,12 +634,8 @@ radv_shader_dump_stats(struct radv_device
>         *device,
>                                               lds_increment);
>                  }
>            -     if (conf->num_sgprs) {
>         -               if (device->physical_device->rad_info.chip_class
>          >= VI)
>         -                       max_simd_waves = MIN2(max_simd_waves,
>         800 / conf->num_sgprs);
>         -               else
>         -                       max_simd_waves = MIN2(max_simd_waves,
>         512 / conf->num_sgprs);
>         -       }
>         +       if (conf->num_sgprs)
>         +               max_simd_waves = MIN2(max_simd_waves,
>         get_total_sgprs(device) / conf->num_sgprs);
>                  if (conf->num_vgprs)
>                          max_simd_waves = MIN2(max_simd_waves, 256 /
>         conf->num_vgprs);
>         @@ -639,27 +646,121 @@ radv_shader_dump_stats(struct radv_device
>         *device,
>                  if (lds_per_wave)
>                          max_simd_waves = MIN2(max_simd_waves, 16384 /
>         lds_per_wave);
>            +     if (stage == MESA_SHADER_FRAGMENT) {
>         +               _mesa_string_buffer_printf(buf, "*** SHADER
>         CONFIG ***\n"
>         +                                          "SPI_PS_INPUT_ADDR =
>         0x%04x\n"
>         +                                          "SPI_PS_INPUT_ENA  =
>         0x%04x\n",
>         +                                         
>         conf->spi_ps_input_addr, conf->spi_ps_input_ena);
>         +       }
>         +
>         +       _mesa_string_buffer_printf(buf, "*** SHADER STATS ***\n"
>         +                                  "SGPRS: %d\n"
>         +                                  "VGPRS: %d\n"
>         +                                  "Spilled SGPRs: %d\n"
>         +                                  "Spilled VGPRs: %d\n"
>         +                                  "Code Size: %d bytes\n"
>         +                                  "LDS: %d blocks\n"
>         +                                  "Scratch: %d bytes per wave\n"
>         +                                  "Max Waves: %d\n"
>         +                                  "********************\n\n\n",
>         +                                  conf->num_sgprs, conf->num_vgprs,
>         +                                  conf->spilled_sgprs,
>         conf->spilled_vgprs, variant->code_size,
>         +                                  conf->lds_size,
>         conf->scratch_bytes_per_wave,
>         +                                  max_simd_waves);
>         +}
>         +
>         +void
>         +radv_shader_dump_stats(struct radv_device *device,
>         +                      struct radv_shader_variant *variant,
>         +                      gl_shader_stage stage,
>         +                      FILE *file)
>         +{
>         +       struct _mesa_string_buffer *buf =
>         _mesa_string_buffer_create(NULL, 256);
>         +
>         +       generate_shader_stats(device, variant, stage, buf);
>         +
>                  fprintf(file, "\n%s:\n", radv_get_shader_name(variant,
>         stage));
>         +       fprintf(file, buf->buf);
>            -     if (stage == MESA_SHADER_FRAGMENT) {
>         -               fprintf(file, "*** SHADER CONFIG ***\n"
>         -                       "SPI_PS_INPUT_ADDR = 0x%04x\n"
>         -                       "SPI_PS_INPUT_ENA  = 0x%04x\n",
>         -                       conf->spi_ps_input_addr,
>         conf->spi_ps_input_ena);
>         +       _mesa_string_buffer_destroy(buf);
>         +}
>         +
>         +VkResult
>         +radv_GetShaderInfoAMD(VkDevice _device,
>         +                     VkPipeline _pipeline,
>         +                     VkShaderStageFlagBits shaderStage,
>         +                     VkShaderInfoTypeAMD infoType,
>         +                     size_t* pInfoSize,
>         +                     void* pInfo)
>         +{
>         +       RADV_FROM_HANDLE(radv_device, device, _device);
>         +       RADV_FROM_HANDLE(radv_pipeline, pipeline, _pipeline);
>         +       gl_shader_stage stage =
>         vk_to_mesa_shader_stage(shaderStage);
>         +       struct radv_shader_variant *variant =
>         pipeline->shaders[stage];
>         +       struct _mesa_string_buffer *buf;
>         +       VkResult result = VK_SUCCESS;
>         +
>         +       /* Spec doesn't indicate what to do if the stage is
>         invalid, so just
>         +        * return no info for this. */
>         +       if (!variant)
>         +               return VK_ERROR_FEATURE_NOT_PRESENT;
>         +
>         +       switch (infoType) {
>         +       case VK_SHADER_INFO_TYPE_STATISTICS_AMD:
>         +               if (!pInfo) {
>         +                       *pInfoSize =
>         sizeof(VkShaderStatisticsInfoAMD);
>         +               } else {
>         +                       struct ac_shader_config *conf =
>         &variant->config;
>         +
>         +                       VkShaderStatisticsInfoAMD statistics = {};
>         +                       statistics.shaderStageMask = shaderStage;
>         +                       statistics.resourceUsage.numUsedVgprs =
>         conf->num_vgprs + conf->spilled_vgprs;
>         +                       statistics.resourceUsage.numUsedSgprs =
>         conf->num_sgprs + conf->spilled_sgprs;
>         +                     
>           statistics.resourceUsage.ldsSizePerLocalWorkGroup = 16384;
>         +                     
>           statistics.resourceUsage.ldsUsageSizeInBytes = conf->lds_size;
>         +                     
>           statistics.resourceUsage.scratchMemUsageInBytes =
>         conf->scratch_bytes_per_wave;
>         +                       statistics.numPhysicalVgprs =
>         statistics.numAvailableVgprs = 256;
>         +                       statistics.numPhysicalSgprs =
>         statistics.numAvailableSgprs = get_total_sgprs(device);
>         +                       statistics.computeWorkGroupSize[0] =
>         variant->nir->info.cs.local_size[0];
>         +                       statistics.computeWorkGroupSize[1] =
>         variant->nir->info.cs.local_size[1];
>         +                       statistics.computeWorkGroupSize[2] =
>         variant->nir->info.cs.local_size[2];
>         +
>         +                       size_t size = *pInfoSize;
>         +                       *pInfoSize = sizeof(statistics);
>         +
>         +                       memcpy(pInfo, &statistics, MIN2(size,
>         *pInfoSize));
>         +
>         +                       if (size < *pInfoSize)
>         +                               result = VK_INCOMPLETE;
>         +               }
>         +
>         +               break;
>         +       case VK_SHADER_INFO_TYPE_DISASSEMBLY_AMD:
>         +               buf = _mesa_string_buffer_create(NULL, 1024);
>         +
>         +               _mesa_string_buffer_printf(buf, "%s:\n",
>         radv_get_shader_name(variant, stage));
>         +               _mesa_string_buffer_printf(buf, "%s\n\n",
>         variant->disasm_string);
>         +               generate_shader_stats(device, variant, stage, buf);
>         +
>         +               if (!pInfo) {
>         +                       *pInfoSize = buf->length;
>         +               } else {
>         +                       size_t size = *pInfoSize;
>         +                       *pInfoSize = buf->length;
>         +
>         +                       memcpy(pInfo, buf->buf, MIN2(size,
>         buf->length));
>         +
>         +                       if (size < buf->length)
>         +                               result = VK_INCOMPLETE;
>         +               }
>         +
>         +               _mesa_string_buffer_destroy(buf);
>         +               break;
>         +       default:
>         +               /* VK_SHADER_INFO_TYPE_BINARY_AMD unimplemented
>         for now. */
>         +               result = VK_ERROR_FEATURE_NOT_PRESENT;
>         +               break;
>                  }
>            -     fprintf(file, "*** SHADER STATS ***\n"
>         -               "SGPRS: %d\n"
>         -               "VGPRS: %d\n"
>         -               "Spilled SGPRs: %d\n"
>         -               "Spilled VGPRs: %d\n"
>         -               "Code Size: %d bytes\n"
>         -               "LDS: %d blocks\n"
>         -               "Scratch: %d bytes per wave\n"
>         -               "Max Waves: %d\n"
>         -               "********************\n\n\n",
>         -               conf->num_sgprs, conf->num_vgprs,
>         -               conf->spilled_sgprs, conf->spilled_vgprs,
>         variant->code_size,
>         -               conf->lds_size, conf->scratch_bytes_per_wave,
>         -               max_simd_waves);
>         +       return result;
>            }
> 
>
On Wed, Oct 25, 2017 at 4:03 PM, Samuel Pitoiset
<samuel.pitoiset@gmail.com> wrote:
>
>
> On 10/25/2017 02:20 PM, Alex Smith wrote:
>>
>> On 25 October 2017 at 12:46, Samuel Pitoiset <samuel.pitoiset@gmail.com
>> <mailto:samuel.pitoiset@gmail.com>> wrote:
>>
>>     I have something similar on my local tree (started on monday).
>>
>>     Though, I don't like the way we expose the number of VGPRS/SGPRS
>>     because we can't really figure out the number of spilled ones.
>>
>>
>> My assumption was that if we've spilled then we've used all available
>> registers, so if numUsed{V,S}gprs is greater than the number available, then
>> you'd know that the number spilled is the difference between the two. Can we
>> have spilling when num_{v,s}gprs is less than the number available?
>
>
> Assuming the number of waves per CU is 4, I would go with:
>
> num_available_vgprs = num_physical_vgprs (ie. 256) / max_simd_waves (aligned
> down to 4).

for compute there is

num_available_vgprs (as LLVM sees as constraints) = num_physical_vgprs
/ ceil(compute_workgroup_size / 256)

for other stages it always is 256. (Until we implement the wave limit ext)

Reading from the spec I think it is unintuitive that the usedVgpr
stats include spilled registers though. I'd
expect to see just the physically used regs. Is this something that
Feral has tried on the official driver on any platform? I'd say to not
include the spilled regs (you can get it approximately with scratch
memory / 256), unless the official driver does otherwise, in which
case we should go for consistency.

>
> (or we can just set num_available_vgprs to conf->num_vgprs and return
> num_used_vgprs = conf->num_vgprs + conf->num_spilled_sgprs).
>
> That way, if num_used_vgprs is greater than num_available_vgprs we know that
> we are spilling some vgprs.
>
> For the number of available SGPRs, I think we can just hardcode the value to
> 104 for now.
>
> Also with this, we can easily re-compute the maximum number of waves.
>
>>
>> Alex
>>
>>
>>
>>     On 10/25/2017 01:18 PM, Alex Smith wrote:
>>
>>         This allows an app to query shader statistics and get a
>>         disassembly of
>>         a shader. RenderDoc git has support for it, so this allows you
>>         to view
>>         shader disassembly from a capture.
>>
>>         When this extension is enabled on a device (or when tracing), we
>> now
>>         disable pipeline caching, since we don't get the shader debug
>>         info when
>>         we retrieve cached shaders.
>>
>>         Signed-off-by: Alex Smith <asmith@feralinteractive.com
>>         <mailto:asmith@feralinteractive.com>>
>>
>>         ---
>>            src/amd/vulkan/radv_device.c         |   9 ++
>>            src/amd/vulkan/radv_extensions.py    |   1 +
>>            src/amd/vulkan/radv_pipeline.c       |   2 +-
>>            src/amd/vulkan/radv_pipeline_cache.c |  11 ++-
>>            src/amd/vulkan/radv_private.h        |   3 +
>>            src/amd/vulkan/radv_shader.c         | 163
>>         ++++++++++++++++++++++++++++-------
>>            6 files changed, 154 insertions(+), 35 deletions(-)
>>
>>         diff --git a/src/amd/vulkan/radv_device.c
>>         b/src/amd/vulkan/radv_device.c
>>         index c4e25222ea..5603551680 100644
>>         --- a/src/amd/vulkan/radv_device.c
>>         +++ b/src/amd/vulkan/radv_device.c
>>         @@ -943,10 +943,15 @@ VkResult radv_CreateDevice(
>>                  VkResult result;
>>                  struct radv_device *device;
>>            +     bool keep_shader_info = false;
>>         +
>>                  for (uint32_t i = 0; i <
>>         pCreateInfo->enabledExtensionCount; i++) {
>>                          const char *ext_name =
>>         pCreateInfo->ppEnabledExtensionNames[i];
>>                          if
>>         (!radv_physical_device_extension_supported(physical_device,
>>         ext_name))
>>                                  return
>>         vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
>>         +
>>         +               if (strcmp(ext_name,
>>         VK_AMD_SHADER_INFO_EXTENSION_NAME) == 0)
>>         +                       keep_shader_info = true;
>>                  }
>>                  /* Check enabled features */
>>         @@ -1040,10 +1045,14 @@ VkResult radv_CreateDevice(
>>                          device->physical_device->rad_info.max_se >= 2;
>>                  if (getenv("RADV_TRACE_FILE")) {
>>         +               keep_shader_info = true;
>>         +
>>                          if (!radv_init_trace(device))
>>                                  goto fail;
>>                  }
>>            +     device->keep_shader_info = keep_shader_info;
>>         +
>>                  result = radv_device_init_meta(device);
>>                  if (result != VK_SUCCESS)
>>                          goto fail;
>>         diff --git a/src/amd/vulkan/radv_extensions.py
>>         b/src/amd/vulkan/radv_extensions.py
>>         index dfeb2880fc..eeb679d65a 100644
>>         --- a/src/amd/vulkan/radv_extensions.py
>>         +++ b/src/amd/vulkan/radv_extensions.py
>>         @@ -81,6 +81,7 @@ EXTENSIONS = [
>>                Extension('VK_EXT_global_priority',                   1,
>>         'device->rad_info.has_ctx_priority'),
>>                Extension('VK_AMD_draw_indirect_count',               1,
>>         True),
>>                Extension('VK_AMD_rasterization_order',               1,
>>         'device->rad_info.chip_class >= VI && device->rad_info.max_se >=
>>         2'),
>>         +    Extension('VK_AMD_shader_info',                       1,
>> True),
>>            ]
>>              class VkVersion:
>>         diff --git a/src/amd/vulkan/radv_pipeline.c
>>         b/src/amd/vulkan/radv_pipeline.c
>>         index d6b33a5327..2df03a83cf 100644
>>         --- a/src/amd/vulkan/radv_pipeline.c
>>         +++ b/src/amd/vulkan/radv_pipeline.c
>>         @@ -1874,7 +1874,7 @@ void radv_create_shaders(struct
>>         radv_pipeline *pipeline,
>>                                  if (device->instance->debug_flags &
>>         RADV_DEBUG_DUMP_SHADERS)
>>                                          nir_print_shader(nir[i], stderr);
>>            -                     if (!pipeline->device->trace_bo)
>>         +                       if (!pipeline->device->keep_shader_info)
>>                                          ralloc_free(nir[i]);
>>                          }
>>                  }
>>         diff --git a/src/amd/vulkan/radv_pipeline_cache.c
>>         b/src/amd/vulkan/radv_pipeline_cache.c
>>         index 9ba9a3b61b..46198799a7 100644
>>         --- a/src/amd/vulkan/radv_pipeline_cache.c
>>         +++ b/src/amd/vulkan/radv_pipeline_cache.c
>>         @@ -62,9 +62,11 @@ radv_pipeline_cache_init(struct
>>         radv_pipeline_cache *cache,
>>                  cache->hash_table = malloc(byte_size);
>>                  /* We don't consider allocation failure fatal, we just
>>         start with a 0-sized
>>         -        * cache. */
>>         +        * cache. Disable caching when we want to keep shader
>>         debug info, since
>>         +        * we don't get the debug info on cached shaders. */
>>                  if (cache->hash_table == NULL ||
>>         -           (device->instance->debug_flags & RADV_DEBUG_NO_CACHE))
>>         +           (device->instance->debug_flags & RADV_DEBUG_NO_CACHE)
>> ||
>>         +           device->keep_shader_info)
>>                          cache->table_size = 0;
>>                  else
>>                          memset(cache->hash_table, 0, byte_size);
>>         @@ -186,8 +188,11 @@
>>         radv_create_shader_variants_from_pipeline_cache(struct
>>         radv_device *device,
>>                  entry = radv_pipeline_cache_search_unlocked(cache, sha1);
>>                  if (!entry) {
>>         +               /* Again, don't cache when we want debug info,
>>         since this isn't
>>         +                * present in the cache. */
>>                          if (!device->physical_device->disk_cache ||
>>         -                   (device->instance->debug_flags &
>>         RADV_DEBUG_NO_CACHE)) {
>>         +                   (device->instance->debug_flags &
>>         RADV_DEBUG_NO_CACHE) ||
>>         +                   device->keep_shader_info) {
>>                                  pthread_mutex_unlock(&cache->mutex);
>>                                  return false;
>>                          }
>>         diff --git a/src/amd/vulkan/radv_private.h
>>         b/src/amd/vulkan/radv_private.h
>>         index a4e52b2530..169df5f37b 100644
>>         --- a/src/amd/vulkan/radv_private.h
>>         +++ b/src/amd/vulkan/radv_private.h
>>         @@ -552,6 +552,9 @@ struct radv_device {
>>                  struct radeon_winsys_bo                      *trace_bo;
>>                  uint32_t
>> *trace_id_ptr;
>>            +     /* Whether to keep shader debug info, for tracing or
>>         VK_AMD_shader_info */
>>         +       bool
>> keep_shader_info;
>>         +
>>                  struct radv_physical_device
>> *physical_device;
>>                  /* Backup in-memory cache to be used if the app doesn't
>>         provide one */
>>         diff --git a/src/amd/vulkan/radv_shader.c
>>         b/src/amd/vulkan/radv_shader.c
>>         index 5903917068..7f2f0fd750 100644
>>         --- a/src/amd/vulkan/radv_shader.c
>>         +++ b/src/amd/vulkan/radv_shader.c
>>         @@ -46,6 +46,8 @@
>>            #include "util/debug.h"
>>            #include "ac_exp_param.h"
>>            +#include "util/string_buffer.h"
>>         +
>>            static const struct nir_shader_compiler_options nir_options = {
>>                  .vertex_id_zero_based = true,
>>                  .lower_scmp = true,
>>         @@ -471,7 +473,7 @@ shader_variant_create(struct radv_device
>>         *device,
>>                  free(binary.relocs);
>>                  variant->ref_count = 1;
>>            -     if (device->trace_bo) {
>>         +       if (device->keep_shader_info) {
>>                          variant->disasm_string = binary.disasm_string;
>>                          if (!gs_copy_shader && !module->nir) {
>>                                  variant->nir = *shaders;
>>         @@ -593,11 +595,20 @@ radv_get_shader_name(struct
>>         radv_shader_variant *var, gl_shader_stage stage)
>>                  };
>>            }
>>            -void
>>         -radv_shader_dump_stats(struct radv_device *device,
>>         -                      struct radv_shader_variant *variant,
>>         -                      gl_shader_stage stage,
>>         -                      FILE *file)
>>         +static uint32_t
>>         +get_total_sgprs(struct radv_device *device)
>>         +{
>>         +       if (device->physical_device->rad_info.chip_class >= VI)
>>         +               return 800;
>>         +       else
>>         +               return 512;
>>         +}
>>         +
>>         +static void
>>         +generate_shader_stats(struct radv_device *device,
>>         +                     struct radv_shader_variant *variant,
>>         +                     gl_shader_stage stage,
>>         +                     struct _mesa_string_buffer *buf)
>>            {
>>                  unsigned lds_increment =
>>         device->physical_device->rad_info.chip_class >= CIK ? 512 : 256;
>>                  struct ac_shader_config *conf;
>>         @@ -623,12 +634,8 @@ radv_shader_dump_stats(struct radv_device
>>         *device,
>>                                               lds_increment);
>>                  }
>>            -     if (conf->num_sgprs) {
>>         -               if (device->physical_device->rad_info.chip_class
>>          >= VI)
>>         -                       max_simd_waves = MIN2(max_simd_waves,
>>         800 / conf->num_sgprs);
>>         -               else
>>         -                       max_simd_waves = MIN2(max_simd_waves,
>>         512 / conf->num_sgprs);
>>         -       }
>>         +       if (conf->num_sgprs)
>>         +               max_simd_waves = MIN2(max_simd_waves,
>>         get_total_sgprs(device) / conf->num_sgprs);
>>                  if (conf->num_vgprs)
>>                          max_simd_waves = MIN2(max_simd_waves, 256 /
>>         conf->num_vgprs);
>>         @@ -639,27 +646,121 @@ radv_shader_dump_stats(struct radv_device
>>         *device,
>>                  if (lds_per_wave)
>>                          max_simd_waves = MIN2(max_simd_waves, 16384 /
>>         lds_per_wave);
>>            +     if (stage == MESA_SHADER_FRAGMENT) {
>>         +               _mesa_string_buffer_printf(buf, "*** SHADER
>>         CONFIG ***\n"
>>         +                                          "SPI_PS_INPUT_ADDR =
>>         0x%04x\n"
>>         +                                          "SPI_PS_INPUT_ENA  =
>>         0x%04x\n",
>>         +
>> conf->spi_ps_input_addr, conf->spi_ps_input_ena);
>>         +       }
>>         +
>>         +       _mesa_string_buffer_printf(buf, "*** SHADER STATS ***\n"
>>         +                                  "SGPRS: %d\n"
>>         +                                  "VGPRS: %d\n"
>>         +                                  "Spilled SGPRs: %d\n"
>>         +                                  "Spilled VGPRs: %d\n"
>>         +                                  "Code Size: %d bytes\n"
>>         +                                  "LDS: %d blocks\n"
>>         +                                  "Scratch: %d bytes per wave\n"
>>         +                                  "Max Waves: %d\n"
>>         +                                  "********************\n\n\n",
>>         +                                  conf->num_sgprs,
>> conf->num_vgprs,
>>         +                                  conf->spilled_sgprs,
>>         conf->spilled_vgprs, variant->code_size,
>>         +                                  conf->lds_size,
>>         conf->scratch_bytes_per_wave,
>>         +                                  max_simd_waves);
>>         +}
>>         +
>>         +void
>>         +radv_shader_dump_stats(struct radv_device *device,
>>         +                      struct radv_shader_variant *variant,
>>         +                      gl_shader_stage stage,
>>         +                      FILE *file)
>>         +{
>>         +       struct _mesa_string_buffer *buf =
>>         _mesa_string_buffer_create(NULL, 256);
>>         +
>>         +       generate_shader_stats(device, variant, stage, buf);
>>         +
>>                  fprintf(file, "\n%s:\n", radv_get_shader_name(variant,
>>         stage));
>>         +       fprintf(file, buf->buf);
>>            -     if (stage == MESA_SHADER_FRAGMENT) {
>>         -               fprintf(file, "*** SHADER CONFIG ***\n"
>>         -                       "SPI_PS_INPUT_ADDR = 0x%04x\n"
>>         -                       "SPI_PS_INPUT_ENA  = 0x%04x\n",
>>         -                       conf->spi_ps_input_addr,
>>         conf->spi_ps_input_ena);
>>         +       _mesa_string_buffer_destroy(buf);
>>         +}
>>         +
>>         +VkResult
>>         +radv_GetShaderInfoAMD(VkDevice _device,
>>         +                     VkPipeline _pipeline,
>>         +                     VkShaderStageFlagBits shaderStage,
>>         +                     VkShaderInfoTypeAMD infoType,
>>         +                     size_t* pInfoSize,
>>         +                     void* pInfo)
>>         +{
>>         +       RADV_FROM_HANDLE(radv_device, device, _device);
>>         +       RADV_FROM_HANDLE(radv_pipeline, pipeline, _pipeline);
>>         +       gl_shader_stage stage =
>>         vk_to_mesa_shader_stage(shaderStage);
>>         +       struct radv_shader_variant *variant =
>>         pipeline->shaders[stage];
>>         +       struct _mesa_string_buffer *buf;
>>         +       VkResult result = VK_SUCCESS;
>>         +
>>         +       /* Spec doesn't indicate what to do if the stage is
>>         invalid, so just
>>         +        * return no info for this. */
>>         +       if (!variant)
>>         +               return VK_ERROR_FEATURE_NOT_PRESENT;
>>         +
>>         +       switch (infoType) {
>>         +       case VK_SHADER_INFO_TYPE_STATISTICS_AMD:
>>         +               if (!pInfo) {
>>         +                       *pInfoSize =
>>         sizeof(VkShaderStatisticsInfoAMD);
>>         +               } else {
>>         +                       struct ac_shader_config *conf =
>>         &variant->config;
>>         +
>>         +                       VkShaderStatisticsInfoAMD statistics = {};
>>         +                       statistics.shaderStageMask = shaderStage;
>>         +                       statistics.resourceUsage.numUsedVgprs =
>>         conf->num_vgprs + conf->spilled_vgprs;
>>         +                       statistics.resourceUsage.numUsedSgprs =
>>         conf->num_sgprs + conf->spilled_sgprs;
>>         +
>> statistics.resourceUsage.ldsSizePerLocalWorkGroup = 16384;
>>         +
>> statistics.resourceUsage.ldsUsageSizeInBytes = conf->lds_size;
>>         +
>> statistics.resourceUsage.scratchMemUsageInBytes =
>>         conf->scratch_bytes_per_wave;
>>         +                       statistics.numPhysicalVgprs =
>>         statistics.numAvailableVgprs = 256;
>>         +                       statistics.numPhysicalSgprs =
>>         statistics.numAvailableSgprs = get_total_sgprs(device);
>>         +                       statistics.computeWorkGroupSize[0] =
>>         variant->nir->info.cs.local_size[0];
>>         +                       statistics.computeWorkGroupSize[1] =
>>         variant->nir->info.cs.local_size[1];
>>         +                       statistics.computeWorkGroupSize[2] =
>>         variant->nir->info.cs.local_size[2];
>>         +
>>         +                       size_t size = *pInfoSize;
>>         +                       *pInfoSize = sizeof(statistics);
>>         +
>>         +                       memcpy(pInfo, &statistics, MIN2(size,
>>         *pInfoSize));
>>         +
>>         +                       if (size < *pInfoSize)
>>         +                               result = VK_INCOMPLETE;
>>         +               }
>>         +
>>         +               break;
>>         +       case VK_SHADER_INFO_TYPE_DISASSEMBLY_AMD:
>>         +               buf = _mesa_string_buffer_create(NULL, 1024);
>>         +
>>         +               _mesa_string_buffer_printf(buf, "%s:\n",
>>         radv_get_shader_name(variant, stage));
>>         +               _mesa_string_buffer_printf(buf, "%s\n\n",
>>         variant->disasm_string);
>>         +               generate_shader_stats(device, variant, stage,
>> buf);
>>         +
>>         +               if (!pInfo) {
>>         +                       *pInfoSize = buf->length;
>>         +               } else {
>>         +                       size_t size = *pInfoSize;
>>         +                       *pInfoSize = buf->length;
>>         +
>>         +                       memcpy(pInfo, buf->buf, MIN2(size,
>>         buf->length));
>>         +
>>         +                       if (size < buf->length)
>>         +                               result = VK_INCOMPLETE;
>>         +               }
>>         +
>>         +               _mesa_string_buffer_destroy(buf);
>>         +               break;
>>         +       default:
>>         +               /* VK_SHADER_INFO_TYPE_BINARY_AMD unimplemented
>>         for now. */
>>         +               result = VK_ERROR_FEATURE_NOT_PRESENT;
>>         +               break;
>>                  }
>>            -     fprintf(file, "*** SHADER STATS ***\n"
>>         -               "SGPRS: %d\n"
>>         -               "VGPRS: %d\n"
>>         -               "Spilled SGPRs: %d\n"
>>         -               "Spilled VGPRs: %d\n"
>>         -               "Code Size: %d bytes\n"
>>         -               "LDS: %d blocks\n"
>>         -               "Scratch: %d bytes per wave\n"
>>         -               "Max Waves: %d\n"
>>         -               "********************\n\n\n",
>>         -               conf->num_sgprs, conf->num_vgprs,
>>         -               conf->spilled_sgprs, conf->spilled_vgprs,
>>         variant->code_size,
>>         -               conf->lds_size, conf->scratch_bytes_per_wave,
>>         -               max_simd_waves);
>>         +       return result;
>>            }
>>
>>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
On 25 October 2017 at 21:58, Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
wrote:

> On Wed, Oct 25, 2017 at 4:03 PM, Samuel Pitoiset
> <samuel.pitoiset@gmail.com> wrote:
> >
> >
> > On 10/25/2017 02:20 PM, Alex Smith wrote:
> >>
> >> On 25 October 2017 at 12:46, Samuel Pitoiset <samuel.pitoiset@gmail.com
> >> <mailto:samuel.pitoiset@gmail.com>> wrote:
> >>
> >>     I have something similar on my local tree (started on monday).
> >>
> >>     Though, I don't like the way we expose the number of VGPRS/SGPRS
> >>     because we can't really figure out the number of spilled ones.
> >>
> >>
> >> My assumption was that if we've spilled then we've used all available
> >> registers, so if numUsed{V,S}gprs is greater than the number available,
> then
> >> you'd know that the number spilled is the difference between the two.
> Can we
> >> have spilling when num_{v,s}gprs is less than the number available?
> >
> >
> > Assuming the number of waves per CU is 4, I would go with:
> >
> > num_available_vgprs = num_physical_vgprs (ie. 256) / max_simd_waves
> (aligned
> > down to 4).
>
> for compute there is
>
> num_available_vgprs (as LLVM sees as constraints) = num_physical_vgprs
> / ceil(compute_workgroup_size / 256)
>
> for other stages it always is 256. (Until we implement the wave limit ext)
>
> Reading from the spec I think it is unintuitive that the usedVgpr
> stats include spilled registers though. I'd
> expect to see just the physically used regs. Is this something that
> Feral has tried on the official driver on any platform? I'd say to not
> include the spilled regs (you can get it approximately with scratch
> memory / 256), unless the official driver does otherwise, in which
> case we should go for consistency.
>

I've not looked at amdgpu-pro, I'm unable to check it right now. Not sure
if that would even have the extension since it only appeared in the spec
very recently.

I'll go with what you suggest for now, I think you're probably right that
we shouldn't include the spilled registers.

Alex


>
> >
> > (or we can just set num_available_vgprs to conf->num_vgprs and return
> > num_used_vgprs = conf->num_vgprs + conf->num_spilled_sgprs).
> >
> > That way, if num_used_vgprs is greater than num_available_vgprs we know
> that
> > we are spilling some vgprs.
> >
> > For the number of available SGPRs, I think we can just hardcode the
> value to
> > 104 for now.
> >
> > Also with this, we can easily re-compute the maximum number of waves.
> >
> >>
> >> Alex
> >>
> >>
> >>
> >>     On 10/25/2017 01:18 PM, Alex Smith wrote:
> >>
> >>         This allows an app to query shader statistics and get a
> >>         disassembly of
> >>         a shader. RenderDoc git has support for it, so this allows you
> >>         to view
> >>         shader disassembly from a capture.
> >>
> >>         When this extension is enabled on a device (or when tracing), we
> >> now
> >>         disable pipeline caching, since we don't get the shader debug
> >>         info when
> >>         we retrieve cached shaders.
> >>
> >>         Signed-off-by: Alex Smith <asmith@feralinteractive.com
> >>         <mailto:asmith@feralinteractive.com>>
> >>
> >>         ---
> >>            src/amd/vulkan/radv_device.c         |   9 ++
> >>            src/amd/vulkan/radv_extensions.py    |   1 +
> >>            src/amd/vulkan/radv_pipeline.c       |   2 +-
> >>            src/amd/vulkan/radv_pipeline_cache.c |  11 ++-
> >>            src/amd/vulkan/radv_private.h        |   3 +
> >>            src/amd/vulkan/radv_shader.c         | 163
> >>         ++++++++++++++++++++++++++++-------
> >>            6 files changed, 154 insertions(+), 35 deletions(-)
> >>
> >>         diff --git a/src/amd/vulkan/radv_device.c
> >>         b/src/amd/vulkan/radv_device.c
> >>         index c4e25222ea..5603551680 100644
> >>         --- a/src/amd/vulkan/radv_device.c
> >>         +++ b/src/amd/vulkan/radv_device.c
> >>         @@ -943,10 +943,15 @@ VkResult radv_CreateDevice(
> >>                  VkResult result;
> >>                  struct radv_device *device;
> >>            +     bool keep_shader_info = false;
> >>         +
> >>                  for (uint32_t i = 0; i <
> >>         pCreateInfo->enabledExtensionCount; i++) {
> >>                          const char *ext_name =
> >>         pCreateInfo->ppEnabledExtensionNames[i];
> >>                          if
> >>         (!radv_physical_device_extension_supported(physical_device,
> >>         ext_name))
> >>                                  return
> >>         vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
> >>         +
> >>         +               if (strcmp(ext_name,
> >>         VK_AMD_SHADER_INFO_EXTENSION_NAME) == 0)
> >>         +                       keep_shader_info = true;
> >>                  }
> >>                  /* Check enabled features */
> >>         @@ -1040,10 +1045,14 @@ VkResult radv_CreateDevice(
> >>                          device->physical_device->rad_info.max_se >= 2;
> >>                  if (getenv("RADV_TRACE_FILE")) {
> >>         +               keep_shader_info = true;
> >>         +
> >>                          if (!radv_init_trace(device))
> >>                                  goto fail;
> >>                  }
> >>            +     device->keep_shader_info = keep_shader_info;
> >>         +
> >>                  result = radv_device_init_meta(device);
> >>                  if (result != VK_SUCCESS)
> >>                          goto fail;
> >>         diff --git a/src/amd/vulkan/radv_extensions.py
> >>         b/src/amd/vulkan/radv_extensions.py
> >>         index dfeb2880fc..eeb679d65a 100644
> >>         --- a/src/amd/vulkan/radv_extensions.py
> >>         +++ b/src/amd/vulkan/radv_extensions.py
> >>         @@ -81,6 +81,7 @@ EXTENSIONS = [
> >>                Extension('VK_EXT_global_priority',                   1,
> >>         'device->rad_info.has_ctx_priority'),
> >>                Extension('VK_AMD_draw_indirect_count',               1,
> >>         True),
> >>                Extension('VK_AMD_rasterization_order',               1,
> >>         'device->rad_info.chip_class >= VI && device->rad_info.max_se >=
> >>         2'),
> >>         +    Extension('VK_AMD_shader_info',                       1,
> >> True),
> >>            ]
> >>              class VkVersion:
> >>         diff --git a/src/amd/vulkan/radv_pipeline.c
> >>         b/src/amd/vulkan/radv_pipeline.c
> >>         index d6b33a5327..2df03a83cf 100644
> >>         --- a/src/amd/vulkan/radv_pipeline.c
> >>         +++ b/src/amd/vulkan/radv_pipeline.c
> >>         @@ -1874,7 +1874,7 @@ void radv_create_shaders(struct
> >>         radv_pipeline *pipeline,
> >>                                  if (device->instance->debug_flags &
> >>         RADV_DEBUG_DUMP_SHADERS)
> >>                                          nir_print_shader(nir[i],
> stderr);
> >>            -                     if (!pipeline->device->trace_bo)
> >>         +                       if (!pipeline->device->keep_
> shader_info)
> >>                                          ralloc_free(nir[i]);
> >>                          }
> >>                  }
> >>         diff --git a/src/amd/vulkan/radv_pipeline_cache.c
> >>         b/src/amd/vulkan/radv_pipeline_cache.c
> >>         index 9ba9a3b61b..46198799a7 100644
> >>         --- a/src/amd/vulkan/radv_pipeline_cache.c
> >>         +++ b/src/amd/vulkan/radv_pipeline_cache.c
> >>         @@ -62,9 +62,11 @@ radv_pipeline_cache_init(struct
> >>         radv_pipeline_cache *cache,
> >>                  cache->hash_table = malloc(byte_size);
> >>                  /* We don't consider allocation failure fatal, we just
> >>         start with a 0-sized
> >>         -        * cache. */
> >>         +        * cache. Disable caching when we want to keep shader
> >>         debug info, since
> >>         +        * we don't get the debug info on cached shaders. */
> >>                  if (cache->hash_table == NULL ||
> >>         -           (device->instance->debug_flags &
> RADV_DEBUG_NO_CACHE))
> >>         +           (device->instance->debug_flags &
> RADV_DEBUG_NO_CACHE)
> >> ||
> >>         +           device->keep_shader_info)
> >>                          cache->table_size = 0;
> >>                  else
> >>                          memset(cache->hash_table, 0, byte_size);
> >>         @@ -186,8 +188,11 @@
> >>         radv_create_shader_variants_from_pipeline_cache(struct
> >>         radv_device *device,
> >>                  entry = radv_pipeline_cache_search_unlocked(cache,
> sha1);
> >>                  if (!entry) {
> >>         +               /* Again, don't cache when we want debug info,
> >>         since this isn't
> >>         +                * present in the cache. */
> >>                          if (!device->physical_device->disk_cache ||
> >>         -                   (device->instance->debug_flags &
> >>         RADV_DEBUG_NO_CACHE)) {
> >>         +                   (device->instance->debug_flags &
> >>         RADV_DEBUG_NO_CACHE) ||
> >>         +                   device->keep_shader_info) {
> >>                                  pthread_mutex_unlock(&cache->mutex);
> >>                                  return false;
> >>                          }
> >>         diff --git a/src/amd/vulkan/radv_private.h
> >>         b/src/amd/vulkan/radv_private.h
> >>         index a4e52b2530..169df5f37b 100644
> >>         --- a/src/amd/vulkan/radv_private.h
> >>         +++ b/src/amd/vulkan/radv_private.h
> >>         @@ -552,6 +552,9 @@ struct radv_device {
> >>                  struct radeon_winsys_bo                      *trace_bo;
> >>                  uint32_t
> >> *trace_id_ptr;
> >>            +     /* Whether to keep shader debug info, for tracing or
> >>         VK_AMD_shader_info */
> >>         +       bool
> >> keep_shader_info;
> >>         +
> >>                  struct radv_physical_device
> >> *physical_device;
> >>                  /* Backup in-memory cache to be used if the app doesn't
> >>         provide one */
> >>         diff --git a/src/amd/vulkan/radv_shader.c
> >>         b/src/amd/vulkan/radv_shader.c
> >>         index 5903917068..7f2f0fd750 100644
> >>         --- a/src/amd/vulkan/radv_shader.c
> >>         +++ b/src/amd/vulkan/radv_shader.c
> >>         @@ -46,6 +46,8 @@
> >>            #include "util/debug.h"
> >>            #include "ac_exp_param.h"
> >>            +#include "util/string_buffer.h"
> >>         +
> >>            static const struct nir_shader_compiler_options nir_options
> = {
> >>                  .vertex_id_zero_based = true,
> >>                  .lower_scmp = true,
> >>         @@ -471,7 +473,7 @@ shader_variant_create(struct radv_device
> >>         *device,
> >>                  free(binary.relocs);
> >>                  variant->ref_count = 1;
> >>            -     if (device->trace_bo) {
> >>         +       if (device->keep_shader_info) {
> >>                          variant->disasm_string = binary.disasm_string;
> >>                          if (!gs_copy_shader && !module->nir) {
> >>                                  variant->nir = *shaders;
> >>         @@ -593,11 +595,20 @@ radv_get_shader_name(struct
> >>         radv_shader_variant *var, gl_shader_stage stage)
> >>                  };
> >>            }
> >>            -void
> >>         -radv_shader_dump_stats(struct radv_device *device,
> >>         -                      struct radv_shader_variant *variant,
> >>         -                      gl_shader_stage stage,
> >>         -                      FILE *file)
> >>         +static uint32_t
> >>         +get_total_sgprs(struct radv_device *device)
> >>         +{
> >>         +       if (device->physical_device->rad_info.chip_class >= VI)
> >>         +               return 800;
> >>         +       else
> >>         +               return 512;
> >>         +}
> >>         +
> >>         +static void
> >>         +generate_shader_stats(struct radv_device *device,
> >>         +                     struct radv_shader_variant *variant,
> >>         +                     gl_shader_stage stage,
> >>         +                     struct _mesa_string_buffer *buf)
> >>            {
> >>                  unsigned lds_increment =
> >>         device->physical_device->rad_info.chip_class >= CIK ? 512 :
> 256;
> >>                  struct ac_shader_config *conf;
> >>         @@ -623,12 +634,8 @@ radv_shader_dump_stats(struct radv_device
> >>         *device,
> >>                                               lds_increment);
> >>                  }
> >>            -     if (conf->num_sgprs) {
> >>         -               if (device->physical_device->rad_
> info.chip_class
> >>          >= VI)
> >>         -                       max_simd_waves = MIN2(max_simd_waves,
> >>         800 / conf->num_sgprs);
> >>         -               else
> >>         -                       max_simd_waves = MIN2(max_simd_waves,
> >>         512 / conf->num_sgprs);
> >>         -       }
> >>         +       if (conf->num_sgprs)
> >>         +               max_simd_waves = MIN2(max_simd_waves,
> >>         get_total_sgprs(device) / conf->num_sgprs);
> >>                  if (conf->num_vgprs)
> >>                          max_simd_waves = MIN2(max_simd_waves, 256 /
> >>         conf->num_vgprs);
> >>         @@ -639,27 +646,121 @@ radv_shader_dump_stats(struct radv_device
> >>         *device,
> >>                  if (lds_per_wave)
> >>                          max_simd_waves = MIN2(max_simd_waves, 16384 /
> >>         lds_per_wave);
> >>            +     if (stage == MESA_SHADER_FRAGMENT) {
> >>         +               _mesa_string_buffer_printf(buf, "*** SHADER
> >>         CONFIG ***\n"
> >>         +                                          "SPI_PS_INPUT_ADDR =
> >>         0x%04x\n"
> >>         +                                          "SPI_PS_INPUT_ENA  =
> >>         0x%04x\n",
> >>         +
> >> conf->spi_ps_input_addr, conf->spi_ps_input_ena);
> >>         +       }
> >>         +
> >>         +       _mesa_string_buffer_printf(buf, "*** SHADER STATS
> ***\n"
> >>         +                                  "SGPRS: %d\n"
> >>         +                                  "VGPRS: %d\n"
> >>         +                                  "Spilled SGPRs: %d\n"
> >>         +                                  "Spilled VGPRs: %d\n"
> >>         +                                  "Code Size: %d bytes\n"
> >>         +                                  "LDS: %d blocks\n"
> >>         +                                  "Scratch: %d bytes per
> wave\n"
> >>         +                                  "Max Waves: %d\n"
> >>         +                                  "********************\n\n\n",
> >>         +                                  conf->num_sgprs,
> >> conf->num_vgprs,
> >>         +                                  conf->spilled_sgprs,
> >>         conf->spilled_vgprs, variant->code_size,
> >>         +                                  conf->lds_size,
> >>         conf->scratch_bytes_per_wave,
> >>         +                                  max_simd_waves);
> >>         +}
> >>         +
> >>         +void
> >>         +radv_shader_dump_stats(struct radv_device *device,
> >>         +                      struct radv_shader_variant *variant,
> >>         +                      gl_shader_stage stage,
> >>         +                      FILE *file)
> >>         +{
> >>         +       struct _mesa_string_buffer *buf =
> >>         _mesa_string_buffer_create(NULL, 256);
> >>         +
> >>         +       generate_shader_stats(device, variant, stage, buf);
> >>         +
> >>                  fprintf(file, "\n%s:\n", radv_get_shader_name(variant,
> >>         stage));
> >>         +       fprintf(file, buf->buf);
> >>            -     if (stage == MESA_SHADER_FRAGMENT) {
> >>         -               fprintf(file, "*** SHADER CONFIG ***\n"
> >>         -                       "SPI_PS_INPUT_ADDR = 0x%04x\n"
> >>         -                       "SPI_PS_INPUT_ENA  = 0x%04x\n",
> >>         -                       conf->spi_ps_input_addr,
> >>         conf->spi_ps_input_ena);
> >>         +       _mesa_string_buffer_destroy(buf);
> >>         +}
> >>         +
> >>         +VkResult
> >>         +radv_GetShaderInfoAMD(VkDevice _device,
> >>         +                     VkPipeline _pipeline,
> >>         +                     VkShaderStageFlagBits shaderStage,
> >>         +                     VkShaderInfoTypeAMD infoType,
> >>         +                     size_t* pInfoSize,
> >>         +                     void* pInfo)
> >>         +{
> >>         +       RADV_FROM_HANDLE(radv_device, device, _device);
> >>         +       RADV_FROM_HANDLE(radv_pipeline, pipeline, _pipeline);
> >>         +       gl_shader_stage stage =
> >>         vk_to_mesa_shader_stage(shaderStage);
> >>         +       struct radv_shader_variant *variant =
> >>         pipeline->shaders[stage];
> >>         +       struct _mesa_string_buffer *buf;
> >>         +       VkResult result = VK_SUCCESS;
> >>         +
> >>         +       /* Spec doesn't indicate what to do if the stage is
> >>         invalid, so just
> >>         +        * return no info for this. */
> >>         +       if (!variant)
> >>         +               return VK_ERROR_FEATURE_NOT_PRESENT;
> >>         +
> >>         +       switch (infoType) {
> >>         +       case VK_SHADER_INFO_TYPE_STATISTICS_AMD:
> >>         +               if (!pInfo) {
> >>         +                       *pInfoSize =
> >>         sizeof(VkShaderStatisticsInfoAMD);
> >>         +               } else {
> >>         +                       struct ac_shader_config *conf =
> >>         &variant->config;
> >>         +
> >>         +                       VkShaderStatisticsInfoAMD statistics =
> {};
> >>         +                       statistics.shaderStageMask =
> shaderStage;
> >>         +                       statistics.resourceUsage.numUsedVgprs =
> >>         conf->num_vgprs + conf->spilled_vgprs;
> >>         +                       statistics.resourceUsage.numUsedSgprs =
> >>         conf->num_sgprs + conf->spilled_sgprs;
> >>         +
> >> statistics.resourceUsage.ldsSizePerLocalWorkGroup = 16384;
> >>         +
> >> statistics.resourceUsage.ldsUsageSizeInBytes = conf->lds_size;
> >>         +
> >> statistics.resourceUsage.scratchMemUsageInBytes =
> >>         conf->scratch_bytes_per_wave;
> >>         +                       statistics.numPhysicalVgprs =
> >>         statistics.numAvailableVgprs = 256;
> >>         +                       statistics.numPhysicalSgprs =
> >>         statistics.numAvailableSgprs = get_total_sgprs(device);
> >>         +                       statistics.computeWorkGroupSize[0] =
> >>         variant->nir->info.cs.local_size[0];
> >>         +                       statistics.computeWorkGroupSize[1] =
> >>         variant->nir->info.cs.local_size[1];
> >>         +                       statistics.computeWorkGroupSize[2] =
> >>         variant->nir->info.cs.local_size[2];
> >>         +
> >>         +                       size_t size = *pInfoSize;
> >>         +                       *pInfoSize = sizeof(statistics);
> >>         +
> >>         +                       memcpy(pInfo, &statistics, MIN2(size,
> >>         *pInfoSize));
> >>         +
> >>         +                       if (size < *pInfoSize)
> >>         +                               result = VK_INCOMPLETE;
> >>         +               }
> >>         +
> >>         +               break;
> >>         +       case VK_SHADER_INFO_TYPE_DISASSEMBLY_AMD:
> >>         +               buf = _mesa_string_buffer_create(NULL, 1024);
> >>         +
> >>         +               _mesa_string_buffer_printf(buf, "%s:\n",
> >>         radv_get_shader_name(variant, stage));
> >>         +               _mesa_string_buffer_printf(buf, "%s\n\n",
> >>         variant->disasm_string);
> >>         +               generate_shader_stats(device, variant, stage,
> >> buf);
> >>         +
> >>         +               if (!pInfo) {
> >>         +                       *pInfoSize = buf->length;
> >>         +               } else {
> >>         +                       size_t size = *pInfoSize;
> >>         +                       *pInfoSize = buf->length;
> >>         +
> >>         +                       memcpy(pInfo, buf->buf, MIN2(size,
> >>         buf->length));
> >>         +
> >>         +                       if (size < buf->length)
> >>         +                               result = VK_INCOMPLETE;
> >>         +               }
> >>         +
> >>         +               _mesa_string_buffer_destroy(buf);
> >>         +               break;
> >>         +       default:
> >>         +               /* VK_SHADER_INFO_TYPE_BINARY_AMD unimplemented
> >>         for now. */
> >>         +               result = VK_ERROR_FEATURE_NOT_PRESENT;
> >>         +               break;
> >>                  }
> >>            -     fprintf(file, "*** SHADER STATS ***\n"
> >>         -               "SGPRS: %d\n"
> >>         -               "VGPRS: %d\n"
> >>         -               "Spilled SGPRs: %d\n"
> >>         -               "Spilled VGPRs: %d\n"
> >>         -               "Code Size: %d bytes\n"
> >>         -               "LDS: %d blocks\n"
> >>         -               "Scratch: %d bytes per wave\n"
> >>         -               "Max Waves: %d\n"
> >>         -               "********************\n\n\n",
> >>         -               conf->num_sgprs, conf->num_vgprs,
> >>         -               conf->spilled_sgprs, conf->spilled_vgprs,
> >>         variant->code_size,
> >>         -               conf->lds_size, conf->scratch_bytes_per_wave,
> >>         -               max_simd_waves);
> >>         +       return result;
> >>            }
> >>
> >>
> > _______________________________________________
> > mesa-dev mailing list
> > mesa-dev@lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
On 10/25/2017 10:58 PM, Bas Nieuwenhuizen wrote:
> On Wed, Oct 25, 2017 at 4:03 PM, Samuel Pitoiset
> <samuel.pitoiset@gmail.com> wrote:
>>
>>
>> On 10/25/2017 02:20 PM, Alex Smith wrote:
>>>
>>> On 25 October 2017 at 12:46, Samuel Pitoiset <samuel.pitoiset@gmail.com
>>> <mailto:samuel.pitoiset@gmail.com>> wrote:
>>>
>>>      I have something similar on my local tree (started on monday).
>>>
>>>      Though, I don't like the way we expose the number of VGPRS/SGPRS
>>>      because we can't really figure out the number of spilled ones.
>>>
>>>
>>> My assumption was that if we've spilled then we've used all available
>>> registers, so if numUsed{V,S}gprs is greater than the number available, then
>>> you'd know that the number spilled is the difference between the two. Can we
>>> have spilling when num_{v,s}gprs is less than the number available?
>>
>>
>> Assuming the number of waves per CU is 4, I would go with:
>>
>> num_available_vgprs = num_physical_vgprs (ie. 256) / max_simd_waves (aligned
>> down to 4).
> 
> for compute there is
> 
> num_available_vgprs (as LLVM sees as constraints) = num_physical_vgprs
> / ceil(compute_workgroup_size / 256)
> 
> for other stages it always is 256. (Until we implement the wave limit ext)
> 
> Reading from the spec I think it is unintuitive that the usedVgpr
> stats include spilled registers though. I'd
> expect to see just the physically used regs. Is this something that
> Feral has tried on the official driver on any platform? I'd say to not
> include the spilled regs (you can get it approximately with scratch
> memory / 256), unless the official driver does otherwise, in which
> case we should go for consistency.

Yeah, for the number of spilled VGPRS we can get it from the scratch 
memory size, but not for the spilled SGPRS. I think it would have been 
better to add fields for the spilled VGPRS/SGPRS.

> 
>>
>> (or we can just set num_available_vgprs to conf->num_vgprs and return
>> num_used_vgprs = conf->num_vgprs + conf->num_spilled_sgprs).
>>
>> That way, if num_used_vgprs is greater than num_available_vgprs we know that
>> we are spilling some vgprs.
>>
>> For the number of available SGPRs, I think we can just hardcode the value to
>> 104 for now.
>>
>> Also with this, we can easily re-compute the maximum number of waves.
>>
>>>
>>> Alex
>>>
>>>
>>>
>>>      On 10/25/2017 01:18 PM, Alex Smith wrote:
>>>
>>>          This allows an app to query shader statistics and get a
>>>          disassembly of
>>>          a shader. RenderDoc git has support for it, so this allows you
>>>          to view
>>>          shader disassembly from a capture.
>>>
>>>          When this extension is enabled on a device (or when tracing), we
>>> now
>>>          disable pipeline caching, since we don't get the shader debug
>>>          info when
>>>          we retrieve cached shaders.
>>>
>>>          Signed-off-by: Alex Smith <asmith@feralinteractive.com
>>>          <mailto:asmith@feralinteractive.com>>
>>>
>>>          ---
>>>             src/amd/vulkan/radv_device.c         |   9 ++
>>>             src/amd/vulkan/radv_extensions.py    |   1 +
>>>             src/amd/vulkan/radv_pipeline.c       |   2 +-
>>>             src/amd/vulkan/radv_pipeline_cache.c |  11 ++-
>>>             src/amd/vulkan/radv_private.h        |   3 +
>>>             src/amd/vulkan/radv_shader.c         | 163
>>>          ++++++++++++++++++++++++++++-------
>>>             6 files changed, 154 insertions(+), 35 deletions(-)
>>>
>>>          diff --git a/src/amd/vulkan/radv_device.c
>>>          b/src/amd/vulkan/radv_device.c
>>>          index c4e25222ea..5603551680 100644
>>>          --- a/src/amd/vulkan/radv_device.c
>>>          +++ b/src/amd/vulkan/radv_device.c
>>>          @@ -943,10 +943,15 @@ VkResult radv_CreateDevice(
>>>                   VkResult result;
>>>                   struct radv_device *device;
>>>             +     bool keep_shader_info = false;
>>>          +
>>>                   for (uint32_t i = 0; i <
>>>          pCreateInfo->enabledExtensionCount; i++) {
>>>                           const char *ext_name =
>>>          pCreateInfo->ppEnabledExtensionNames[i];
>>>                           if
>>>          (!radv_physical_device_extension_supported(physical_device,
>>>          ext_name))
>>>                                   return
>>>          vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
>>>          +
>>>          +               if (strcmp(ext_name,
>>>          VK_AMD_SHADER_INFO_EXTENSION_NAME) == 0)
>>>          +                       keep_shader_info = true;
>>>                   }
>>>                   /* Check enabled features */
>>>          @@ -1040,10 +1045,14 @@ VkResult radv_CreateDevice(
>>>                           device->physical_device->rad_info.max_se >= 2;
>>>                   if (getenv("RADV_TRACE_FILE")) {
>>>          +               keep_shader_info = true;
>>>          +
>>>                           if (!radv_init_trace(device))
>>>                                   goto fail;
>>>                   }
>>>             +     device->keep_shader_info = keep_shader_info;
>>>          +
>>>                   result = radv_device_init_meta(device);
>>>                   if (result != VK_SUCCESS)
>>>                           goto fail;
>>>          diff --git a/src/amd/vulkan/radv_extensions.py
>>>          b/src/amd/vulkan/radv_extensions.py
>>>          index dfeb2880fc..eeb679d65a 100644
>>>          --- a/src/amd/vulkan/radv_extensions.py
>>>          +++ b/src/amd/vulkan/radv_extensions.py
>>>          @@ -81,6 +81,7 @@ EXTENSIONS = [
>>>                 Extension('VK_EXT_global_priority',                   1,
>>>          'device->rad_info.has_ctx_priority'),
>>>                 Extension('VK_AMD_draw_indirect_count',               1,
>>>          True),
>>>                 Extension('VK_AMD_rasterization_order',               1,
>>>          'device->rad_info.chip_class >= VI && device->rad_info.max_se >=
>>>          2'),
>>>          +    Extension('VK_AMD_shader_info',                       1,
>>> True),
>>>             ]
>>>               class VkVersion:
>>>          diff --git a/src/amd/vulkan/radv_pipeline.c
>>>          b/src/amd/vulkan/radv_pipeline.c
>>>          index d6b33a5327..2df03a83cf 100644
>>>          --- a/src/amd/vulkan/radv_pipeline.c
>>>          +++ b/src/amd/vulkan/radv_pipeline.c
>>>          @@ -1874,7 +1874,7 @@ void radv_create_shaders(struct
>>>          radv_pipeline *pipeline,
>>>                                   if (device->instance->debug_flags &
>>>          RADV_DEBUG_DUMP_SHADERS)
>>>                                           nir_print_shader(nir[i], stderr);
>>>             -                     if (!pipeline->device->trace_bo)
>>>          +                       if (!pipeline->device->keep_shader_info)
>>>                                           ralloc_free(nir[i]);
>>>                           }
>>>                   }
>>>          diff --git a/src/amd/vulkan/radv_pipeline_cache.c
>>>          b/src/amd/vulkan/radv_pipeline_cache.c
>>>          index 9ba9a3b61b..46198799a7 100644
>>>          --- a/src/amd/vulkan/radv_pipeline_cache.c
>>>          +++ b/src/amd/vulkan/radv_pipeline_cache.c
>>>          @@ -62,9 +62,11 @@ radv_pipeline_cache_init(struct
>>>          radv_pipeline_cache *cache,
>>>                   cache->hash_table = malloc(byte_size);
>>>                   /* We don't consider allocation failure fatal, we just
>>>          start with a 0-sized
>>>          -        * cache. */
>>>          +        * cache. Disable caching when we want to keep shader
>>>          debug info, since
>>>          +        * we don't get the debug info on cached shaders. */
>>>                   if (cache->hash_table == NULL ||
>>>          -           (device->instance->debug_flags & RADV_DEBUG_NO_CACHE))
>>>          +           (device->instance->debug_flags & RADV_DEBUG_NO_CACHE)
>>> ||
>>>          +           device->keep_shader_info)
>>>                           cache->table_size = 0;
>>>                   else
>>>                           memset(cache->hash_table, 0, byte_size);
>>>          @@ -186,8 +188,11 @@
>>>          radv_create_shader_variants_from_pipeline_cache(struct
>>>          radv_device *device,
>>>                   entry = radv_pipeline_cache_search_unlocked(cache, sha1);
>>>                   if (!entry) {
>>>          +               /* Again, don't cache when we want debug info,
>>>          since this isn't
>>>          +                * present in the cache. */
>>>                           if (!device->physical_device->disk_cache ||
>>>          -                   (device->instance->debug_flags &
>>>          RADV_DEBUG_NO_CACHE)) {
>>>          +                   (device->instance->debug_flags &
>>>          RADV_DEBUG_NO_CACHE) ||
>>>          +                   device->keep_shader_info) {
>>>                                   pthread_mutex_unlock(&cache->mutex);
>>>                                   return false;
>>>                           }
>>>          diff --git a/src/amd/vulkan/radv_private.h
>>>          b/src/amd/vulkan/radv_private.h
>>>          index a4e52b2530..169df5f37b 100644
>>>          --- a/src/amd/vulkan/radv_private.h
>>>          +++ b/src/amd/vulkan/radv_private.h
>>>          @@ -552,6 +552,9 @@ struct radv_device {
>>>                   struct radeon_winsys_bo                      *trace_bo;
>>>                   uint32_t
>>> *trace_id_ptr;
>>>             +     /* Whether to keep shader debug info, for tracing or
>>>          VK_AMD_shader_info */
>>>          +       bool
>>> keep_shader_info;
>>>          +
>>>                   struct radv_physical_device
>>> *physical_device;
>>>                   /* Backup in-memory cache to be used if the app doesn't
>>>          provide one */
>>>          diff --git a/src/amd/vulkan/radv_shader.c
>>>          b/src/amd/vulkan/radv_shader.c
>>>          index 5903917068..7f2f0fd750 100644
>>>          --- a/src/amd/vulkan/radv_shader.c
>>>          +++ b/src/amd/vulkan/radv_shader.c
>>>          @@ -46,6 +46,8 @@
>>>             #include "util/debug.h"
>>>             #include "ac_exp_param.h"
>>>             +#include "util/string_buffer.h"
>>>          +
>>>             static const struct nir_shader_compiler_options nir_options = {
>>>                   .vertex_id_zero_based = true,
>>>                   .lower_scmp = true,
>>>          @@ -471,7 +473,7 @@ shader_variant_create(struct radv_device
>>>          *device,
>>>                   free(binary.relocs);
>>>                   variant->ref_count = 1;
>>>             -     if (device->trace_bo) {
>>>          +       if (device->keep_shader_info) {
>>>                           variant->disasm_string = binary.disasm_string;
>>>                           if (!gs_copy_shader && !module->nir) {
>>>                                   variant->nir = *shaders;
>>>          @@ -593,11 +595,20 @@ radv_get_shader_name(struct
>>>          radv_shader_variant *var, gl_shader_stage stage)
>>>                   };
>>>             }
>>>             -void
>>>          -radv_shader_dump_stats(struct radv_device *device,
>>>          -                      struct radv_shader_variant *variant,
>>>          -                      gl_shader_stage stage,
>>>          -                      FILE *file)
>>>          +static uint32_t
>>>          +get_total_sgprs(struct radv_device *device)
>>>          +{
>>>          +       if (device->physical_device->rad_info.chip_class >= VI)
>>>          +               return 800;
>>>          +       else
>>>          +               return 512;
>>>          +}
>>>          +
>>>          +static void
>>>          +generate_shader_stats(struct radv_device *device,
>>>          +                     struct radv_shader_variant *variant,
>>>          +                     gl_shader_stage stage,
>>>          +                     struct _mesa_string_buffer *buf)
>>>             {
>>>                   unsigned lds_increment =
>>>          device->physical_device->rad_info.chip_class >= CIK ? 512 : 256;
>>>                   struct ac_shader_config *conf;
>>>          @@ -623,12 +634,8 @@ radv_shader_dump_stats(struct radv_device
>>>          *device,
>>>                                                lds_increment);
>>>                   }
>>>             -     if (conf->num_sgprs) {
>>>          -               if (device->physical_device->rad_info.chip_class
>>>           >= VI)
>>>          -                       max_simd_waves = MIN2(max_simd_waves,
>>>          800 / conf->num_sgprs);
>>>          -               else
>>>          -                       max_simd_waves = MIN2(max_simd_waves,
>>>          512 / conf->num_sgprs);
>>>          -       }
>>>          +       if (conf->num_sgprs)
>>>          +               max_simd_waves = MIN2(max_simd_waves,
>>>          get_total_sgprs(device) / conf->num_sgprs);
>>>                   if (conf->num_vgprs)
>>>                           max_simd_waves = MIN2(max_simd_waves, 256 /
>>>          conf->num_vgprs);
>>>          @@ -639,27 +646,121 @@ radv_shader_dump_stats(struct radv_device
>>>          *device,
>>>                   if (lds_per_wave)
>>>                           max_simd_waves = MIN2(max_simd_waves, 16384 /
>>>          lds_per_wave);
>>>             +     if (stage == MESA_SHADER_FRAGMENT) {
>>>          +               _mesa_string_buffer_printf(buf, "*** SHADER
>>>          CONFIG ***\n"
>>>          +                                          "SPI_PS_INPUT_ADDR =
>>>          0x%04x\n"
>>>          +                                          "SPI_PS_INPUT_ENA  =
>>>          0x%04x\n",
>>>          +
>>> conf->spi_ps_input_addr, conf->spi_ps_input_ena);
>>>          +       }
>>>          +
>>>          +       _mesa_string_buffer_printf(buf, "*** SHADER STATS ***\n"
>>>          +                                  "SGPRS: %d\n"
>>>          +                                  "VGPRS: %d\n"
>>>          +                                  "Spilled SGPRs: %d\n"
>>>          +                                  "Spilled VGPRs: %d\n"
>>>          +                                  "Code Size: %d bytes\n"
>>>          +                                  "LDS: %d blocks\n"
>>>          +                                  "Scratch: %d bytes per wave\n"
>>>          +                                  "Max Waves: %d\n"
>>>          +                                  "********************\n\n\n",
>>>          +                                  conf->num_sgprs,
>>> conf->num_vgprs,
>>>          +                                  conf->spilled_sgprs,
>>>          conf->spilled_vgprs, variant->code_size,
>>>          +                                  conf->lds_size,
>>>          conf->scratch_bytes_per_wave,
>>>          +                                  max_simd_waves);
>>>          +}
>>>          +
>>>          +void
>>>          +radv_shader_dump_stats(struct radv_device *device,
>>>          +                      struct radv_shader_variant *variant,
>>>          +                      gl_shader_stage stage,
>>>          +                      FILE *file)
>>>          +{
>>>          +       struct _mesa_string_buffer *buf =
>>>          _mesa_string_buffer_create(NULL, 256);
>>>          +
>>>          +       generate_shader_stats(device, variant, stage, buf);
>>>          +
>>>                   fprintf(file, "\n%s:\n", radv_get_shader_name(variant,
>>>          stage));
>>>          +       fprintf(file, buf->buf);
>>>             -     if (stage == MESA_SHADER_FRAGMENT) {
>>>          -               fprintf(file, "*** SHADER CONFIG ***\n"
>>>          -                       "SPI_PS_INPUT_ADDR = 0x%04x\n"
>>>          -                       "SPI_PS_INPUT_ENA  = 0x%04x\n",
>>>          -                       conf->spi_ps_input_addr,
>>>          conf->spi_ps_input_ena);
>>>          +       _mesa_string_buffer_destroy(buf);
>>>          +}
>>>          +
>>>          +VkResult
>>>          +radv_GetShaderInfoAMD(VkDevice _device,
>>>          +                     VkPipeline _pipeline,
>>>          +                     VkShaderStageFlagBits shaderStage,
>>>          +                     VkShaderInfoTypeAMD infoType,
>>>          +                     size_t* pInfoSize,
>>>          +                     void* pInfo)
>>>          +{
>>>          +       RADV_FROM_HANDLE(radv_device, device, _device);
>>>          +       RADV_FROM_HANDLE(radv_pipeline, pipeline, _pipeline);
>>>          +       gl_shader_stage stage =
>>>          vk_to_mesa_shader_stage(shaderStage);
>>>          +       struct radv_shader_variant *variant =
>>>          pipeline->shaders[stage];
>>>          +       struct _mesa_string_buffer *buf;
>>>          +       VkResult result = VK_SUCCESS;
>>>          +
>>>          +       /* Spec doesn't indicate what to do if the stage is
>>>          invalid, so just
>>>          +        * return no info for this. */
>>>          +       if (!variant)
>>>          +               return VK_ERROR_FEATURE_NOT_PRESENT;
>>>          +
>>>          +       switch (infoType) {
>>>          +       case VK_SHADER_INFO_TYPE_STATISTICS_AMD:
>>>          +               if (!pInfo) {
>>>          +                       *pInfoSize =
>>>          sizeof(VkShaderStatisticsInfoAMD);
>>>          +               } else {
>>>          +                       struct ac_shader_config *conf =
>>>          &variant->config;
>>>          +
>>>          +                       VkShaderStatisticsInfoAMD statistics = {};
>>>          +                       statistics.shaderStageMask = shaderStage;
>>>          +                       statistics.resourceUsage.numUsedVgprs =
>>>          conf->num_vgprs + conf->spilled_vgprs;
>>>          +                       statistics.resourceUsage.numUsedSgprs =
>>>          conf->num_sgprs + conf->spilled_sgprs;
>>>          +
>>> statistics.resourceUsage.ldsSizePerLocalWorkGroup = 16384;
>>>          +
>>> statistics.resourceUsage.ldsUsageSizeInBytes = conf->lds_size;
>>>          +
>>> statistics.resourceUsage.scratchMemUsageInBytes =
>>>          conf->scratch_bytes_per_wave;
>>>          +                       statistics.numPhysicalVgprs =
>>>          statistics.numAvailableVgprs = 256;
>>>          +                       statistics.numPhysicalSgprs =
>>>          statistics.numAvailableSgprs = get_total_sgprs(device);
>>>          +                       statistics.computeWorkGroupSize[0] =
>>>          variant->nir->info.cs.local_size[0];
>>>          +                       statistics.computeWorkGroupSize[1] =
>>>          variant->nir->info.cs.local_size[1];
>>>          +                       statistics.computeWorkGroupSize[2] =
>>>          variant->nir->info.cs.local_size[2];
>>>          +
>>>          +                       size_t size = *pInfoSize;
>>>          +                       *pInfoSize = sizeof(statistics);
>>>          +
>>>          +                       memcpy(pInfo, &statistics, MIN2(size,
>>>          *pInfoSize));
>>>          +
>>>          +                       if (size < *pInfoSize)
>>>          +                               result = VK_INCOMPLETE;
>>>          +               }
>>>          +
>>>          +               break;
>>>          +       case VK_SHADER_INFO_TYPE_DISASSEMBLY_AMD:
>>>          +               buf = _mesa_string_buffer_create(NULL, 1024);
>>>          +
>>>          +               _mesa_string_buffer_printf(buf, "%s:\n",
>>>          radv_get_shader_name(variant, stage));
>>>          +               _mesa_string_buffer_printf(buf, "%s\n\n",
>>>          variant->disasm_string);
>>>          +               generate_shader_stats(device, variant, stage,
>>> buf);
>>>          +
>>>          +               if (!pInfo) {
>>>          +                       *pInfoSize = buf->length;
>>>          +               } else {
>>>          +                       size_t size = *pInfoSize;
>>>          +                       *pInfoSize = buf->length;
>>>          +
>>>          +                       memcpy(pInfo, buf->buf, MIN2(size,
>>>          buf->length));
>>>          +
>>>          +                       if (size < buf->length)
>>>          +                               result = VK_INCOMPLETE;
>>>          +               }
>>>          +
>>>          +               _mesa_string_buffer_destroy(buf);
>>>          +               break;
>>>          +       default:
>>>          +               /* VK_SHADER_INFO_TYPE_BINARY_AMD unimplemented
>>>          for now. */
>>>          +               result = VK_ERROR_FEATURE_NOT_PRESENT;
>>>          +               break;
>>>                   }
>>>             -     fprintf(file, "*** SHADER STATS ***\n"
>>>          -               "SGPRS: %d\n"
>>>          -               "VGPRS: %d\n"
>>>          -               "Spilled SGPRs: %d\n"
>>>          -               "Spilled VGPRs: %d\n"
>>>          -               "Code Size: %d bytes\n"
>>>          -               "LDS: %d blocks\n"
>>>          -               "Scratch: %d bytes per wave\n"
>>>          -               "Max Waves: %d\n"
>>>          -               "********************\n\n\n",
>>>          -               conf->num_sgprs, conf->num_vgprs,
>>>          -               conf->spilled_sgprs, conf->spilled_vgprs,
>>>          variant->code_size,
>>>          -               conf->lds_size, conf->scratch_bytes_per_wave,
>>>          -               max_simd_waves);
>>>          +       return result;
>>>             }
>>>
>>>
>> _______________________________________________
>> mesa-dev mailing list
>> mesa-dev@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev