radv: only load 2-dwords for vertex buffers when robustness is disabled

Submitted by Samuel Pitoiset on March 1, 2019, 3:54 p.m.

Details

Message ID 20190301155412.25288-1-samuel.pitoiset@gmail.com
State New
Headers show
Series "radv: only load 2-dwords for vertex buffers when robustness is disabled" ( rev: 1 ) in Mesa

Not browsing as part of any series.

Commit Message

Samuel Pitoiset March 1, 2019, 3:54 p.m.
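This patch depends on the typed vertex fetches series, which must be applied first. When robustBufferAccess is not enabled, only the first two dwords of each vertex buffer descriptor are loaded; the remaining two dwords are built from constants in the shader.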

Totals from affected shaders:
SGPRS: 445574 -> 452638 (1.59 %)
VGPRS: 373392 -> 370436 (-0.79 %)
Spilled SGPRs: 77 -> 14 (-81.82 %)
Spilled VGPRs: 0 -> 0 (0.00 %)
Code Size: 14162288 -> 14413036 (1.77 %) bytes
Max Waves: 119999 -> 120509 (0.43 %)

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
---
 src/amd/vulkan/radv_device.c      |  2 ++
 src/amd/vulkan/radv_nir_to_llvm.c | 21 ++++++++++++++++++++-
 src/amd/vulkan/radv_private.h     |  1 +
 src/amd/vulkan/radv_shader.c      |  1 +
 src/amd/vulkan/radv_shader.h      |  1 +
 5 files changed, 25 insertions(+), 1 deletion(-)

Patch hide | download patch | download mbox

diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index fc04de21025..cbbd6b2f636 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -1792,6 +1792,8 @@  VkResult radv_CreateDevice(
 	device->has_distributed_tess =
 		device->physical_device->rad_info.chip_class >= VI &&
 		device->physical_device->rad_info.max_se >= 2;
+	device->robust_buffer_access = pCreateInfo->pEnabledFeatures &&
+		pCreateInfo->pEnabledFeatures->robustBufferAccess;
 
 	if (getenv("RADV_TRACE_FILE")) {
 		const char *filename = getenv("RADV_TRACE_FILE");
diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c
index 79f8bffa0bd..adcfb3b776d 100644
--- a/src/amd/vulkan/radv_nir_to_llvm.c
+++ b/src/amd/vulkan/radv_nir_to_llvm.c
@@ -807,8 +807,11 @@  declare_vs_specific_input_sgprs(struct radv_shader_context *ctx,
 	    (stage == MESA_SHADER_VERTEX ||
 	     (has_previous_stage && previous_stage == MESA_SHADER_VERTEX))) {
 		if (ctx->shader_info->info.vs.has_vertex_buffers) {
+			LLVMTypeRef type =
+				ctx->options->robust_buffer_access ? ctx->ac.v4i32 : ctx->ac.v2i32;
+
 			add_arg(args, ARG_SGPR,
-				ac_array_in_const32_addr_space(ctx->ac.v4i32),
+				ac_array_in_const32_addr_space(type),
 				&ctx->vertex_buffers);
 		}
 		add_arg(args, ARG_SGPR, ctx->ac.i32, &ctx->abi.base_vertex);
@@ -2181,6 +2184,22 @@  handle_vs_input_decl(struct radv_shader_context *ctx,
 		t_offset = LLVMConstInt(ctx->ac.i32, attrib_binding, false);
 		t_list = ac_build_load_to_sgpr(&ctx->ac, t_list_ptr, t_offset);
 
+		if (!ctx->options->robust_buffer_access) {
+			LLVMValueRef desc[4];
+			desc[0] = ac_llvm_extract_elem(&ctx->ac, t_list, 0);
+			desc[1] = ac_llvm_extract_elem(&ctx->ac, t_list, 1);
+			desc[2] = LLVMConstInt(ctx->ac.i32, 0xffffffff, 0);
+			desc[3] = LLVMConstInt(ctx->ac.i32,
+					       S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
+					       S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
+					       S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
+					       S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
+					       S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_UINT) |
+					       S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32), 0);
+
+			t_list = ac_build_gather_values(&ctx->ac, desc, 4);
+		}
+
 		input = ac_build_tbuffer_load(&ctx->ac, t_list, buffer_index,
 					      LLVMConstInt(ctx->ac.i32, attrib_offset, false),
 					      ctx->ac.i32_0, ctx->ac.i32_0,
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index 39fa6110fde..f81ba4c602c 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -676,6 +676,7 @@  struct radv_device {
 
 	bool always_use_syncobj;
 	bool has_distributed_tess;
+	bool robust_buffer_access;
 	bool pbb_allowed;
 	bool dfsm_allowed;
 	uint32_t tess_offchip_block_dw_size;
diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index 1dcb0606246..71010670903 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -605,6 +605,7 @@  shader_variant_create(struct radv_device *device,
 	options->check_ir = device->instance->debug_flags & RADV_DEBUG_CHECKIR;
 	options->tess_offchip_block_dw_size = device->tess_offchip_block_dw_size;
 	options->address32_hi = device->physical_device->rad_info.address32_hi;
+	options->robust_buffer_access = device->robust_buffer_access;
 
 	if (options->supports_spill)
 		tm_options |= AC_TM_SUPPORTS_SPILL;
diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h
index fe2f2868630..9e214457ad8 100644
--- a/src/amd/vulkan/radv_shader.h
+++ b/src/amd/vulkan/radv_shader.h
@@ -127,6 +127,7 @@  struct radv_nir_compiler_options {
 	bool dump_preoptir;
 	bool record_llvm_ir;
 	bool check_ir;
+	bool robust_buffer_access;
 	enum radeon_family family;
 	enum chip_class chip_class;
 	uint32_t tess_offchip_block_dw_size;