radv: use LOAD_CONTEXT_REG when loading fast clear values

Submitted by Samuel Pitoiset on Nov. 7, 2018, 2:29 p.m.

Details

Message ID 20181107142913.5827-1-samuel.pitoiset@gmail.com
State New
Headers show
Series "radv: use LOAD_CONTEXT_REG when loading fast clear values" ( rev: 1 ) in Mesa

Not browsing as part of any series.

Commit Message

Samuel Pitoiset Nov. 7, 2018, 2:29 p.m.
This avoids syncing the Micro Engine (no PFP_SYNC_ME packet is needed
after the load). This is only supported on VI+ currently. There is
probably a way to use LOAD_CONTEXT_REG on previous chips, but that can
be done later.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
---
 src/amd/common/sid.h             |  1 +
 src/amd/vulkan/radv_cmd_buffer.c | 60 +++++++++++++++++++++-----------
 src/amd/vulkan/radv_device.c     |  4 +++
 src/amd/vulkan/radv_private.h    |  1 +
 4 files changed, 46 insertions(+), 20 deletions(-)

Patch hide | download patch | download mbox

diff --git a/src/amd/common/sid.h b/src/amd/common/sid.h
index 5c53133147..35782046dd 100644
--- a/src/amd/common/sid.h
+++ b/src/amd/common/sid.h
@@ -217,6 +217,7 @@ 
 #define PKT3_INCREMENT_CE_COUNTER              0x84
 #define PKT3_INCREMENT_DE_COUNTER              0x85
 #define PKT3_WAIT_ON_CE_COUNTER                0x86
+#define PKT3_LOAD_CONTEXT_REG                  0x9F /* new for VI */
 
 #define PKT_TYPE_S(x)                   (((unsigned)(x) & 0x3) << 30)
 #define PKT_TYPE_G(x)                   (((x) >> 30) & 0x3)
diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index de67a8a363..4e10470357 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -37,6 +37,8 @@ 
 
 #include "ac_debug.h"
 
+#include "addrlib/gfx9/chip/gfx9_enum.h"
+
 enum {
 	RADV_PREFETCH_VBO_DESCRIPTORS	= (1 << 0),
 	RADV_PREFETCH_VS		= (1 << 1),
@@ -1313,17 +1315,27 @@  radv_load_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer,
 	if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT)
 		++reg_count;
 
-	radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
-	radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) |
-			COPY_DATA_DST_SEL(COPY_DATA_REG) |
-			(reg_count == 2 ? COPY_DATA_COUNT_SEL : 0));
-	radeon_emit(cs, va);
-	radeon_emit(cs, va >> 32);
-	radeon_emit(cs, (R_028028_DB_STENCIL_CLEAR + 4 * reg_offset) >> 2);
-	radeon_emit(cs, 0);
+	uint32_t reg = R_028028_DB_STENCIL_CLEAR + 4 * reg_offset;
 
-	radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
-	radeon_emit(cs, 0);
+	if (cmd_buffer->device->has_load_context_reg) {
+		radeon_emit(cs, PKT3(PKT3_LOAD_CONTEXT_REG, 3, 0));
+		radeon_emit(cs, va);
+		radeon_emit(cs, va >> 32);
+		radeon_emit(cs, (reg >> 2) - CONTEXT_SPACE_START);
+		radeon_emit(cs, reg_count);
+	} else {
+		radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
+		radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) |
+				COPY_DATA_DST_SEL(COPY_DATA_REG) |
+				(reg_count == 2 ? COPY_DATA_COUNT_SEL : 0));
+		radeon_emit(cs, va);
+		radeon_emit(cs, va >> 32);
+		radeon_emit(cs, reg >> 2);
+		radeon_emit(cs, 0);
+
+		radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
+		radeon_emit(cs, 0);
+	}
 }
 
 /*
@@ -1443,17 +1455,25 @@  radv_load_color_clear_metadata(struct radv_cmd_buffer *cmd_buffer,
 
 	uint32_t reg = R_028C8C_CB_COLOR0_CLEAR_WORD0 + cb_idx * 0x3c;
 
-	radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, cmd_buffer->state.predicating));
-	radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) |
-			COPY_DATA_DST_SEL(COPY_DATA_REG) |
-			COPY_DATA_COUNT_SEL);
-	radeon_emit(cs, va);
-	radeon_emit(cs, va >> 32);
-	radeon_emit(cs, reg >> 2);
-	radeon_emit(cs, 0);
+	if (cmd_buffer->device->has_load_context_reg) {
+		radeon_emit(cs, PKT3(PKT3_LOAD_CONTEXT_REG, 3, cmd_buffer->state.predicating));
+		radeon_emit(cs, va);
+		radeon_emit(cs, va >> 32);
+		radeon_emit(cs, (reg >> 2) - CONTEXT_SPACE_START);
+		radeon_emit(cs, 2);
+	} else {
+		radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, cmd_buffer->state.predicating));
+		radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) |
+				COPY_DATA_DST_SEL(COPY_DATA_REG) |
+				COPY_DATA_COUNT_SEL);
+		radeon_emit(cs, va);
+		radeon_emit(cs, va >> 32);
+		radeon_emit(cs, reg >> 2);
+		radeon_emit(cs, 0);
 
-	radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, cmd_buffer->state.predicating));
-	radeon_emit(cs, 0);
+		radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, cmd_buffer->state.predicating));
+		radeon_emit(cs, 0);
+	}
 }
 
 static void
diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index d68111c25b..33a1b5bc1b 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -1680,6 +1680,10 @@  VkResult radv_CreateDevice(
 		device->physical_device->rad_info.chip_class >= VI &&
 		device->physical_device->rad_info.max_se >= 2;
 
+	/* TODO: Figure out how to use LOAD_CONTEXT_REG with previous gens. */
+	device->has_load_context_reg =
+		device->physical_device->rad_info.chip_class >= VI;
+
 	if (getenv("RADV_TRACE_FILE")) {
 		const char *filename = getenv("RADV_TRACE_FILE");
 
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index 1628be1002..28a06f2a48 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -657,6 +657,7 @@  struct radv_device {
 
 	bool always_use_syncobj;
 	bool has_distributed_tess;
+	bool has_load_context_reg;
 	bool pbb_allowed;
 	bool dfsm_allowed;
 	uint32_t tess_offchip_block_dw_size;

Comments

On 11/7/18 3:29 PM, Samuel Pitoiset wrote:
> This avoids syncing the Micro Engine. This is only supported
> for VI+ currently. There is probably a way for using
> LOAD_CONTEXT_REG on previous chips but that could be done later.
> 
> Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
> ---
>   src/amd/common/sid.h             |  1 +
>   src/amd/vulkan/radv_cmd_buffer.c | 60 +++++++++++++++++++++-----------
>   src/amd/vulkan/radv_device.c     |  4 +++
>   src/amd/vulkan/radv_private.h    |  1 +
>   4 files changed, 46 insertions(+), 20 deletions(-)
> 
> diff --git a/src/amd/common/sid.h b/src/amd/common/sid.h
> index 5c53133147..35782046dd 100644
> --- a/src/amd/common/sid.h
> +++ b/src/amd/common/sid.h
> @@ -217,6 +217,7 @@
>   #define PKT3_INCREMENT_CE_COUNTER              0x84
>   #define PKT3_INCREMENT_DE_COUNTER              0x85
>   #define PKT3_WAIT_ON_CE_COUNTER                0x86
> +#define PKT3_LOAD_CONTEXT_REG                  0x9F /* new for VI */
>   
>   #define PKT_TYPE_S(x)                   (((unsigned)(x) & 0x3) << 30)
>   #define PKT_TYPE_G(x)                   (((x) >> 30) & 0x3)
> diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
> index de67a8a363..4e10470357 100644
> --- a/src/amd/vulkan/radv_cmd_buffer.c
> +++ b/src/amd/vulkan/radv_cmd_buffer.c
> @@ -37,6 +37,8 @@
>   
>   #include "ac_debug.h"
>   
> +#include "addrlib/gfx9/chip/gfx9_enum.h"
> +
>   enum {
>   	RADV_PREFETCH_VBO_DESCRIPTORS	= (1 << 0),
>   	RADV_PREFETCH_VS		= (1 << 1),
> @@ -1313,17 +1315,27 @@ radv_load_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer,
>   	if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT)
>   		++reg_count;
>   
> -	radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
> -	radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) |
> -			COPY_DATA_DST_SEL(COPY_DATA_REG) |
> -			(reg_count == 2 ? COPY_DATA_COUNT_SEL : 0));
> -	radeon_emit(cs, va);
> -	radeon_emit(cs, va >> 32);
> -	radeon_emit(cs, (R_028028_DB_STENCIL_CLEAR + 4 * reg_offset) >> 2);
> -	radeon_emit(cs, 0);
> +	uint32_t reg = R_028028_DB_STENCIL_CLEAR + 4 * reg_offset;
>   
> -	radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
> -	radeon_emit(cs, 0);
> +	if (cmd_buffer->device->has_load_context_reg) {
> +		radeon_emit(cs, PKT3(PKT3_LOAD_CONTEXT_REG, 3, 0));
> +		radeon_emit(cs, va);
> +		radeon_emit(cs, va >> 32);
> +		radeon_emit(cs, (reg >> 2) - CONTEXT_SPACE_START);
> +		radeon_emit(cs, reg_count);
> +	} else {
> +		radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
> +		radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) |
> +				COPY_DATA_DST_SEL(COPY_DATA_REG) |
> +				(reg_count == 2 ? COPY_DATA_COUNT_SEL : 0));
> +		radeon_emit(cs, va);
> +		radeon_emit(cs, va >> 32);
> +		radeon_emit(cs, reg >> 2);
> +		radeon_emit(cs, 0);
> +
> +		radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
> +		radeon_emit(cs, 0);
> +	}

I just realized that the has_load_context_reg check (and the COPY_DATA fallback) in this function is useless, because HTILE is only available for VI+.

>   }
>   
>   /*
> @@ -1443,17 +1455,25 @@ radv_load_color_clear_metadata(struct radv_cmd_buffer *cmd_buffer,
>   
>   	uint32_t reg = R_028C8C_CB_COLOR0_CLEAR_WORD0 + cb_idx * 0x3c;
>   
> -	radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, cmd_buffer->state.predicating));
> -	radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) |
> -			COPY_DATA_DST_SEL(COPY_DATA_REG) |
> -			COPY_DATA_COUNT_SEL);
> -	radeon_emit(cs, va);
> -	radeon_emit(cs, va >> 32);
> -	radeon_emit(cs, reg >> 2);
> -	radeon_emit(cs, 0);
> +	if (cmd_buffer->device->has_load_context_reg) {
> +		radeon_emit(cs, PKT3(PKT3_LOAD_CONTEXT_REG, 3, cmd_buffer->state.predicating));
> +		radeon_emit(cs, va);
> +		radeon_emit(cs, va >> 32);
> +		radeon_emit(cs, (reg >> 2) - CONTEXT_SPACE_START);
> +		radeon_emit(cs, 2);
> +	} else {
> +		radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, cmd_buffer->state.predicating));
> +		radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) |
> +				COPY_DATA_DST_SEL(COPY_DATA_REG) |
> +				COPY_DATA_COUNT_SEL);
> +		radeon_emit(cs, va);
> +		radeon_emit(cs, va >> 32);
> +		radeon_emit(cs, reg >> 2);
> +		radeon_emit(cs, 0);
>   
> -	radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, cmd_buffer->state.predicating));
> -	radeon_emit(cs, 0);
> +		radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, cmd_buffer->state.predicating));
> +		radeon_emit(cs, 0);
> +	}
>   }
>   
>   static void
> diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
> index d68111c25b..33a1b5bc1b 100644
> --- a/src/amd/vulkan/radv_device.c
> +++ b/src/amd/vulkan/radv_device.c
> @@ -1680,6 +1680,10 @@ VkResult radv_CreateDevice(
>   		device->physical_device->rad_info.chip_class >= VI &&
>   		device->physical_device->rad_info.max_se >= 2;
>   
> +	/* TODO: Figure out how to use LOAD_CONTEXT_REG with previous gens. */
> +	device->has_load_context_reg =
> +		device->physical_device->rad_info.chip_class >= VI;
> +
>   	if (getenv("RADV_TRACE_FILE")) {
>   		const char *filename = getenv("RADV_TRACE_FILE");
>   
> diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
> index 1628be1002..28a06f2a48 100644
> --- a/src/amd/vulkan/radv_private.h
> +++ b/src/amd/vulkan/radv_private.h
> @@ -657,6 +657,7 @@ struct radv_device {
>   
>   	bool always_use_syncobj;
>   	bool has_distributed_tess;
> +	bool has_load_context_reg;
>   	bool pbb_allowed;
>   	bool dfsm_allowed;
>   	uint32_t tess_offchip_block_dw_size;
>