[1/8] radeonsi: correct WRITE_DATA.DST_SEL definitions

Submitted by Marek Olšák on Jan. 18, 2019, 4:43 p.m.

Details

Message ID 20190118164359.19461-2-maraeo@gmail.com
State New
Headers show
Series "RadeonSI: PKT3_WRITE_DATA for small uploads" ( rev: 1 ) in Mesa

Not browsing as part of any series.

Commit Message

Marek Olšák Jan. 18, 2019, 4:43 p.m.
From: Marek Olšák <marek.olsak@amd.com>

---
 src/amd/common/sid.h                         |  4 ++--
 src/amd/vulkan/radv_cmd_buffer.c             | 16 ++++++++--------
 src/amd/vulkan/radv_meta_buffer.c            |  2 +-
 src/amd/vulkan/radv_query.c                  |  2 +-
 src/gallium/drivers/radeonsi/si_fence.c      |  2 +-
 src/gallium/drivers/radeonsi/si_pipe.c       |  2 +-
 src/gallium/drivers/radeonsi/si_state_draw.c |  2 +-
 7 files changed, 15 insertions(+), 15 deletions(-)

Patch hide | download patch | download mbox

diff --git a/src/amd/common/sid.h b/src/amd/common/sid.h
index 12e80df4884..5c8eee0124d 100644
--- a/src/amd/common/sid.h
+++ b/src/amd/common/sid.h
@@ -126,25 +126,25 @@ 
 #define   R_370_CONTROL				0x370 /* 0x[packet number][word index] */
 #define     S_370_ENGINE_SEL(x)			(((unsigned)(x) & 0x3) << 30)
 #define       V_370_ME				0
 #define       V_370_PFP				1
 #define       V_370_CE				2
 #define       V_370_DE				3
 #define     S_370_WR_CONFIRM(x)			(((unsigned)(x) & 0x1) << 20)
 #define     S_370_WR_ONE_ADDR(x)		(((unsigned)(x) & 0x1) << 16)
 #define     S_370_DST_SEL(x)			(((unsigned)(x) & 0xf) << 8)
 #define       V_370_MEM_MAPPED_REGISTER		0
-#define       V_370_MEMORY_SYNC			1
+#define       V_370_MEM_GRBM			1 /* sync across GRBM */
 #define       V_370_TC_L2			2
 #define       V_370_GDS				3
 #define       V_370_RESERVED			4
-#define       V_370_MEM_ASYNC			5
+#define       V_370_MEM				5 /* not on SI */
 #define   R_371_DST_ADDR_LO			0x371
 #define   R_372_DST_ADDR_HI			0x372
 #define PKT3_DRAW_INDEX_INDIRECT_MULTI         0x38
 #define PKT3_MEM_SEMAPHORE                     0x39
 #define PKT3_MPEG_INDEX                        0x3A /* not on CIK */
 #define PKT3_WAIT_REG_MEM                      0x3C
 #define		WAIT_REG_MEM_EQUAL		3
 #define		WAIT_REG_MEM_NOT_EQUAL		4
 #define		WAIT_REG_MEM_GREATER_OR_EQUAL   5
 #define         WAIT_REG_MEM_MEM_SPACE(x)       (((unsigned)(x) & 0x3) << 4)
diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index f41d6c0b3e7..20aeda96da2 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -446,21 +446,21 @@  radv_cmd_buffer_upload_data(struct radv_cmd_buffer *cmd_buffer,
 
 static void
 radv_emit_write_data_packet(struct radv_cmd_buffer *cmd_buffer, uint64_t va,
 			    unsigned count, const uint32_t *data)
 {
 	struct radeon_cmdbuf *cs = cmd_buffer->cs;
 
 	radeon_check_space(cmd_buffer->device->ws, cs, 4 + count);
 
 	radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + count, 0));
-	radeon_emit(cs, S_370_DST_SEL(V_370_MEM_ASYNC) |
+	radeon_emit(cs, S_370_DST_SEL(V_370_MEM) |
 		    S_370_WR_CONFIRM(1) |
 		    S_370_ENGINE_SEL(V_370_ME));
 	radeon_emit(cs, va);
 	radeon_emit(cs, va >> 32);
 	radeon_emit_array(cs, data, count);
 }
 
 void radv_cmd_buffer_trace_emit(struct radv_cmd_buffer *cmd_buffer)
 {
 	struct radv_device *device = cmd_buffer->device;
@@ -1237,21 +1237,21 @@  radv_set_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer,
 	if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
 		++reg_count;
 	} else {
 		++reg_offset;
 		va += 4;
 	}
 	if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT)
 		++reg_count;
 
 	radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + reg_count, 0));
-	radeon_emit(cs, S_370_DST_SEL(V_370_MEM_ASYNC) |
+	radeon_emit(cs, S_370_DST_SEL(V_370_MEM) |
 			S_370_WR_CONFIRM(1) |
 			S_370_ENGINE_SEL(V_370_PFP));
 	radeon_emit(cs, va);
 	radeon_emit(cs, va >> 32);
 	if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT)
 		radeon_emit(cs, ds_clear_value.stencil);
 	if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT)
 		radeon_emit(cs, fui(ds_clear_value.depth));
 }
 
@@ -1261,21 +1261,21 @@  radv_set_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer,
 static void
 radv_set_tc_compat_zrange_metadata(struct radv_cmd_buffer *cmd_buffer,
 				   struct radv_image *image,
 				   uint32_t value)
 {
 	struct radeon_cmdbuf *cs = cmd_buffer->cs;
 	uint64_t va = radv_buffer_get_va(image->bo);
 	va += image->offset + image->tc_compat_zrange_offset;
 
 	radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
-	radeon_emit(cs, S_370_DST_SEL(V_370_MEM_ASYNC) |
+	radeon_emit(cs, S_370_DST_SEL(V_370_MEM) |
 			S_370_WR_CONFIRM(1) |
 			S_370_ENGINE_SEL(V_370_PFP));
 	radeon_emit(cs, va);
 	radeon_emit(cs, va >> 32);
 	radeon_emit(cs, value);
 }
 
 static void
 radv_update_tc_compat_zrange_metadata(struct radv_cmd_buffer *cmd_buffer,
 				      struct radv_image *image,
@@ -1374,21 +1374,21 @@  void
 radv_update_fce_metadata(struct radv_cmd_buffer *cmd_buffer,
 			 struct radv_image *image, bool value)
 {
 	uint64_t pred_val = value;
 	uint64_t va = radv_buffer_get_va(image->bo);
 	va += image->offset + image->fce_pred_offset;
 
 	assert(radv_image_has_dcc(image));
 
 	radeon_emit(cmd_buffer->cs, PKT3(PKT3_WRITE_DATA, 4, 0));
-	radeon_emit(cmd_buffer->cs, S_370_DST_SEL(V_370_MEM_ASYNC) |
+	radeon_emit(cmd_buffer->cs, S_370_DST_SEL(V_370_MEM) |
 				    S_370_WR_CONFIRM(1) |
 				    S_370_ENGINE_SEL(V_370_PFP));
 	radeon_emit(cmd_buffer->cs, va);
 	radeon_emit(cmd_buffer->cs, va >> 32);
 	radeon_emit(cmd_buffer->cs, pred_val);
 	radeon_emit(cmd_buffer->cs, pred_val >> 32);
 }
 
 /**
  * Update the DCC predicate to reflect the compression state.
@@ -1397,21 +1397,21 @@  void
 radv_update_dcc_metadata(struct radv_cmd_buffer *cmd_buffer,
 			 struct radv_image *image, bool value)
 {
 	uint64_t pred_val = value;
 	uint64_t va = radv_buffer_get_va(image->bo);
 	va += image->offset + image->dcc_pred_offset;
 
 	assert(radv_image_has_dcc(image));
 
 	radeon_emit(cmd_buffer->cs, PKT3(PKT3_WRITE_DATA, 4, 0));
-	radeon_emit(cmd_buffer->cs, S_370_DST_SEL(V_370_MEM_ASYNC) |
+	radeon_emit(cmd_buffer->cs, S_370_DST_SEL(V_370_MEM) |
 				    S_370_WR_CONFIRM(1) |
 				    S_370_ENGINE_SEL(V_370_PFP));
 	radeon_emit(cmd_buffer->cs, va);
 	radeon_emit(cmd_buffer->cs, va >> 32);
 	radeon_emit(cmd_buffer->cs, pred_val);
 	radeon_emit(cmd_buffer->cs, pred_val >> 32);
 }
 
 /**
  * Update the fast clear color values if the image is bound as a color buffer.
@@ -1453,21 +1453,21 @@  radv_set_color_clear_metadata(struct radv_cmd_buffer *cmd_buffer,
 			      uint32_t color_values[2])
 {
 	struct radeon_cmdbuf *cs = cmd_buffer->cs;
 	uint64_t va = radv_buffer_get_va(image->bo);
 
 	va += image->offset + image->clear_value_offset;
 
 	assert(radv_image_has_cmask(image) || radv_image_has_dcc(image));
 
 	radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 4, 0));
-	radeon_emit(cs, S_370_DST_SEL(V_370_MEM_ASYNC) |
+	radeon_emit(cs, S_370_DST_SEL(V_370_MEM) |
 			S_370_WR_CONFIRM(1) |
 			S_370_ENGINE_SEL(V_370_PFP));
 	radeon_emit(cs, va);
 	radeon_emit(cs, va >> 32);
 	radeon_emit(cs, color_values[0]);
 	radeon_emit(cs, color_values[1]);
 }
 
 /**
  * Update the clear color values for this image.
@@ -4665,30 +4665,30 @@  static void write_event(struct radv_cmd_buffer *cmd_buffer,
 	 */
 	if (stageMask & (VK_PIPELINE_STAGE_TRANSFER_BIT |
 			 VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT))
 		si_cp_dma_wait_for_idle(cmd_buffer);
 
 	/* TODO: Emit EOS events for syncing PS/CS stages. */
 
 	if (!(stageMask & ~top_of_pipe_flags)) {
 		/* Just need to sync the PFP engine. */
 		radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
-		radeon_emit(cs, S_370_DST_SEL(V_370_MEM_ASYNC) |
+		radeon_emit(cs, S_370_DST_SEL(V_370_MEM) |
 				S_370_WR_CONFIRM(1) |
 				S_370_ENGINE_SEL(V_370_PFP));
 		radeon_emit(cs, va);
 		radeon_emit(cs, va >> 32);
 		radeon_emit(cs, value);
 	} else if (!(stageMask & ~post_index_fetch_flags)) {
 		/* Sync ME because PFP reads index and indirect buffers. */
 		radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
-		radeon_emit(cs, S_370_DST_SEL(V_370_MEM_ASYNC) |
+		radeon_emit(cs, S_370_DST_SEL(V_370_MEM) |
 				S_370_WR_CONFIRM(1) |
 				S_370_ENGINE_SEL(V_370_ME));
 		radeon_emit(cs, va);
 		radeon_emit(cs, va >> 32);
 		radeon_emit(cs, value);
 	} else {
 		/* Otherwise, sync all prior GPU work using an EOP event. */
 		si_cs_emit_write_event_eop(cs,
 					   cmd_buffer->device->physical_device->rad_info.chip_class,
 					   radv_cmd_buffer_uses_mec(cmd_buffer),
diff --git a/src/amd/vulkan/radv_meta_buffer.c b/src/amd/vulkan/radv_meta_buffer.c
index 76854d7bbad..ab70f4bae6e 100644
--- a/src/amd/vulkan/radv_meta_buffer.c
+++ b/src/amd/vulkan/radv_meta_buffer.c
@@ -515,21 +515,21 @@  void radv_CmdUpdateBuffer(
 
 	if (dataSize < RADV_BUFFER_UPDATE_THRESHOLD) {
 		si_emit_cache_flush(cmd_buffer);
 
 		radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, dst_buffer->bo);
 
 		radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, words + 4);
 
 		radeon_emit(cmd_buffer->cs, PKT3(PKT3_WRITE_DATA, 2 + words, 0));
 		radeon_emit(cmd_buffer->cs, S_370_DST_SEL(mec ?
-		                                V_370_MEM_ASYNC : V_370_MEMORY_SYNC) |
+		                                V_370_MEM : V_370_MEM_GRBM) |
 		                            S_370_WR_CONFIRM(1) |
 		                            S_370_ENGINE_SEL(V_370_ME));
 		radeon_emit(cmd_buffer->cs, va);
 		radeon_emit(cmd_buffer->cs, va >> 32);
 		radeon_emit_array(cmd_buffer->cs, pData, words);
 
 		if (unlikely(cmd_buffer->device->trace_bo))
 			radv_cmd_buffer_trace_emit(cmd_buffer);
 	} else {
 		uint32_t buf_offset;
diff --git a/src/amd/vulkan/radv_query.c b/src/amd/vulkan/radv_query.c
index 72429635290..9777e0c5c8a 100644
--- a/src/amd/vulkan/radv_query.c
+++ b/src/amd/vulkan/radv_query.c
@@ -1685,21 +1685,21 @@  void radv_CmdWriteTimestamp(
 		num_queries = util_bitcount(cmd_buffer->state.subpass->view_mask);
 
 	MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cs, 28 * num_queries);
 
 	for (unsigned i = 0; i < num_queries; i++) {
 		switch(pipelineStage) {
 		case VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT:
 			radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
 			radeon_emit(cs, COPY_DATA_COUNT_SEL | COPY_DATA_WR_CONFIRM |
 				    COPY_DATA_SRC_SEL(COPY_DATA_TIMESTAMP) |
-				    COPY_DATA_DST_SEL(V_370_MEM_ASYNC));
+				    COPY_DATA_DST_SEL(V_370_MEM));
 			radeon_emit(cs, 0);
 			radeon_emit(cs, 0);
 			radeon_emit(cs, query_va);
 			radeon_emit(cs, query_va >> 32);
 			break;
 		default:
 			si_cs_emit_write_event_eop(cs,
 						   cmd_buffer->device->physical_device->rad_info.chip_class,
 						   mec,
 						   V_028A90_BOTTOM_OF_PIPE_TS, 0,
diff --git a/src/gallium/drivers/radeonsi/si_fence.c b/src/gallium/drivers/radeonsi/si_fence.c
index b6920c95e34..be394119af6 100644
--- a/src/gallium/drivers/radeonsi/si_fence.c
+++ b/src/gallium/drivers/radeonsi/si_fence.c
@@ -259,21 +259,21 @@  static void si_fine_fence_set(struct si_context *ctx,
 
 	*fence_ptr = 0;
 
 	uint64_t fence_va = fine->buf->gpu_address + fine->offset;
 
 	radeon_add_to_buffer_list(ctx, ctx->gfx_cs, fine->buf,
 				  RADEON_USAGE_WRITE, RADEON_PRIO_QUERY);
 	if (flags & PIPE_FLUSH_TOP_OF_PIPE) {
 		struct radeon_cmdbuf *cs = ctx->gfx_cs;
 		radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
-		radeon_emit(cs, S_370_DST_SEL(V_370_MEM_ASYNC) |
+		radeon_emit(cs, S_370_DST_SEL(V_370_MEM) |
 			S_370_WR_CONFIRM(1) |
 			S_370_ENGINE_SEL(V_370_PFP));
 		radeon_emit(cs, fence_va);
 		radeon_emit(cs, fence_va >> 32);
 		radeon_emit(cs, 0x80000000);
 	} else if (flags & PIPE_FLUSH_BOTTOM_OF_PIPE) {
 		si_cp_release_mem(ctx,
 				  V_028A90_BOTTOM_OF_PIPE_TS, 0,
 				  EOP_DST_SEL_MEM, EOP_INT_SEL_NONE,
 				  EOP_DATA_SEL_VALUE_32BIT,
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index 0bab41c9a0c..b2eb91dca92 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -525,21 +525,21 @@  static struct pipe_context *si_create_context(struct pipe_screen *screen,
 
 	if (sctx->chip_class >= GFX9) {
 		sctx->wait_mem_scratch = r600_resource(
 			pipe_buffer_create(screen, 0, PIPE_USAGE_DEFAULT, 4));
 		if (!sctx->wait_mem_scratch)
 			goto fail;
 
 		/* Initialize the memory. */
 		struct radeon_cmdbuf *cs = sctx->gfx_cs;
 		radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
-		radeon_emit(cs, S_370_DST_SEL(V_370_MEMORY_SYNC) |
+		radeon_emit(cs, S_370_DST_SEL(V_370_MEM_GRBM) |
 			    S_370_WR_CONFIRM(1) |
 			    S_370_ENGINE_SEL(V_370_ME));
 		radeon_emit(cs, sctx->wait_mem_scratch->gpu_address);
 		radeon_emit(cs, sctx->wait_mem_scratch->gpu_address >> 32);
 		radeon_emit(cs, sctx->wait_mem_number);
 		radeon_add_to_buffer_list(sctx, cs, sctx->wait_mem_scratch,
 					  RADEON_USAGE_WRITE, RADEON_PRIO_FENCE);
 	}
 
 	/* CIK cannot unbind a constant buffer (S_BUFFER_LOAD doesn't skip loads
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index dd670f3f670..ea8c5d054b5 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -1589,21 +1589,21 @@  si_draw_rectangle(struct blitter_context *blitter,
 	si_draw_vbo(pipe, &info);
 }
 
 void si_trace_emit(struct si_context *sctx)
 {
 	struct radeon_cmdbuf *cs = sctx->gfx_cs;
 	uint64_t va = sctx->current_saved_cs->trace_buf->gpu_address;
 	uint32_t trace_id = ++sctx->current_saved_cs->trace_id;
 
 	radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
-	radeon_emit(cs, S_370_DST_SEL(V_370_MEMORY_SYNC) |
+	radeon_emit(cs, S_370_DST_SEL(V_370_MEM_GRBM) |
 		    S_370_WR_CONFIRM(1) |
 		    S_370_ENGINE_SEL(V_370_ME));
 	radeon_emit(cs, va);
 	radeon_emit(cs, va >> 32);
 	radeon_emit(cs, trace_id);
 	radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
 	radeon_emit(cs, AC_ENCODE_TRACE_POINT(trace_id));
 
 	if (sctx->log)
 		u_log_flush(sctx->log);