[03/15] ac: correct PKT3_COPY_DATA definitions

Submitted by Marek Olšák on Oct. 2, 2018, 10:35 p.m.

Details

Message ID 20181002223547.18345-4-maraeo@gmail.com
State New
Headers show
Series "A bunch of shared code and RadeonSI changes" ( rev: 1 ) in Mesa

Not browsing as part of any series.

Commit Message

Marek Olšák Oct. 2, 2018, 10:35 p.m.
From: Marek Olšák <marek.olsak@amd.com>

---
 src/amd/common/sid.h                          | 11 +++++++++--
 src/amd/vulkan/radv_cmd_buffer.c              |  6 +++---
 src/amd/vulkan/radv_query.c                   |  8 ++++----
 src/gallium/drivers/radeonsi/si_compute.c     |  2 +-
 src/gallium/drivers/radeonsi/si_perfcounter.c |  6 +++---
 src/gallium/drivers/radeonsi/si_query.c       |  2 +-
 src/gallium/drivers/radeonsi/si_state_draw.c  |  2 +-
 7 files changed, 22 insertions(+), 15 deletions(-)

Patch hide | download patch | download mbox

diff --git a/src/amd/common/sid.h b/src/amd/common/sid.h
index d20b5484223..b3321ea3a77 100644
--- a/src/amd/common/sid.h
+++ b/src/amd/common/sid.h
@@ -153,28 +153,35 @@ 
 #define   R_3F2_CONTROL                        0x3F2
 #define     S_3F2_IB_SIZE(x)                   (((unsigned)(x) & 0xfffff) << 0)
 #define     G_3F2_IB_SIZE(x)                   (((unsigned)(x) >> 0) & 0xfffff)
 #define     S_3F2_CHAIN(x)                     (((unsigned)(x) & 0x1) << 20)
 #define     G_3F2_CHAIN(x)                     (((unsigned)(x) >> 20) & 0x1)
 #define     S_3F2_VALID(x)                     (((unsigned)(x) & 0x1) << 23)
 
 #define PKT3_COPY_DATA			       0x40
 #define		COPY_DATA_SRC_SEL(x)		((x) & 0xf)
 #define			COPY_DATA_REG		0
-#define			COPY_DATA_MEM		1
+#define			COPY_DATA_SRC_MEM	1 /* only valid as source */
+#define                 COPY_DATA_TC_L2         2
+#define                 COPY_DATA_GDS           3
 #define                 COPY_DATA_PERF          4
 #define                 COPY_DATA_IMM           5
 #define                 COPY_DATA_TIMESTAMP     9
 #define		COPY_DATA_DST_SEL(x)		(((unsigned)(x) & 0xf) << 8)
-#define                 COPY_DATA_MEM_ASYNC     5
+#define                 COPY_DATA_DST_MEM_GRBM	1 /* sync across GRBM, deprecated */
+#define                 COPY_DATA_TC_L2         2
+#define                 COPY_DATA_GDS           3
+#define                 COPY_DATA_PERF          4
+#define                 COPY_DATA_DST_MEM       5
 #define		COPY_DATA_COUNT_SEL		(1 << 16)
 #define		COPY_DATA_WR_CONFIRM		(1 << 20)
+#define		COPY_DATA_ENGINE_PFP		(1 << 30)
 #define PKT3_PFP_SYNC_ME		       0x42
 #define PKT3_SURFACE_SYNC                      0x43 /* deprecated on CIK, use ACQUIRE_MEM */
 #define PKT3_ME_INITIALIZE                     0x44 /* not on CIK */
 #define PKT3_COND_WRITE                        0x45
 #define PKT3_EVENT_WRITE                       0x46
 #define PKT3_EVENT_WRITE_EOP                   0x47 /* not on GFX9 */
 #define         EOP_INT_SEL(x)                          ((x) << 24)
 #define			EOP_INT_SEL_NONE			0
 #define			EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM	3
 #define         EOP_DATA_SEL(x)                         ((x) << 29)
diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index d492456d6b8..339704990e2 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -1290,21 +1290,21 @@  radv_load_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer,
 	if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
 		++reg_count;
 	} else {
 		++reg_offset;
 		va += 4;
 	}
 	if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT)
 		++reg_count;
 
 	radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
-	radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_MEM) |
+	radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) |
 			COPY_DATA_DST_SEL(COPY_DATA_REG) |
 			(reg_count == 2 ? COPY_DATA_COUNT_SEL : 0));
 	radeon_emit(cs, va);
 	radeon_emit(cs, va >> 32);
 	radeon_emit(cs, (R_028028_DB_STENCIL_CLEAR + 4 * reg_offset) >> 2);
 	radeon_emit(cs, 0);
 
 	radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
 	radeon_emit(cs, 0);
 }
@@ -1420,21 +1420,21 @@  radv_load_color_clear_metadata(struct radv_cmd_buffer *cmd_buffer,
 	uint64_t va = radv_buffer_get_va(image->bo);
 
 	va += image->offset + image->clear_value_offset;
 
 	if (!radv_image_has_cmask(image) && !radv_image_has_dcc(image))
 		return;
 
 	uint32_t reg = R_028C8C_CB_COLOR0_CLEAR_WORD0 + cb_idx * 0x3c;
 
 	radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, cmd_buffer->state.predicating));
-	radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_MEM) |
+	radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) |
 			COPY_DATA_DST_SEL(COPY_DATA_REG) |
 			COPY_DATA_COUNT_SEL);
 	radeon_emit(cs, va);
 	radeon_emit(cs, va >> 32);
 	radeon_emit(cs, reg >> 2);
 	radeon_emit(cs, 0);
 
 	radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, cmd_buffer->state.predicating));
 	radeon_emit(cs, 0);
 }
@@ -3734,21 +3734,21 @@  radv_emit_dispatch_packets(struct radv_cmd_buffer *cmd_buffer,
 	if (info->indirect) {
 		uint64_t va = radv_buffer_get_va(info->indirect->bo);
 
 		va += info->indirect->offset + info->indirect_offset;
 
 		radv_cs_add_buffer(ws, cs, info->indirect->bo);
 
 		if (loc->sgpr_idx != -1) {
 			for (unsigned i = 0; i < 3; ++i) {
 				radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
-				radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_MEM) |
+				radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) |
 						COPY_DATA_DST_SEL(COPY_DATA_REG));
 				radeon_emit(cs, (va +  4 * i));
 				radeon_emit(cs, (va + 4 * i) >> 32);
 				radeon_emit(cs, ((R_00B900_COMPUTE_USER_DATA_0
 						 + loc->sgpr_idx * 4) >> 2) + i);
 				radeon_emit(cs, 0);
 			}
 		}
 
 		if (radv_cmd_buffer_uses_mec(cmd_buffer)) {
diff --git a/src/amd/vulkan/radv_query.c b/src/amd/vulkan/radv_query.c
index d607d24cfc6..3af56266cea 100644
--- a/src/amd/vulkan/radv_query.c
+++ b/src/amd/vulkan/radv_query.c
@@ -1046,31 +1046,31 @@  void radv_CmdCopyQueryPoolResults(
 				radeon_emit(cs, local_src_va);
 				radeon_emit(cs, local_src_va >> 32);
 				radeon_emit(cs, TIMESTAMP_NOT_READY >> 32);
 				radeon_emit(cs, 0xffffffff);
 				radeon_emit(cs, 4);
 			}
 			if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) {
 				uint64_t avail_dest_va = dest_va + elem_size;
 
 				radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
-				radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_MEM) |
-						COPY_DATA_DST_SEL(COPY_DATA_MEM));
+				radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) |
+						COPY_DATA_DST_SEL(COPY_DATA_DST_MEM_GRBM));
 				radeon_emit(cs, local_src_va);
 				radeon_emit(cs, local_src_va >> 32);
 				radeon_emit(cs, avail_dest_va);
 				radeon_emit(cs, avail_dest_va >> 32);
 			}
 
 			radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
-			radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_MEM) |
-					COPY_DATA_DST_SEL(COPY_DATA_MEM) |
+			radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) |
+					COPY_DATA_DST_SEL(COPY_DATA_DST_MEM_GRBM) |
 					((flags & VK_QUERY_RESULT_64_BIT) ? COPY_DATA_COUNT_SEL : 0));
 			radeon_emit(cs, local_src_va);
 			radeon_emit(cs, local_src_va >> 32);
 			radeon_emit(cs, dest_va);
 			radeon_emit(cs, dest_va >> 32);
 
 
 			assert(cs->cdw <= cdw_max);
 		}
 		break;
diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c
index e0c6902fec4..cbcd8e79c7b 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -727,21 +727,21 @@  static void si_setup_tgsi_user_data(struct si_context *sctx,
 			uint64_t base_va = r600_resource(info->indirect)->gpu_address;
 			uint64_t va = base_va + info->indirect_offset;
 			int i;
 
 			radeon_add_to_buffer_list(sctx, sctx->gfx_cs,
 					 r600_resource(info->indirect),
 					 RADEON_USAGE_READ, RADEON_PRIO_DRAW_INDIRECT);
 
 			for (i = 0; i < 3; ++i) {
 				radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
-				radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_MEM) |
+				radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) |
 						COPY_DATA_DST_SEL(COPY_DATA_REG));
 				radeon_emit(cs, (va + 4 * i));
 				radeon_emit(cs, (va + 4 * i) >> 32);
 				radeon_emit(cs, (grid_size_reg >> 2) + i);
 				radeon_emit(cs, 0);
 			}
 		}
 	} else {
 		if (program->uses_grid_size) {
 			radeon_set_sh_reg_seq(cs, grid_size_reg, 3);
diff --git a/src/gallium/drivers/radeonsi/si_perfcounter.c b/src/gallium/drivers/radeonsi/si_perfcounter.c
index c4f6e164fb5..de71572c8aa 100644
--- a/src/gallium/drivers/radeonsi/si_perfcounter.c
+++ b/src/gallium/drivers/radeonsi/si_perfcounter.c
@@ -552,21 +552,21 @@  static void si_pc_emit_select(struct si_context *sctx,
 static void si_pc_emit_start(struct si_context *sctx,
 			     struct r600_resource *buffer, uint64_t va)
 {
 	struct radeon_cmdbuf *cs = sctx->gfx_cs;
 
 	radeon_add_to_buffer_list(sctx, sctx->gfx_cs, buffer,
 				  RADEON_USAGE_WRITE, RADEON_PRIO_QUERY);
 
 	radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
 	radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) |
-			COPY_DATA_DST_SEL(COPY_DATA_MEM));
+			COPY_DATA_DST_SEL(COPY_DATA_DST_MEM_GRBM));
 	radeon_emit(cs, 1); /* immediate */
 	radeon_emit(cs, 0); /* unused */
 	radeon_emit(cs, va);
 	radeon_emit(cs, va >> 32);
 
 	radeon_set_uconfig_reg(cs, R_036020_CP_PERFMON_CNTL,
 			       S_036020_PERFMON_STATE(V_036020_DISABLE_AND_RESET));
 	radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
 	radeon_emit(cs, EVENT_TYPE(V_028A90_PERFCOUNTER_START) | EVENT_INDEX(0));
 	radeon_set_uconfig_reg(cs, R_036020_CP_PERFMON_CNTL,
@@ -609,34 +609,34 @@  static void si_pc_emit_read(struct si_context *sctx,
 	if (!(regs->layout & SI_PC_FAKE)) {
 		if (regs->layout & SI_PC_REG_REVERSE)
 			reg_delta = -reg_delta;
 
 		for (idx = 0; idx < count; ++idx) {
 			if (regs->counters)
 				reg = regs->counters[idx];
 
 			radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
 			radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_PERF) |
-					COPY_DATA_DST_SEL(COPY_DATA_MEM) |
+					COPY_DATA_DST_SEL(COPY_DATA_DST_MEM_GRBM) |
 					COPY_DATA_COUNT_SEL); /* 64 bits */
 			radeon_emit(cs, reg >> 2);
 			radeon_emit(cs, 0); /* unused */
 			radeon_emit(cs, va);
 			radeon_emit(cs, va >> 32);
 			va += sizeof(uint64_t);
 			reg += reg_delta;
 		}
 	} else {
 		for (idx = 0; idx < count; ++idx) {
 			radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
 			radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) |
-					COPY_DATA_DST_SEL(COPY_DATA_MEM) |
+					COPY_DATA_DST_SEL(COPY_DATA_DST_MEM_GRBM) |
 					COPY_DATA_COUNT_SEL);
 			radeon_emit(cs, 0); /* immediate */
 			radeon_emit(cs, 0);
 			radeon_emit(cs, va);
 			radeon_emit(cs, va >> 32);
 			va += sizeof(uint64_t);
 		}
 	}
 }
 
diff --git a/src/gallium/drivers/radeonsi/si_query.c b/src/gallium/drivers/radeonsi/si_query.c
index 80e84c23937..bdd7e2c060c 100644
--- a/src/gallium/drivers/radeonsi/si_query.c
+++ b/src/gallium/drivers/radeonsi/si_query.c
@@ -792,21 +792,21 @@  static void si_query_hw_do_emit_start(struct si_context *sctx,
 		for (unsigned stream = 0; stream < SI_MAX_STREAMS; ++stream)
 			emit_sample_streamout(cs, va + 32 * stream, stream);
 		break;
 	case PIPE_QUERY_TIME_ELAPSED:
 		/* Write the timestamp from the CP not waiting for
 		 * outstanding draws (top-of-pipe).
 		 */
 		radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
 		radeon_emit(cs, COPY_DATA_COUNT_SEL |
 				COPY_DATA_SRC_SEL(COPY_DATA_TIMESTAMP) |
-				COPY_DATA_DST_SEL(COPY_DATA_MEM_ASYNC));
+				COPY_DATA_DST_SEL(COPY_DATA_DST_MEM));
 		radeon_emit(cs, 0);
 		radeon_emit(cs, 0);
 		radeon_emit(cs, va);
 		radeon_emit(cs, va >> 32);
 		break;
 	case PIPE_QUERY_PIPELINE_STATISTICS:
 		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
 		radeon_emit(cs, EVENT_TYPE(V_028A90_SAMPLE_PIPELINESTAT) | EVENT_INDEX(2));
 		radeon_emit(cs, va);
 		radeon_emit(cs, va >> 32);
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index b1d7437edb9..fceb9debc47 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -664,21 +664,21 @@  static void si_emit_draw_packets(struct si_context *sctx,
 	if (info->count_from_stream_output) {
 		struct si_streamout_target *t =
 			(struct si_streamout_target*)info->count_from_stream_output;
 		uint64_t va = t->buf_filled_size->gpu_address +
 			      t->buf_filled_size_offset;
 
 		radeon_set_context_reg(cs, R_028B30_VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE,
 				       t->stride_in_dw);
 
 		radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
-		radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_MEM) |
+		radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) |
 			    COPY_DATA_DST_SEL(COPY_DATA_REG) |
 			    COPY_DATA_WR_CONFIRM);
 		radeon_emit(cs, va);     /* src address lo */
 		radeon_emit(cs, va >> 32); /* src address hi */
 		radeon_emit(cs, R_028B2C_VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE >> 2);
 		radeon_emit(cs, 0); /* unused */
 
 		radeon_add_to_buffer_list(sctx, sctx->gfx_cs,
 				      t->buf_filled_size, RADEON_USAGE_READ,
 				      RADEON_PRIO_SO_FILLED_SIZE);