[4/8] radeonsi: move PKT3_WRITE_DATA generation into a helper function

Submitted by Marek Olšák on Jan. 18, 2019, 4:43 p.m.

Details

Message ID 20190118164359.19461-5-maraeo@gmail.com
State New
Headers show
Series "RadeonSI: PKT3_WRITE_DATA for small uploads" ( rev: 1 ) in Mesa

Not browsing as part of any series.

Commit Message

Marek Olšák Jan. 18, 2019, 4:43 p.m.
From: Marek Olšák <marek.olsak@amd.com>

---
 src/gallium/drivers/radeonsi/si_cp_dma.c      | 25 +++++++++++++++++++
 src/gallium/drivers/radeonsi/si_descriptors.c | 10 ++------
 src/gallium/drivers/radeonsi/si_fence.c       | 21 ++++++----------
 src/gallium/drivers/radeonsi/si_pipe.c        | 13 ++--------
 src/gallium/drivers/radeonsi/si_pipe.h        |  3 +++
 src/gallium/drivers/radeonsi/si_state_draw.c  | 12 +++------
 6 files changed, 43 insertions(+), 41 deletions(-)

Patch hide | download patch | download mbox

diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c b/src/gallium/drivers/radeonsi/si_cp_dma.c
index 80673f3f5f2..59360c0d4aa 100644
--- a/src/gallium/drivers/radeonsi/si_cp_dma.c
+++ b/src/gallium/drivers/radeonsi/si_cp_dma.c
@@ -574,10 +574,35 @@  void si_test_gds(struct si_context *sctx)
 
 	pipe_buffer_read(ctx, dst, 0, sizeof(r), r);
 	printf("GDS clear = %08x %08x %08x %08x -> %s\n", r[0], r[1], r[2], r[3],
 			r[0] == 0xc1ea4146 && r[1] == 0xc1ea4146 &&
 			r[2] == 0xc1ea4146 && r[3] == 0xc1ea4146 ? "pass" : "fail");
 
 	pipe_resource_reference(&src, NULL);
 	pipe_resource_reference(&dst, NULL);
 	exit(0);
 }
+
+void si_cp_write_data(struct si_context *sctx, struct r600_resource *buf, /* buf: destination buffer; buf->gpu_address is the base of the written address */
+		      unsigned offset, unsigned size, unsigned dst_sel, /* offset, size: in bytes, both multiples of 4 (asserted below) */
+		      unsigned engine, const void *data) /* data: size/4 dwords that are copied into the command stream */
+{ /* Emits a single PKT3_WRITE_DATA packet into sctx->gfx_cs. */
+	struct radeon_cmdbuf *cs = sctx->gfx_cs;
+
+	assert(offset % 4 == 0); /* the destination address is built from a dword-aligned offset */
+	assert(size % 4 == 0); /* the payload is emitted as whole dwords (size/4) */
+
+	if (sctx->chip_class == SI && dst_sel == V_370_MEM) /* on SI, V_370_MEM is swapped for V_370_MEM_GRBM */
+		dst_sel = V_370_MEM_GRBM;
+
+	radeon_add_to_buffer_list(sctx, cs, buf,
+				  RADEON_USAGE_WRITE, RADEON_PRIO_CP_DMA); /* registers buf with the CS for write usage */
+	uint64_t va = buf->gpu_address + offset; /* 64-bit destination address placed in the packet */
+
+	radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + size/4, 0)); /* header; count argument is 2 + number of payload dwords */
+	radeon_emit(cs, S_370_DST_SEL(dst_sel) |
+		    S_370_WR_CONFIRM(1) |
+		    S_370_ENGINE_SEL(engine)); /* control dword: destination select, WR_CONFIRM=1, engine select */
+	radeon_emit(cs, va); /* address, low dword - NOTE(review): presumably truncated to 32 bits by radeon_emit; confirm */
+	radeon_emit(cs, va >> 32); /* address, high dword */
+	radeon_emit_array(cs, (const uint32_t*)data, size/4); /* payload: size/4 dwords read from data */
+}
diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
index 71ae00c53cb..ca62848296b 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -1814,35 +1814,29 @@  void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf,
 			}
 		}
 	}
 }
 
 static void si_upload_bindless_descriptor(struct si_context *sctx,
 					  unsigned desc_slot,
 					  unsigned num_dwords)
 {
 	struct si_descriptors *desc = &sctx->bindless_descriptors;
-	struct radeon_cmdbuf *cs = sctx->gfx_cs;
 	unsigned desc_slot_offset = desc_slot * 16;
 	uint32_t *data;
 	uint64_t va;
 
 	data = desc->list + desc_slot_offset;
 	va = desc->gpu_address + desc_slot_offset * 4;
 
-	radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + num_dwords, 0));
-	radeon_emit(cs, S_370_DST_SEL(V_370_TC_L2) |
-		    S_370_WR_CONFIRM(1) |
-		    S_370_ENGINE_SEL(V_370_ME));
-	radeon_emit(cs, va);
-	radeon_emit(cs, va >> 32);
-	radeon_emit_array(cs, data, num_dwords);
+	si_cp_write_data(sctx, desc->buffer, va - desc->buffer->gpu_address,
+			 num_dwords * 4, V_370_TC_L2, V_370_ME, data); /* offset is passed relative to desc->buffer; size is num_dwords * 4 bytes */
 }
 
 static void si_upload_bindless_descriptors(struct si_context *sctx)
 {
 	if (!sctx->bindless_descriptors_dirty)
 		return;
 
 	/* Wait for graphics/compute to be idle before updating the resident
 	 * descriptors directly in memory, in case the GPU is using them.
 	 */
diff --git a/src/gallium/drivers/radeonsi/si_fence.c b/src/gallium/drivers/radeonsi/si_fence.c
index 46d0289c90b..84bf4d10c20 100644
--- a/src/gallium/drivers/radeonsi/si_fence.c
+++ b/src/gallium/drivers/radeonsi/si_fence.c
@@ -252,35 +252,30 @@  static void si_fine_fence_set(struct si_context *ctx,
 	assert(util_bitcount(flags & (PIPE_FLUSH_TOP_OF_PIPE | PIPE_FLUSH_BOTTOM_OF_PIPE)) == 1);
 
 	/* Use uncached system memory for the fence. */
 	u_upload_alloc(ctx->cached_gtt_allocator, 0, 4, 4,
 		       &fine->offset, (struct pipe_resource **)&fine->buf, (void **)&fence_ptr);
 	if (!fine->buf)
 		return;
 
 	*fence_ptr = 0;
 
-	uint64_t fence_va = fine->buf->gpu_address + fine->offset;
-
-	radeon_add_to_buffer_list(ctx, ctx->gfx_cs, fine->buf,
-				  RADEON_USAGE_WRITE, RADEON_PRIO_QUERY);
 	if (flags & PIPE_FLUSH_TOP_OF_PIPE) {
-		struct radeon_cmdbuf *cs = ctx->gfx_cs;
-		radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
-		radeon_emit(cs, S_370_DST_SEL(ctx->chip_class >= CIK ? V_370_MEM
-								     : V_370_MEM_GRBM) |
-			S_370_WR_CONFIRM(1) |
-			S_370_ENGINE_SEL(V_370_PFP));
-		radeon_emit(cs, fence_va);
-		radeon_emit(cs, fence_va >> 32);
-		radeon_emit(cs, 0x80000000);
+		uint32_t value = 0x80000000;
+
+		si_cp_write_data(ctx, fine->buf, fine->offset, 4,
+				 V_370_MEM, V_370_PFP, &value);
 	} else if (flags & PIPE_FLUSH_BOTTOM_OF_PIPE) {
+		uint64_t fence_va = fine->buf->gpu_address + fine->offset;
+
+		radeon_add_to_buffer_list(ctx, ctx->gfx_cs, fine->buf,
+					  RADEON_USAGE_WRITE, RADEON_PRIO_QUERY);
 		si_cp_release_mem(ctx,
 				  V_028A90_BOTTOM_OF_PIPE_TS, 0,
 				  EOP_DST_SEL_MEM, EOP_INT_SEL_NONE,
 				  EOP_DATA_SEL_VALUE_32BIT,
 				  NULL, fence_va, 0x80000000,
 				  PIPE_QUERY_GPU_FINISHED);
 	} else {
 		assert(false);
 	}
 }
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index f68ef3f67ce..3bb8e04e4ad 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -523,31 +523,22 @@  static struct pipe_context *si_create_context(struct pipe_screen *screen,
 
 	sctx->sample_mask = 0xffff;
 
 	if (sctx->chip_class >= GFX9) {
 		sctx->wait_mem_scratch = r600_resource(
 			pipe_buffer_create(screen, 0, PIPE_USAGE_DEFAULT, 4));
 		if (!sctx->wait_mem_scratch)
 			goto fail;
 
 		/* Initialize the memory. */
-		struct radeon_cmdbuf *cs = sctx->gfx_cs;
-		radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
-		radeon_emit(cs, S_370_DST_SEL(sctx->chip_class >= CIK ? V_370_MEM
-								      : V_370_MEM_GRBM) |
-			    S_370_WR_CONFIRM(1) |
-			    S_370_ENGINE_SEL(V_370_ME));
-		radeon_emit(cs, sctx->wait_mem_scratch->gpu_address);
-		radeon_emit(cs, sctx->wait_mem_scratch->gpu_address >> 32);
-		radeon_emit(cs, sctx->wait_mem_number);
-		radeon_add_to_buffer_list(sctx, cs, sctx->wait_mem_scratch,
-					  RADEON_USAGE_WRITE, RADEON_PRIO_FENCE);
+		si_cp_write_data(sctx, sctx->wait_mem_scratch, 0, 4,
+				 V_370_MEM, V_370_ME, &sctx->wait_mem_number);
 	}
 
 	/* CIK cannot unbind a constant buffer (S_BUFFER_LOAD doesn't skip loads
 	 * if NUM_RECORDS == 0). We need to use a dummy buffer instead. */
 	if (sctx->chip_class == CIK) {
 		sctx->null_const_buf.buffer =
 			pipe_aligned_buffer_create(screen,
 						   SI_RESOURCE_FLAG_32BIT,
 						   PIPE_USAGE_DEFAULT, 16,
 						   sctx->screen->info.tcc_cache_line_size);
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 9943998a707..d874f215a21 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -1178,20 +1178,23 @@  void si_cp_dma_clear_buffer(struct si_context *sctx, struct radeon_cmdbuf *cs,
 			    enum si_coherency coher, enum si_cache_policy cache_policy);
 void si_cp_dma_copy_buffer(struct si_context *sctx,
 			   struct pipe_resource *dst, struct pipe_resource *src,
 			   uint64_t dst_offset, uint64_t src_offset, unsigned size,
 			   unsigned user_flags, enum si_coherency coher,
 			   enum si_cache_policy cache_policy);
 void cik_prefetch_TC_L2_async(struct si_context *sctx, struct pipe_resource *buf,
 			      uint64_t offset, unsigned size);
 void cik_emit_prefetch_L2(struct si_context *sctx, bool vertex_stage_only);
 void si_test_gds(struct si_context *sctx);
+void si_cp_write_data(struct si_context *sctx, struct r600_resource *buf,
+		      unsigned offset, unsigned size, unsigned dst_sel,
+		      unsigned engine, const void *data); /* emits a PKT3_WRITE_DATA packet; offset and size are in bytes and must be multiples of 4 */
 
 /* si_debug.c */
 void si_save_cs(struct radeon_winsys *ws, struct radeon_cmdbuf *cs,
 		struct radeon_saved_cs *saved, bool get_buffer_list);
 void si_clear_saved_cs(struct radeon_saved_cs *saved);
 void si_destroy_saved_cs(struct si_saved_cs *scs);
 void si_auto_log_cs(void *data, struct u_log_context *log);
 void si_log_hw_flush(struct si_context *sctx);
 void si_log_draw_state(struct si_context *sctx, struct u_log_context *log);
 void si_log_compute_state(struct si_context *sctx, struct u_log_context *log);
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index 9a80bd81327..1ff74e77433 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -1585,31 +1585,25 @@  si_draw_rectangle(struct blitter_context *blitter,
 	/* Don't set per-stage shader pointers for VS. */
 	sctx->shader_pointers_dirty &= ~SI_DESCS_SHADER_MASK(VERTEX);
 	sctx->vertex_buffer_pointer_dirty = false;
 
 	si_draw_vbo(pipe, &info);
 }
 
 void si_trace_emit(struct si_context *sctx)
 {
 	struct radeon_cmdbuf *cs = sctx->gfx_cs;
-	uint64_t va = sctx->current_saved_cs->trace_buf->gpu_address;
 	uint32_t trace_id = ++sctx->current_saved_cs->trace_id;
 
-	radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
-	radeon_emit(cs, S_370_DST_SEL(sctx->chip_class >= CIK ? V_370_MEM
-							      : V_370_MEM_GRBM) |
-		    S_370_WR_CONFIRM(1) |
-		    S_370_ENGINE_SEL(V_370_ME));
-	radeon_emit(cs, va);
-	radeon_emit(cs, va >> 32);
-	radeon_emit(cs, trace_id);
+	si_cp_write_data(sctx, sctx->current_saved_cs->trace_buf,
+			 0, 4, V_370_MEM, V_370_ME, &trace_id); /* WRITE_DATA of the 4-byte trace_id at offset 0 of trace_buf */
+
 	radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
 	radeon_emit(cs, AC_ENCODE_TRACE_POINT(trace_id));
 
 	if (sctx->log)
 		u_log_flush(sctx->log);
 }
 
 void si_init_draw_functions(struct si_context *sctx)
 {
 	sctx->b.draw_vbo = si_draw_vbo;