[v2] radv: make sure to wait for CP DMA when needed

Submitted by Samuel Pitoiset on July 2, 2018, 1:13 p.m.

Details

Message ID 20180702131328.31396-1-samuel.pitoiset@gmail.com
State New
Headers show
Series "radv: make sure to wait for CP DMA when needed" ( rev: 1 ) in Mesa

Not browsing as part of any series.

Commit Message

Samuel Pitoiset July 2, 2018, 1:13 p.m.
This might fix some synchronization issues. I don't know if
that will affect performance but it's required for correctness.

v2: - wait for CP DMA in CmdWaitEvents()
    - track if CP DMA is used

CC: <mesa-stable@lists.freedesktop.org>
Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
---
 src/amd/vulkan/radv_cmd_buffer.c | 10 ++++++++++
 src/amd/vulkan/radv_private.h    |  5 +++++
 src/amd/vulkan/si_cmd_buffer.c   | 24 +++++++++++++++++++++++-
 3 files changed, 38 insertions(+), 1 deletion(-)

Patch hide | download patch | download mbox

diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index e020153c29..eacc016982 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -2594,6 +2594,11 @@  VkResult radv_EndCommandBuffer(
 		si_emit_cache_flush(cmd_buffer);
 	}
 
+	/* Make sure CP DMA is idle at the end of IBs because the kernel
+	 * doesn't wait for it.
+	 */
+	si_cp_dma_wait_for_idle(cmd_buffer);
+
 	vk_free(&cmd_buffer->pool->alloc, cmd_buffer->state.attachments);
 
 	if (!cmd_buffer->device->ws->cs_finalize(cmd_buffer->cs))
@@ -4238,6 +4243,11 @@  static void write_event(struct radv_cmd_buffer *cmd_buffer,
 	/* TODO: this is overkill. Probably should figure something out from
 	 * the stage mask. */
 
+	/* Make sure CP DMA is idle because the driver might have performed a
+	 * DMA operation for copying or filling buffers/images.
+	 */
+	si_cp_dma_wait_for_idle(cmd_buffer);
+
 	si_cs_emit_write_event_eop(cs,
 				   cmd_buffer->device->physical_device->rad_info.chip_class,
 				   radv_cmd_buffer_uses_mec(cmd_buffer),
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index df335b43d8..9514d43f28 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -978,6 +978,9 @@  struct radv_cmd_state {
 	uint32_t last_num_instances;
 	uint32_t last_first_instance;
 	uint32_t last_vertex_offset;
+
+	/* Whether CP DMA is busy/idle. */
+	bool dma_is_busy;
 };
 
 struct radv_cmd_pool {
@@ -1090,6 +1093,8 @@  void si_cp_dma_prefetch(struct radv_cmd_buffer *cmd_buffer, uint64_t va,
                         unsigned size);
 void si_cp_dma_clear_buffer(struct radv_cmd_buffer *cmd_buffer, uint64_t va,
 			    uint64_t size, unsigned value);
+void si_cp_dma_wait_for_idle(struct radv_cmd_buffer *cmd_buffer);
+
 void radv_set_db_count_control(struct radv_cmd_buffer *cmd_buffer);
 bool
 radv_cmd_buffer_upload_alloc(struct radv_cmd_buffer *cmd_buffer,
diff --git a/src/amd/vulkan/si_cmd_buffer.c b/src/amd/vulkan/si_cmd_buffer.c
index 454fd8c39c..ac0e8addd0 100644
--- a/src/amd/vulkan/si_cmd_buffer.c
+++ b/src/amd/vulkan/si_cmd_buffer.c
@@ -1040,7 +1040,6 @@  static void si_emit_cp_dma(struct radv_cmd_buffer *cmd_buffer,
 	struct radeon_cmdbuf *cs = cmd_buffer->cs;
 	uint32_t header = 0, command = 0;
 
-	assert(size);
 	assert(size <= cp_dma_max_byte_count(cmd_buffer));
 
 	radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 9);
@@ -1217,6 +1216,8 @@  void si_cp_dma_buffer_copy(struct radv_cmd_buffer *cmd_buffer,
 	}
 	if (realign_size)
 		si_cp_dma_realign_engine(cmd_buffer, realign_size);
+
+	cmd_buffer->state.dma_is_busy = true;
 }
 
 void si_cp_dma_clear_buffer(struct radv_cmd_buffer *cmd_buffer, uint64_t va,
@@ -1241,6 +1242,27 @@  void si_cp_dma_clear_buffer(struct radv_cmd_buffer *cmd_buffer, uint64_t va,
 		size -= byte_count;
 		va += byte_count;
 	}
+
+	cmd_buffer->state.dma_is_busy = true;
+}
+
+void si_cp_dma_wait_for_idle(struct radv_cmd_buffer *cmd_buffer)
+{
+	if (cmd_buffer->device->physical_device->rad_info.chip_class < CIK)
+		return;
+
+	if (!cmd_buffer->state.dma_is_busy)
+		return;
+
+	/* Issue a dummy DMA that copies zero bytes.
+	 *
+	 * The DMA engine will see that there's no work to do and skip this
+	 * DMA request, however, the CP will see the sync flag and still wait
+	 * for all DMAs to complete.
+	 */
+	si_emit_cp_dma(cmd_buffer, 0, 0, 0, CP_DMA_SYNC);
+
+	cmd_buffer->state.dma_is_busy = false;
 }
 
 /* For MSAA sample positions. */