[11/16] radv: Implement vkCmdDispatchBase.

Submitted by Bas Nieuwenhuizen on March 7, 2018, 2:47 p.m.

Details

Message ID 20180307144744.4816-11-bas@basnieuwenhuizen.nl
State New
Series "Series without cover letter"
Headers show

Commit Message

Bas Nieuwenhuizen March 7, 2018, 2:47 p.m.
Reviewed-by: Dave Airlie <airlied@redhat.com>
---
 src/amd/vulkan/radv_cmd_buffer.c | 41 +++++++++++++++++++++++++++++++++++++++-
 src/amd/vulkan/radv_device.c     |  3 +--
 2 files changed, 41 insertions(+), 3 deletions(-)

Patch hide | download patch | download mbox

diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index 9238f76305..1e7d035d34 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -3181,6 +3181,11 @@  struct radv_dispatch_info {
 	 */
 	uint32_t blocks[3];
 
+	/**
+	 * A starting offset for the grid. Even if unaligned is set, each
+	 * offset must still be a multiple of the compute shader block size.
+	 */
+	uint32_t offsets[3];
 	/**
 	 * Whether it's an unaligned compute dispatch.
 	 */
@@ -3249,6 +3254,7 @@  radv_emit_dispatch_packets(struct radv_cmd_buffer *cmd_buffer,
 		}
 	} else {
 		unsigned blocks[3] = { info->blocks[0], info->blocks[1], info->blocks[2] };
+		unsigned offsets[3] = { info->offsets[0], info->offsets[1], info->offsets[2] };
 
 		if (info->unaligned) {
 			unsigned *cs_block_size = compute_shader->info.cs.block_size;
@@ -3268,6 +3274,11 @@  radv_emit_dispatch_packets(struct radv_cmd_buffer *cmd_buffer,
 			blocks[1] = round_up_u32(blocks[1], cs_block_size[1]);
 			blocks[2] = round_up_u32(blocks[2], cs_block_size[2]);
 
+			for(unsigned i = 0; i < 3; ++i) {
+				assert(offsets[i] % cs_block_size[i] == 0);
+				offsets[i] /= cs_block_size[i];
+			}
+
 			radeon_set_sh_reg_seq(cs, R_00B81C_COMPUTE_NUM_THREAD_X, 3);
 			radeon_emit(cs,
 				    S_00B81C_NUM_THREAD_FULL(cs_block_size[0]) |
@@ -3293,6 +3304,19 @@  radv_emit_dispatch_packets(struct radv_cmd_buffer *cmd_buffer,
 			radeon_emit(cs, blocks[2]);
 		}
 
+		if (offsets[0] || offsets[1] || offsets[2]) {
+			radeon_set_sh_reg_seq(cs, R_00B810_COMPUTE_START_X, 3);
+			radeon_emit(cs, offsets[0]);
+			radeon_emit(cs, offsets[1]);
+			radeon_emit(cs, offsets[2]);
+
+			/* The blocks in the packet are not counts but end values. */
+			for (unsigned i = 0; i < 3; ++i)
+				blocks[i] += offsets[i];
+		} else {
+			dispatch_initiator |= S_00B800_FORCE_START_AT_000(1);
+		}
+
 		radeon_emit(cs, PKT3(PKT3_DISPATCH_DIRECT, 3, 0) |
 				PKT3_SHADER_TYPE_S(1));
 		radeon_emit(cs, blocks[0]);
@@ -3368,8 +3392,11 @@  radv_dispatch(struct radv_cmd_buffer *cmd_buffer,
 	radv_cmd_buffer_after_draw(cmd_buffer, RADV_CMD_FLAG_CS_PARTIAL_FLUSH);
 }
 
-void radv_CmdDispatch(
+void radv_CmdDispatchBase(
 	VkCommandBuffer                             commandBuffer,
+	uint32_t                                    base_x,
+	uint32_t                                    base_y,
+	uint32_t                                    base_z,
 	uint32_t                                    x,
 	uint32_t                                    y,
 	uint32_t                                    z)
@@ -3381,9 +3408,21 @@  void radv_CmdDispatch(
 	info.blocks[1] = y;
 	info.blocks[2] = z;
 
+	info.offsets[0] = base_x;
+	info.offsets[1] = base_y;
+	info.offsets[2] = base_z;
 	radv_dispatch(cmd_buffer, &info);
 }
 
+void radv_CmdDispatch(
+	VkCommandBuffer                             commandBuffer,
+	uint32_t                                    x,
+	uint32_t                                    y,
+	uint32_t                                    z)
+{
+	radv_CmdDispatchBase(commandBuffer, 0, 0, 0, x, y, z);
+}
+
 void radv_CmdDispatchIndirect(
 	VkCommandBuffer                             commandBuffer,
 	VkBuffer                                    _buffer,
diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index 4ce463278e..0e7ee8ef9b 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -1254,8 +1254,7 @@  VkResult radv_CreateDevice(
 	device->scratch_waves = MAX2(32 * physical_device->rad_info.num_good_compute_units,
 				     max_threads_per_block / 64);
 
-	device->dispatch_initiator = S_00B800_COMPUTE_SHADER_EN(1) |
-				     S_00B800_FORCE_START_AT_000(1);
+	device->dispatch_initiator = S_00B800_COMPUTE_SHADER_EN(1);
 
 	if (device->physical_device->rad_info.chip_class >= CIK) {
 		/* If the KMD allows it (there is a KMD hw register for it),