[6/8] radv: handle sample locations during automatic layout transitions

Submitted by Samuel Pitoiset on May 30, 2019, 2:05 p.m.

Details

Message ID 20190530140521.31502-7-samuel.pitoiset@gmail.com
State New
Headers show
Series "radv: add support for sample locations during layout transitions" ( rev: 1 ) in Mesa

Not browsing as part of any series.

Commit Message

Samuel Pitoiset May 30, 2019, 2:05 p.m.
From the Vulkan spec 1.1.109:

   "Some implementations may need to evaluate depth image values
    while performing image layout transitions. To accommodate this,
    instances of the VkSampleLocationsInfoEXT structure can be
    specified for each situation where an explicit or automatic
    layout transition has to take place. [...] and
    VkRenderPassSampleLocationsBeginInfoEXT can be chained from
    VkRenderPassBeginInfo to provide sample locations for layout
    transitions performed implicitly by a render pass instance."

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
---
 src/amd/vulkan/radv_cmd_buffer.c | 155 ++++++++++++++++++++++++++++---
 src/amd/vulkan/radv_private.h    |   9 ++
 2 files changed, 150 insertions(+), 14 deletions(-)

Patch hide | download patch | download mbox

diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index 570acaa0905..81b3f5f9886 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -2645,11 +2645,55 @@  void radv_subpass_barrier(struct radv_cmd_buffer *cmd_buffer,
 	                                                      NULL);
 }
 
+static uint32_t
+radv_get_subpass_id(struct radv_cmd_buffer *cmd_buffer)
+{
+	struct radv_cmd_state *state = &cmd_buffer->state;
+	uint32_t subpass_id = state->subpass - state->pass->subpasses;
+
+	/* The id of this subpass shouldn't exceed the number of subpasses in
+	 * this render pass minus 1.
+	 */
+	assert(subpass_id < state->pass->subpass_count);
+	return subpass_id;
+}
+
+static struct radv_sample_locations_state *
+radv_get_attachment_sample_locations(struct radv_cmd_buffer *cmd_buffer,
+				     uint32_t att_idx)
+{
+	struct radv_cmd_state *state = &cmd_buffer->state;
+	uint32_t subpass_id = radv_get_subpass_id(cmd_buffer);
+	struct radv_image_view *view = state->framebuffer->attachments[att_idx].attachment;
+
+	if (view->image->info.samples == 1)
+		return NULL;
+
+	if (state->pass->attachments[att_idx].first_subpass_idx == subpass_id) {
+		/* Return the initial sample locations if this is the initial
+		 * layout transition of the given subpass attachemnt.
+		 */
+		if (state->attachments[att_idx].sample_location.count > 0)
+			return &state->attachments[att_idx].sample_location;
+	} else {
+		/* Otherwise return the subpass sample locations if defined. */
+		if (state->subpass_sample_locs) {
+			for (uint32_t i = 0; i < state->num_subpass_sample_locs; i++) {
+				if (state->subpass_sample_locs[i].subpass_idx == subpass_id)
+					return &state->subpass_sample_locs[i].sample_location;
+			}
+		}
+	}
+
+	return NULL;
+}
+
 static void radv_handle_subpass_image_transition(struct radv_cmd_buffer *cmd_buffer,
 						 struct radv_subpass_attachment att)
 {
 	unsigned idx = att.attachment;
 	struct radv_image_view *view = cmd_buffer->state.framebuffer->attachments[idx].attachment;
+	struct radv_sample_locations_state *sample_locs;
 	VkImageSubresourceRange range;
 	range.aspectMask = 0;
 	range.baseMipLevel = view->base_mip;
@@ -2668,10 +2712,15 @@  static void radv_handle_subpass_image_transition(struct radv_cmd_buffer *cmd_buf
 		range.layerCount = util_last_bit(cmd_buffer->state.subpass->view_mask);
 	}
 
+	/* Get the subpass sample locations for the given attachment, if NULL
+	 * is returned the driver will use the default HW locations.
+	 */
+	sample_locs = radv_get_attachment_sample_locations(cmd_buffer, idx);
+
 	radv_handle_image_transition(cmd_buffer,
 				     view->image,
 				     cmd_buffer->state.attachments[idx].current_layout,
-				     att.layout, 0, 0, &range, NULL);
+				     att.layout, 0, 0, &range, sample_locs);
 
 	cmd_buffer->state.attachments[idx].current_layout = att.layout;
 
@@ -2687,6 +2736,89 @@  radv_cmd_buffer_set_subpass(struct radv_cmd_buffer *cmd_buffer,
 	cmd_buffer->state.dirty |= RADV_CMD_DIRTY_FRAMEBUFFER;
 }
 
+static VkResult
+radv_cmd_state_setup_sample_locations(struct radv_cmd_buffer *cmd_buffer,
+				      struct radv_render_pass *pass,
+				      const VkRenderPassBeginInfo *info)
+{
+	const struct VkRenderPassSampleLocationsBeginInfoEXT *sample_locs =
+		vk_find_struct_const(info->pNext,
+				     RENDER_PASS_SAMPLE_LOCATIONS_BEGIN_INFO_EXT);
+	struct radv_cmd_state *state = &cmd_buffer->state;
+	struct radv_framebuffer *framebuffer = state->framebuffer;
+
+	if (!sample_locs) {
+		state->subpass_sample_locs = NULL;
+		return VK_SUCCESS;
+	}
+
+	for (uint32_t i = 0; i < sample_locs->attachmentInitialSampleLocationsCount; i++) {
+		const VkAttachmentSampleLocationsEXT *att_sample_locs =
+			&sample_locs->pAttachmentInitialSampleLocations[i];
+		uint32_t att_idx = att_sample_locs->attachmentIndex;
+		struct radv_attachment_info *att = &framebuffer->attachments[att_idx];
+		struct radv_image *image = att->attachment->image;
+
+		assert(vk_format_is_depth_or_stencil(image->vk_format));
+
+		/* From the Vulkan spec 1.1.108:
+		 *
+		 * "If the image referenced by the framebuffer attachment at
+		 *  index attachmentIndex was not created with
+		 *  VK_IMAGE_CREATE_SAMPLE_LOCATIONS_COMPATIBLE_DEPTH_BIT_EXT
+		 *  then the values specified in sampleLocationsInfo are
+		 *  ignored."
+		 */
+		if (!(image->flags & VK_IMAGE_CREATE_SAMPLE_LOCATIONS_COMPATIBLE_DEPTH_BIT_EXT))
+			continue;
+
+		const VkSampleLocationsInfoEXT *sample_locs_info =
+			&att_sample_locs->sampleLocationsInfo;
+
+		state->attachments[att_idx].sample_location.per_pixel =
+			sample_locs_info->sampleLocationsPerPixel;
+		state->attachments[att_idx].sample_location.grid_size =
+			sample_locs_info->sampleLocationGridSize;
+		state->attachments[att_idx].sample_location.count =
+			sample_locs_info->sampleLocationsCount;
+		typed_memcpy(&state->attachments[att_idx].sample_location.locations[0],
+			     sample_locs_info->pSampleLocations,
+			     sample_locs_info->sampleLocationsCount);
+	}
+
+	state->subpass_sample_locs = vk_alloc(&cmd_buffer->pool->alloc,
+					      sample_locs->postSubpassSampleLocationsCount *
+					      sizeof(state->subpass_sample_locs[0]),
+					      8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+	if (state->subpass_sample_locs == NULL) {
+		cmd_buffer->record_result = VK_ERROR_OUT_OF_HOST_MEMORY;
+		return cmd_buffer->record_result;
+	}
+
+	state->num_subpass_sample_locs = sample_locs->postSubpassSampleLocationsCount;
+
+	for (uint32_t i = 0; i < sample_locs->postSubpassSampleLocationsCount; i++) {
+		const VkSubpassSampleLocationsEXT *subpass_sample_locs_info =
+			&sample_locs->pPostSubpassSampleLocations[i];
+		const VkSampleLocationsInfoEXT *sample_locs_info =
+			&subpass_sample_locs_info->sampleLocationsInfo;
+
+		state->subpass_sample_locs[i].subpass_idx =
+			subpass_sample_locs_info->subpassIndex;
+		state->subpass_sample_locs[i].sample_location.per_pixel =
+			sample_locs_info->sampleLocationsPerPixel;
+		state->subpass_sample_locs[i].sample_location.grid_size =
+			sample_locs_info->sampleLocationGridSize;
+		state->subpass_sample_locs[i].sample_location.count =
+			sample_locs_info->sampleLocationsCount;
+		typed_memcpy(&state->subpass_sample_locs[i].sample_location.locations[0],
+			     sample_locs_info->pSampleLocations,
+			     sample_locs_info->sampleLocationsCount);
+	}
+
+	return VK_SUCCESS;
+}
+
 static VkResult
 radv_cmd_state_setup_attachments(struct radv_cmd_buffer *cmd_buffer,
 				 struct radv_render_pass *pass,
@@ -2741,6 +2873,7 @@  radv_cmd_state_setup_attachments(struct radv_cmd_buffer *cmd_buffer,
 		}
 
 		state->attachments[i].current_layout = att->initial_layout;
+		state->attachments[i].sample_location.count = 0;
 	}
 
 	return VK_SUCCESS;
@@ -3171,6 +3304,7 @@  VkResult radv_EndCommandBuffer(
 	si_cp_dma_wait_for_idle(cmd_buffer);
 
 	vk_free(&cmd_buffer->pool->alloc, cmd_buffer->state.attachments);
+	vk_free(&cmd_buffer->pool->alloc, cmd_buffer->state.subpass_sample_locs);
 
 	if (!cmd_buffer->device->ws->cs_finalize(cmd_buffer->cs))
 		return vk_error(cmd_buffer->device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
@@ -3669,19 +3803,6 @@  void radv_TrimCommandPool(
 	}
 }
 
-static uint32_t
-radv_get_subpass_id(struct radv_cmd_buffer *cmd_buffer)
-{
-	struct radv_cmd_state *state = &cmd_buffer->state;
-	uint32_t subpass_id = state->subpass - state->pass->subpasses;
-
-	/* The id of this subpass shouldn't exceed the number of subpasses in
-	 * this render pass minus 1.
-	 */
-	assert(subpass_id < state->pass->subpass_count);
-	return subpass_id;
-}
-
 static void
 radv_cmd_buffer_begin_subpass(struct radv_cmd_buffer *cmd_buffer,
 			      uint32_t subpass_id)
@@ -3751,6 +3872,10 @@  void radv_CmdBeginRenderPass(
 	if (result != VK_SUCCESS)
 		return;
 
+	result = radv_cmd_state_setup_sample_locations(cmd_buffer, pass, pRenderPassBegin);
+	if (result != VK_SUCCESS)
+		return;
+
 	radv_cmd_buffer_begin_subpass(cmd_buffer, 0);
 }
 
@@ -4593,11 +4718,13 @@  void radv_CmdEndRenderPass(
 	radv_cmd_buffer_end_subpass(cmd_buffer);
 
 	vk_free(&cmd_buffer->pool->alloc, cmd_buffer->state.attachments);
+	vk_free(&cmd_buffer->pool->alloc, cmd_buffer->state.subpass_sample_locs);
 
 	cmd_buffer->state.pass = NULL;
 	cmd_buffer->state.subpass = NULL;
 	cmd_buffer->state.attachments = NULL;
 	cmd_buffer->state.framebuffer = NULL;
+	cmd_buffer->state.subpass_sample_locs = NULL;
 }
 
 void radv_CmdEndRenderPass2KHR(
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index 180f8b976ab..76cc79b387d 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -1024,6 +1024,7 @@  struct radv_attachment_state {
 	uint32_t                                     cleared_views;
 	VkClearValue                                 clear_value;
 	VkImageLayout                                current_layout;
+	struct radv_sample_locations_state	     sample_location;
 };
 
 struct radv_descriptor_state {
@@ -1035,6 +1036,11 @@  struct radv_descriptor_state {
 	uint32_t dynamic_buffers[4 * MAX_DYNAMIC_BUFFERS];
 };
 
+struct radv_subpass_sample_locs_state {
+	uint32_t subpass_idx;
+	struct radv_sample_locations_state sample_location;
+};
+
 struct radv_cmd_state {
 	/* Vertex descriptors */
 	uint64_t                                      vb_va;
@@ -1057,6 +1063,9 @@  struct radv_cmd_state {
 	struct radv_streamout_state                  streamout;
 	VkRect2D                                     render_area;
 
+	uint32_t                                     num_subpass_sample_locs;
+	struct radv_subpass_sample_locs_state *      subpass_sample_locs;
+
 	/* Index buffer */
 	struct radv_buffer                           *index_buffer;
 	uint64_t                                     index_offset;

Comments

On Thu, May 30, 2019 at 4:02 PM Samuel Pitoiset
<samuel.pitoiset@gmail.com> wrote:
>
> From the Vulkan spec 1.1.109:
>
>    "Some implementations may need to evaluate depth image values
>     while performing image layout transitions. To accommodate this,
>     instances of the VkSampleLocationsInfoEXT structure can be
>     specified for each situation where an explicit or automatic
>     layout transition has to take place. [...] and
>     VkRenderPassSampleLocationsBeginInfoEXT can be chained from
>     VkRenderPassBeginInfo to provide sample locations for layout
>     transitions performed implicitly by a render pass instance."
>
> Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
> ---
>  src/amd/vulkan/radv_cmd_buffer.c | 155 ++++++++++++++++++++++++++++---
>  src/amd/vulkan/radv_private.h    |   9 ++
>  2 files changed, 150 insertions(+), 14 deletions(-)
>
> diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
> index 570acaa0905..81b3f5f9886 100644
> --- a/src/amd/vulkan/radv_cmd_buffer.c
> +++ b/src/amd/vulkan/radv_cmd_buffer.c
> @@ -2645,11 +2645,55 @@ void radv_subpass_barrier(struct radv_cmd_buffer *cmd_buffer,
>                                                               NULL);
>  }
>
> +static uint32_t
> +radv_get_subpass_id(struct radv_cmd_buffer *cmd_buffer)
> +{
> +       struct radv_cmd_state *state = &cmd_buffer->state;
> +       uint32_t subpass_id = state->subpass - state->pass->subpasses;
> +
> +       /* The id of this subpass shouldn't exceed the number of subpasses in
> +        * this render pass minus 1.
> +        */
> +       assert(subpass_id < state->pass->subpass_count);
> +       return subpass_id;
> +}
> +
> +static struct radv_sample_locations_state *
> +radv_get_attachment_sample_locations(struct radv_cmd_buffer *cmd_buffer,
> +                                    uint32_t att_idx)
> +{
> +       struct radv_cmd_state *state = &cmd_buffer->state;
> +       uint32_t subpass_id = radv_get_subpass_id(cmd_buffer);

On the start of the first subpass this may not work as the subpass is
not set yet?

> +       struct radv_image_view *view = state->framebuffer->attachments[att_idx].attachment;
> +
> +       if (view->image->info.samples == 1)
> +               return NULL;
> +
> +       if (state->pass->attachments[att_idx].first_subpass_idx == subpass_id) {
> +               /* Return the initial sample locations if this is the initial
> +                * layout transition of the given subpass attachemnt.
> +                */
> +               if (state->attachments[att_idx].sample_location.count > 0)
> +                       return &state->attachments[att_idx].sample_location;
> +       } else {
> +               /* Otherwise return the subpass sample locations if defined. */
> +               if (state->subpass_sample_locs) {
> +                       for (uint32_t i = 0; i < state->num_subpass_sample_locs; i++) {
> +                               if (state->subpass_sample_locs[i].subpass_idx == subpass_id)
> +                                       return &state->subpass_sample_locs[i].sample_location;
> +                       }
> +               }
> +       }
> +
> +       return NULL;
> +}
> +
>  static void radv_handle_subpass_image_transition(struct radv_cmd_buffer *cmd_buffer,
>                                                  struct radv_subpass_attachment att)
>  {
>         unsigned idx = att.attachment;
>         struct radv_image_view *view = cmd_buffer->state.framebuffer->attachments[idx].attachment;
> +       struct radv_sample_locations_state *sample_locs;
>         VkImageSubresourceRange range;
>         range.aspectMask = 0;
>         range.baseMipLevel = view->base_mip;
> @@ -2668,10 +2712,15 @@ static void radv_handle_subpass_image_transition(struct radv_cmd_buffer *cmd_buf
>                 range.layerCount = util_last_bit(cmd_buffer->state.subpass->view_mask);
>         }
>
> +       /* Get the subpass sample locations for the given attachment, if NULL
> +        * is returned the driver will use the default HW locations.
> +        */
> +       sample_locs = radv_get_attachment_sample_locations(cmd_buffer, idx);
> +
>         radv_handle_image_transition(cmd_buffer,
>                                      view->image,
>                                      cmd_buffer->state.attachments[idx].current_layout,
> -                                    att.layout, 0, 0, &range, NULL);
> +                                    att.layout, 0, 0, &range, sample_locs);
>
>         cmd_buffer->state.attachments[idx].current_layout = att.layout;
>
> @@ -2687,6 +2736,89 @@ radv_cmd_buffer_set_subpass(struct radv_cmd_buffer *cmd_buffer,
>         cmd_buffer->state.dirty |= RADV_CMD_DIRTY_FRAMEBUFFER;
>  }
>
> +static VkResult
> +radv_cmd_state_setup_sample_locations(struct radv_cmd_buffer *cmd_buffer,
> +                                     struct radv_render_pass *pass,
> +                                     const VkRenderPassBeginInfo *info)
> +{
> +       const struct VkRenderPassSampleLocationsBeginInfoEXT *sample_locs =
> +               vk_find_struct_const(info->pNext,
> +                                    RENDER_PASS_SAMPLE_LOCATIONS_BEGIN_INFO_EXT);
> +       struct radv_cmd_state *state = &cmd_buffer->state;
> +       struct radv_framebuffer *framebuffer = state->framebuffer;
> +
> +       if (!sample_locs) {
> +               state->subpass_sample_locs = NULL;
> +               return VK_SUCCESS;
> +       }
> +
> +       for (uint32_t i = 0; i < sample_locs->attachmentInitialSampleLocationsCount; i++) {
> +               const VkAttachmentSampleLocationsEXT *att_sample_locs =
> +                       &sample_locs->pAttachmentInitialSampleLocations[i];
> +               uint32_t att_idx = att_sample_locs->attachmentIndex;
> +               struct radv_attachment_info *att = &framebuffer->attachments[att_idx];
> +               struct radv_image *image = att->attachment->image;
> +
> +               assert(vk_format_is_depth_or_stencil(image->vk_format));
> +
> +               /* From the Vulkan spec 1.1.108:
> +                *
> +                * "If the image referenced by the framebuffer attachment at
> +                *  index attachmentIndex was not created with
> +                *  VK_IMAGE_CREATE_SAMPLE_LOCATIONS_COMPATIBLE_DEPTH_BIT_EXT
> +                *  then the values specified in sampleLocationsInfo are
> +                *  ignored."
> +                */
> +               if (!(image->flags & VK_IMAGE_CREATE_SAMPLE_LOCATIONS_COMPATIBLE_DEPTH_BIT_EXT))
> +                       continue;
> +
> +               const VkSampleLocationsInfoEXT *sample_locs_info =
> +                       &att_sample_locs->sampleLocationsInfo;
> +
> +               state->attachments[att_idx].sample_location.per_pixel =
> +                       sample_locs_info->sampleLocationsPerPixel;
> +               state->attachments[att_idx].sample_location.grid_size =
> +                       sample_locs_info->sampleLocationGridSize;
> +               state->attachments[att_idx].sample_location.count =
> +                       sample_locs_info->sampleLocationsCount;
> +               typed_memcpy(&state->attachments[att_idx].sample_location.locations[0],
> +                            sample_locs_info->pSampleLocations,
> +                            sample_locs_info->sampleLocationsCount);
> +       }
> +
> +       state->subpass_sample_locs = vk_alloc(&cmd_buffer->pool->alloc,
> +                                             sample_locs->postSubpassSampleLocationsCount *
> +                                             sizeof(state->subpass_sample_locs[0]),
> +                                             8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
> +       if (state->subpass_sample_locs == NULL) {
> +               cmd_buffer->record_result = VK_ERROR_OUT_OF_HOST_MEMORY;
> +               return cmd_buffer->record_result;
> +       }
> +
> +       state->num_subpass_sample_locs = sample_locs->postSubpassSampleLocationsCount;
> +
> +       for (uint32_t i = 0; i < sample_locs->postSubpassSampleLocationsCount; i++) {
> +               const VkSubpassSampleLocationsEXT *subpass_sample_locs_info =
> +                       &sample_locs->pPostSubpassSampleLocations[i];
> +               const VkSampleLocationsInfoEXT *sample_locs_info =
> +                       &subpass_sample_locs_info->sampleLocationsInfo;
> +
> +               state->subpass_sample_locs[i].subpass_idx =
> +                       subpass_sample_locs_info->subpassIndex;
> +               state->subpass_sample_locs[i].sample_location.per_pixel =
> +                       sample_locs_info->sampleLocationsPerPixel;
> +               state->subpass_sample_locs[i].sample_location.grid_size =
> +                       sample_locs_info->sampleLocationGridSize;
> +               state->subpass_sample_locs[i].sample_location.count =
> +                       sample_locs_info->sampleLocationsCount;
> +               typed_memcpy(&state->subpass_sample_locs[i].sample_location.locations[0],
> +                            sample_locs_info->pSampleLocations,
> +                            sample_locs_info->sampleLocationsCount);
> +       }
> +
> +       return VK_SUCCESS;
> +}
> +
>  static VkResult
>  radv_cmd_state_setup_attachments(struct radv_cmd_buffer *cmd_buffer,
>                                  struct radv_render_pass *pass,
> @@ -2741,6 +2873,7 @@ radv_cmd_state_setup_attachments(struct radv_cmd_buffer *cmd_buffer,
>                 }
>
>                 state->attachments[i].current_layout = att->initial_layout;
> +               state->attachments[i].sample_location.count = 0;
>         }
>
>         return VK_SUCCESS;
> @@ -3171,6 +3304,7 @@ VkResult radv_EndCommandBuffer(
>         si_cp_dma_wait_for_idle(cmd_buffer);
>
>         vk_free(&cmd_buffer->pool->alloc, cmd_buffer->state.attachments);
> +       vk_free(&cmd_buffer->pool->alloc, cmd_buffer->state.subpass_sample_locs);
>
>         if (!cmd_buffer->device->ws->cs_finalize(cmd_buffer->cs))
>                 return vk_error(cmd_buffer->device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
> @@ -3669,19 +3803,6 @@ void radv_TrimCommandPool(
>         }
>  }
>
> -static uint32_t
> -radv_get_subpass_id(struct radv_cmd_buffer *cmd_buffer)
> -{
> -       struct radv_cmd_state *state = &cmd_buffer->state;
> -       uint32_t subpass_id = state->subpass - state->pass->subpasses;
> -
> -       /* The id of this subpass shouldn't exceed the number of subpasses in
> -        * this render pass minus 1.
> -        */
> -       assert(subpass_id < state->pass->subpass_count);
> -       return subpass_id;
> -}
> -
>  static void
>  radv_cmd_buffer_begin_subpass(struct radv_cmd_buffer *cmd_buffer,
>                               uint32_t subpass_id)
> @@ -3751,6 +3872,10 @@ void radv_CmdBeginRenderPass(
>         if (result != VK_SUCCESS)
>                 return;
>
> +       result = radv_cmd_state_setup_sample_locations(cmd_buffer, pass, pRenderPassBegin);
> +       if (result != VK_SUCCESS)
> +               return;
> +
>         radv_cmd_buffer_begin_subpass(cmd_buffer, 0);
>  }
>
> @@ -4593,11 +4718,13 @@ void radv_CmdEndRenderPass(
>         radv_cmd_buffer_end_subpass(cmd_buffer);
>
>         vk_free(&cmd_buffer->pool->alloc, cmd_buffer->state.attachments);
> +       vk_free(&cmd_buffer->pool->alloc, cmd_buffer->state.subpass_sample_locs);
>
>         cmd_buffer->state.pass = NULL;
>         cmd_buffer->state.subpass = NULL;
>         cmd_buffer->state.attachments = NULL;
>         cmd_buffer->state.framebuffer = NULL;
> +       cmd_buffer->state.subpass_sample_locs = NULL;
>  }
>
>  void radv_CmdEndRenderPass2KHR(
> diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
> index 180f8b976ab..76cc79b387d 100644
> --- a/src/amd/vulkan/radv_private.h
> +++ b/src/amd/vulkan/radv_private.h
> @@ -1024,6 +1024,7 @@ struct radv_attachment_state {
>         uint32_t                                     cleared_views;
>         VkClearValue                                 clear_value;
>         VkImageLayout                                current_layout;
> +       struct radv_sample_locations_state           sample_location;
>  };
>
>  struct radv_descriptor_state {
> @@ -1035,6 +1036,11 @@ struct radv_descriptor_state {
>         uint32_t dynamic_buffers[4 * MAX_DYNAMIC_BUFFERS];
>  };
>
> +struct radv_subpass_sample_locs_state {
> +       uint32_t subpass_idx;
> +       struct radv_sample_locations_state sample_location;
> +};
> +
>  struct radv_cmd_state {
>         /* Vertex descriptors */
>         uint64_t                                      vb_va;
> @@ -1057,6 +1063,9 @@ struct radv_cmd_state {
>         struct radv_streamout_state                  streamout;
>         VkRect2D                                     render_area;
>
> +       uint32_t                                     num_subpass_sample_locs;
> +       struct radv_subpass_sample_locs_state *      subpass_sample_locs;
> +
>         /* Index buffer */
>         struct radv_buffer                           *index_buffer;
>         uint64_t                                     index_offset;
> --
> 2.21.0
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
On 6/5/19 2:51 AM, Bas Nieuwenhuizen wrote:
> On Thu, May 30, 2019 at 4:02 PM Samuel Pitoiset
> <samuel.pitoiset@gmail.com> wrote:
>>  From the Vulkan spec 1.1.109:
>>
>>     "Some implementations may need to evaluate depth image values
>>      while performing image layout transitions. To accommodate this,
>>      instances of the VkSampleLocationsInfoEXT structure can be
>>      specified for each situation where an explicit or automatic
>>      layout transition has to take place. [...] and
>>      VkRenderPassSampleLocationsBeginInfoEXT can be chained from
>>      VkRenderPassBeginInfo to provide sample locations for layout
>>      transitions performed implicitly by a render pass instance."
>>
>> Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
>> ---
>>   src/amd/vulkan/radv_cmd_buffer.c | 155 ++++++++++++++++++++++++++++---
>>   src/amd/vulkan/radv_private.h    |   9 ++
>>   2 files changed, 150 insertions(+), 14 deletions(-)
>>
>> diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
>> index 570acaa0905..81b3f5f9886 100644
>> --- a/src/amd/vulkan/radv_cmd_buffer.c
>> +++ b/src/amd/vulkan/radv_cmd_buffer.c
>> @@ -2645,11 +2645,55 @@ void radv_subpass_barrier(struct radv_cmd_buffer *cmd_buffer,
>>                                                                NULL);
>>   }
>>
>> +static uint32_t
>> +radv_get_subpass_id(struct radv_cmd_buffer *cmd_buffer)
>> +{
>> +       struct radv_cmd_state *state = &cmd_buffer->state;
>> +       uint32_t subpass_id = state->subpass - state->pass->subpasses;
>> +
>> +       /* The id of this subpass shouldn't exceed the number of subpasses in
>> +        * this render pass minus 1.
>> +        */
>> +       assert(subpass_id < state->pass->subpass_count);
>> +       return subpass_id;
>> +}
>> +
>> +static struct radv_sample_locations_state *
>> +radv_get_attachment_sample_locations(struct radv_cmd_buffer *cmd_buffer,
>> +                                    uint32_t att_idx)
>> +{
>> +       struct radv_cmd_state *state = &cmd_buffer->state;
>> +       uint32_t subpass_id = radv_get_subpass_id(cmd_buffer);
> On the start of the first subpass this may not work as the subpass is
> not set yet?
Yes, this patch needs https://patchwork.freedesktop.org/series/61387/
>
>> +       struct radv_image_view *view = state->framebuffer->attachments[att_idx].attachment;
>> +
>> +       if (view->image->info.samples == 1)
>> +               return NULL;
>> +
>> +       if (state->pass->attachments[att_idx].first_subpass_idx == subpass_id) {
>> +               /* Return the initial sample locations if this is the initial
>> +                * layout transition of the given subpass attachemnt.
>> +                */
>> +               if (state->attachments[att_idx].sample_location.count > 0)
>> +                       return &state->attachments[att_idx].sample_location;
>> +       } else {
>> +               /* Otherwise return the subpass sample locations if defined. */
>> +               if (state->subpass_sample_locs) {
>> +                       for (uint32_t i = 0; i < state->num_subpass_sample_locs; i++) {
>> +                               if (state->subpass_sample_locs[i].subpass_idx == subpass_id)
>> +                                       return &state->subpass_sample_locs[i].sample_location;
>> +                       }
>> +               }
>> +       }
>> +
>> +       return NULL;
>> +}
>> +
>>   static void radv_handle_subpass_image_transition(struct radv_cmd_buffer *cmd_buffer,
>>                                                   struct radv_subpass_attachment att)
>>   {
>>          unsigned idx = att.attachment;
>>          struct radv_image_view *view = cmd_buffer->state.framebuffer->attachments[idx].attachment;
>> +       struct radv_sample_locations_state *sample_locs;
>>          VkImageSubresourceRange range;
>>          range.aspectMask = 0;
>>          range.baseMipLevel = view->base_mip;
>> @@ -2668,10 +2712,15 @@ static void radv_handle_subpass_image_transition(struct radv_cmd_buffer *cmd_buf
>>                  range.layerCount = util_last_bit(cmd_buffer->state.subpass->view_mask);
>>          }
>>
>> +       /* Get the subpass sample locations for the given attachment, if NULL
>> +        * is returned the driver will use the default HW locations.
>> +        */
>> +       sample_locs = radv_get_attachment_sample_locations(cmd_buffer, idx);
>> +
>>          radv_handle_image_transition(cmd_buffer,
>>                                       view->image,
>>                                       cmd_buffer->state.attachments[idx].current_layout,
>> -                                    att.layout, 0, 0, &range, NULL);
>> +                                    att.layout, 0, 0, &range, sample_locs);
>>
>>          cmd_buffer->state.attachments[idx].current_layout = att.layout;
>>
>> @@ -2687,6 +2736,89 @@ radv_cmd_buffer_set_subpass(struct radv_cmd_buffer *cmd_buffer,
>>          cmd_buffer->state.dirty |= RADV_CMD_DIRTY_FRAMEBUFFER;
>>   }
>>
>> +static VkResult
>> +radv_cmd_state_setup_sample_locations(struct radv_cmd_buffer *cmd_buffer,
>> +                                     struct radv_render_pass *pass,
>> +                                     const VkRenderPassBeginInfo *info)
>> +{
>> +       const struct VkRenderPassSampleLocationsBeginInfoEXT *sample_locs =
>> +               vk_find_struct_const(info->pNext,
>> +                                    RENDER_PASS_SAMPLE_LOCATIONS_BEGIN_INFO_EXT);
>> +       struct radv_cmd_state *state = &cmd_buffer->state;
>> +       struct radv_framebuffer *framebuffer = state->framebuffer;
>> +
>> +       if (!sample_locs) {
>> +               state->subpass_sample_locs = NULL;
>> +               return VK_SUCCESS;
>> +       }
>> +
>> +       for (uint32_t i = 0; i < sample_locs->attachmentInitialSampleLocationsCount; i++) {
>> +               const VkAttachmentSampleLocationsEXT *att_sample_locs =
>> +                       &sample_locs->pAttachmentInitialSampleLocations[i];
>> +               uint32_t att_idx = att_sample_locs->attachmentIndex;
>> +               struct radv_attachment_info *att = &framebuffer->attachments[att_idx];
>> +               struct radv_image *image = att->attachment->image;
>> +
>> +               assert(vk_format_is_depth_or_stencil(image->vk_format));
>> +
>> +               /* From the Vulkan spec 1.1.108:
>> +                *
>> +                * "If the image referenced by the framebuffer attachment at
>> +                *  index attachmentIndex was not created with
>> +                *  VK_IMAGE_CREATE_SAMPLE_LOCATIONS_COMPATIBLE_DEPTH_BIT_EXT
>> +                *  then the values specified in sampleLocationsInfo are
>> +                *  ignored."
>> +                */
>> +               if (!(image->flags & VK_IMAGE_CREATE_SAMPLE_LOCATIONS_COMPATIBLE_DEPTH_BIT_EXT))
>> +                       continue;
>> +
>> +               const VkSampleLocationsInfoEXT *sample_locs_info =
>> +                       &att_sample_locs->sampleLocationsInfo;
>> +
>> +               state->attachments[att_idx].sample_location.per_pixel =
>> +                       sample_locs_info->sampleLocationsPerPixel;
>> +               state->attachments[att_idx].sample_location.grid_size =
>> +                       sample_locs_info->sampleLocationGridSize;
>> +               state->attachments[att_idx].sample_location.count =
>> +                       sample_locs_info->sampleLocationsCount;
>> +               typed_memcpy(&state->attachments[att_idx].sample_location.locations[0],
>> +                            sample_locs_info->pSampleLocations,
>> +                            sample_locs_info->sampleLocationsCount);
>> +       }
>> +
>> +       state->subpass_sample_locs = vk_alloc(&cmd_buffer->pool->alloc,
>> +                                             sample_locs->postSubpassSampleLocationsCount *
>> +                                             sizeof(state->subpass_sample_locs[0]),
>> +                                             8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
>> +       if (state->subpass_sample_locs == NULL) {
>> +               cmd_buffer->record_result = VK_ERROR_OUT_OF_HOST_MEMORY;
>> +               return cmd_buffer->record_result;
>> +       }
>> +
>> +       state->num_subpass_sample_locs = sample_locs->postSubpassSampleLocationsCount;
>> +
>> +       for (uint32_t i = 0; i < sample_locs->postSubpassSampleLocationsCount; i++) {
>> +               const VkSubpassSampleLocationsEXT *subpass_sample_locs_info =
>> +                       &sample_locs->pPostSubpassSampleLocations[i];
>> +               const VkSampleLocationsInfoEXT *sample_locs_info =
>> +                       &subpass_sample_locs_info->sampleLocationsInfo;
>> +
>> +               state->subpass_sample_locs[i].subpass_idx =
>> +                       subpass_sample_locs_info->subpassIndex;
>> +               state->subpass_sample_locs[i].sample_location.per_pixel =
>> +                       sample_locs_info->sampleLocationsPerPixel;
>> +               state->subpass_sample_locs[i].sample_location.grid_size =
>> +                       sample_locs_info->sampleLocationGridSize;
>> +               state->subpass_sample_locs[i].sample_location.count =
>> +                       sample_locs_info->sampleLocationsCount;
>> +               typed_memcpy(&state->subpass_sample_locs[i].sample_location.locations[0],
>> +                            sample_locs_info->pSampleLocations,
>> +                            sample_locs_info->sampleLocationsCount);
>> +       }
>> +
>> +       return VK_SUCCESS;
>> +}
>> +
>>   static VkResult
>>   radv_cmd_state_setup_attachments(struct radv_cmd_buffer *cmd_buffer,
>>                                   struct radv_render_pass *pass,
>> @@ -2741,6 +2873,7 @@ radv_cmd_state_setup_attachments(struct radv_cmd_buffer *cmd_buffer,
>>                  }
>>
>>                  state->attachments[i].current_layout = att->initial_layout;
>> +               state->attachments[i].sample_location.count = 0;
>>          }
>>
>>          return VK_SUCCESS;
>> @@ -3171,6 +3304,7 @@ VkResult radv_EndCommandBuffer(
>>          si_cp_dma_wait_for_idle(cmd_buffer);
>>
>>          vk_free(&cmd_buffer->pool->alloc, cmd_buffer->state.attachments);
>> +       vk_free(&cmd_buffer->pool->alloc, cmd_buffer->state.subpass_sample_locs);
>>
>>          if (!cmd_buffer->device->ws->cs_finalize(cmd_buffer->cs))
>>                  return vk_error(cmd_buffer->device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
>> @@ -3669,19 +3803,6 @@ void radv_TrimCommandPool(
>>          }
>>   }
>>
>> -static uint32_t
>> -radv_get_subpass_id(struct radv_cmd_buffer *cmd_buffer)
>> -{
>> -       struct radv_cmd_state *state = &cmd_buffer->state;
>> -       uint32_t subpass_id = state->subpass - state->pass->subpasses;
>> -
>> -       /* The id of this subpass shouldn't exceed the number of subpasses in
>> -        * this render pass minus 1.
>> -        */
>> -       assert(subpass_id < state->pass->subpass_count);
>> -       return subpass_id;
>> -}
>> -
>>   static void
>>   radv_cmd_buffer_begin_subpass(struct radv_cmd_buffer *cmd_buffer,
>>                                uint32_t subpass_id)
>> @@ -3751,6 +3872,10 @@ void radv_CmdBeginRenderPass(
>>          if (result != VK_SUCCESS)
>>                  return;
>>
>> +       result = radv_cmd_state_setup_sample_locations(cmd_buffer, pass, pRenderPassBegin);
>> +       if (result != VK_SUCCESS)
>> +               return;
>> +
>>          radv_cmd_buffer_begin_subpass(cmd_buffer, 0);
>>   }
>>
>> @@ -4593,11 +4718,13 @@ void radv_CmdEndRenderPass(
>>          radv_cmd_buffer_end_subpass(cmd_buffer);
>>
>>          vk_free(&cmd_buffer->pool->alloc, cmd_buffer->state.attachments);
>> +       vk_free(&cmd_buffer->pool->alloc, cmd_buffer->state.subpass_sample_locs);
>>
>>          cmd_buffer->state.pass = NULL;
>>          cmd_buffer->state.subpass = NULL;
>>          cmd_buffer->state.attachments = NULL;
>>          cmd_buffer->state.framebuffer = NULL;
>> +       cmd_buffer->state.subpass_sample_locs = NULL;
>>   }
>>
>>   void radv_CmdEndRenderPass2KHR(
>> diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
>> index 180f8b976ab..76cc79b387d 100644
>> --- a/src/amd/vulkan/radv_private.h
>> +++ b/src/amd/vulkan/radv_private.h
>> @@ -1024,6 +1024,7 @@ struct radv_attachment_state {
>>          uint32_t                                     cleared_views;
>>          VkClearValue                                 clear_value;
>>          VkImageLayout                                current_layout;
>> +       struct radv_sample_locations_state           sample_location;
>>   };
>>
>>   struct radv_descriptor_state {
>> @@ -1035,6 +1036,11 @@ struct radv_descriptor_state {
>>          uint32_t dynamic_buffers[4 * MAX_DYNAMIC_BUFFERS];
>>   };
>>
>> +struct radv_subpass_sample_locs_state {
>> +       uint32_t subpass_idx;
>> +       struct radv_sample_locations_state sample_location;
>> +};
>> +
>>   struct radv_cmd_state {
>>          /* Vertex descriptors */
>>          uint64_t                                      vb_va;
>> @@ -1057,6 +1063,9 @@ struct radv_cmd_state {
>>          struct radv_streamout_state                  streamout;
>>          VkRect2D                                     render_area;
>>
>> +       uint32_t                                     num_subpass_sample_locs;
>> +       struct radv_subpass_sample_locs_state *      subpass_sample_locs;
>> +
>>          /* Index buffer */
>>          struct radv_buffer                           *index_buffer;
>>          uint64_t                                     index_offset;
>> --
>> 2.21.0
>>
>> _______________________________________________
>> mesa-dev mailing list
>> mesa-dev@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
On Wed, Jun 5, 2019 at 12:04 PM Samuel Pitoiset
<samuel.pitoiset@gmail.com> wrote:
>
>
> On 6/5/19 2:51 AM, Bas Nieuwenhuizen wrote:
> > On Thu, May 30, 2019 at 4:02 PM Samuel Pitoiset
> > <samuel.pitoiset@gmail.com> wrote:
> >>  From the Vulkan spec 1.1.109:
> >>
> >>     "Some implementations may need to evaluate depth image values
> >>      while performing image layout transitions. To accommodate this,
> >>      instances of the VkSampleLocationsInfoEXT structure can be
> >>      specified for each situation where an explicit or automatic
> >>      layout transition has to take place. [...] and
> >>      VkRenderPassSampleLocationsBeginInfoEXT can be chained from
> >>      VkRenderPassBeginInfo to provide sample locations for layout
> >>      transitions performed implicitly by a render pass instance."
> >>
> >> Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
> >> ---
> >>   src/amd/vulkan/radv_cmd_buffer.c | 155 ++++++++++++++++++++++++++++---
> >>   src/amd/vulkan/radv_private.h    |   9 ++
> >>   2 files changed, 150 insertions(+), 14 deletions(-)
> >>
> >> diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
> >> index 570acaa0905..81b3f5f9886 100644
> >> --- a/src/amd/vulkan/radv_cmd_buffer.c
> >> +++ b/src/amd/vulkan/radv_cmd_buffer.c
> >> @@ -2645,11 +2645,55 @@ void radv_subpass_barrier(struct radv_cmd_buffer *cmd_buffer,
> >>                                                                NULL);
> >>   }
> >>
> >> +static uint32_t
> >> +radv_get_subpass_id(struct radv_cmd_buffer *cmd_buffer)
> >> +{
> >> +       struct radv_cmd_state *state = &cmd_buffer->state;
> >> +       uint32_t subpass_id = state->subpass - state->pass->subpasses;
> >> +
> >> +       /* The id of this subpass shouldn't exceed the number of subpasses in
> >> +        * this render pass minus 1.
> >> +        */
> >> +       assert(subpass_id < state->pass->subpass_count);
> >> +       return subpass_id;
> >> +}
> >> +
> >> +static struct radv_sample_locations_state *
> >> +radv_get_attachment_sample_locations(struct radv_cmd_buffer *cmd_buffer,
> >> +                                    uint32_t att_idx)
> >> +{
> >> +       struct radv_cmd_state *state = &cmd_buffer->state;
> >> +       uint32_t subpass_id = radv_get_subpass_id(cmd_buffer);
> > On the start of the first subpass this may not work as the subpass is
> > not set yet?
> Yes, this patch needs https://patchwork.freedesktop.org/series/61387/
> >
> >> +       struct radv_image_view *view = state->framebuffer->attachments[att_idx].attachment;
> >> +
> >> +       if (view->image->info.samples == 1)
> >> +               return NULL;
> >> +
> >> +       if (state->pass->attachments[att_idx].first_subpass_idx == subpass_id) {
> >> +               /* Return the initial sample locations if this is the initial
> >> +                * layout transition of the given subpass attachemnt.
> >> +                */
> >> +               if (state->attachments[att_idx].sample_location.count > 0)
> >> +                       return &state->attachments[att_idx].sample_location;
> >> +       } else {
> >> +               /* Otherwise return the subpass sample locations if defined. */
> >> +               if (state->subpass_sample_locs) {
> >> +                       for (uint32_t i = 0; i < state->num_subpass_sample_locs; i++) {
> >> +                               if (state->subpass_sample_locs[i].subpass_idx == subpass_id)
> >> +                                       return &state->subpass_sample_locs[i].sample_location;

I think there is an off-by-1 here with the subpass patch applied.

For the transition from subpass 0 to subpass 1, we should be using the
locations from subpass 0, but by setting the subpass before the
transitions, the subpass_id is 1.

> >> +                       }
> >> +               }
> >> +       }
> >> +
> >> +       return NULL;
> >> +}
> >> +
> >>   static void radv_handle_subpass_image_transition(struct radv_cmd_buffer *cmd_buffer,
> >>                                                   struct radv_subpass_attachment att)
> >>   {
> >>          unsigned idx = att.attachment;
> >>          struct radv_image_view *view = cmd_buffer->state.framebuffer->attachments[idx].attachment;
> >> +       struct radv_sample_locations_state *sample_locs;
> >>          VkImageSubresourceRange range;
> >>          range.aspectMask = 0;
> >>          range.baseMipLevel = view->base_mip;
> >> @@ -2668,10 +2712,15 @@ static void radv_handle_subpass_image_transition(struct radv_cmd_buffer *cmd_buf
> >>                  range.layerCount = util_last_bit(cmd_buffer->state.subpass->view_mask);
> >>          }
> >>
> >> +       /* Get the subpass sample locations for the given attachment, if NULL
> >> +        * is returned the driver will use the default HW locations.
> >> +        */
> >> +       sample_locs = radv_get_attachment_sample_locations(cmd_buffer, idx);
> >> +
> >>          radv_handle_image_transition(cmd_buffer,
> >>                                       view->image,
> >>                                       cmd_buffer->state.attachments[idx].current_layout,
> >> -                                    att.layout, 0, 0, &range, NULL);
> >> +                                    att.layout, 0, 0, &range, sample_locs);
> >>
> >>          cmd_buffer->state.attachments[idx].current_layout = att.layout;
> >>
> >> @@ -2687,6 +2736,89 @@ radv_cmd_buffer_set_subpass(struct radv_cmd_buffer *cmd_buffer,
> >>          cmd_buffer->state.dirty |= RADV_CMD_DIRTY_FRAMEBUFFER;
> >>   }
> >>
> >> +static VkResult
> >> +radv_cmd_state_setup_sample_locations(struct radv_cmd_buffer *cmd_buffer,
> >> +                                     struct radv_render_pass *pass,
> >> +                                     const VkRenderPassBeginInfo *info)
> >> +{
> >> +       const struct VkRenderPassSampleLocationsBeginInfoEXT *sample_locs =
> >> +               vk_find_struct_const(info->pNext,
> >> +                                    RENDER_PASS_SAMPLE_LOCATIONS_BEGIN_INFO_EXT);
> >> +       struct radv_cmd_state *state = &cmd_buffer->state;
> >> +       struct radv_framebuffer *framebuffer = state->framebuffer;
> >> +
> >> +       if (!sample_locs) {
> >> +               state->subpass_sample_locs = NULL;
> >> +               return VK_SUCCESS;
> >> +       }
> >> +
> >> +       for (uint32_t i = 0; i < sample_locs->attachmentInitialSampleLocationsCount; i++) {
> >> +               const VkAttachmentSampleLocationsEXT *att_sample_locs =
> >> +                       &sample_locs->pAttachmentInitialSampleLocations[i];
> >> +               uint32_t att_idx = att_sample_locs->attachmentIndex;
> >> +               struct radv_attachment_info *att = &framebuffer->attachments[att_idx];
> >> +               struct radv_image *image = att->attachment->image;
> >> +
> >> +               assert(vk_format_is_depth_or_stencil(image->vk_format));
> >> +
> >> +               /* From the Vulkan spec 1.1.108:
> >> +                *
> >> +                * "If the image referenced by the framebuffer attachment at
> >> +                *  index attachmentIndex was not created with
> >> +                *  VK_IMAGE_CREATE_SAMPLE_LOCATIONS_COMPATIBLE_DEPTH_BIT_EXT
> >> +                *  then the values specified in sampleLocationsInfo are
> >> +                *  ignored."
> >> +                */
> >> +               if (!(image->flags & VK_IMAGE_CREATE_SAMPLE_LOCATIONS_COMPATIBLE_DEPTH_BIT_EXT))
> >> +                       continue;
> >> +
> >> +               const VkSampleLocationsInfoEXT *sample_locs_info =
> >> +                       &att_sample_locs->sampleLocationsInfo;
> >> +
> >> +               state->attachments[att_idx].sample_location.per_pixel =
> >> +                       sample_locs_info->sampleLocationsPerPixel;
> >> +               state->attachments[att_idx].sample_location.grid_size =
> >> +                       sample_locs_info->sampleLocationGridSize;
> >> +               state->attachments[att_idx].sample_location.count =
> >> +                       sample_locs_info->sampleLocationsCount;
> >> +               typed_memcpy(&state->attachments[att_idx].sample_location.locations[0],
> >> +                            sample_locs_info->pSampleLocations,
> >> +                            sample_locs_info->sampleLocationsCount);
> >> +       }
> >> +
> >> +       state->subpass_sample_locs = vk_alloc(&cmd_buffer->pool->alloc,
> >> +                                             sample_locs->postSubpassSampleLocationsCount *
> >> +                                             sizeof(state->subpass_sample_locs[0]),
> >> +                                             8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
> >> +       if (state->subpass_sample_locs == NULL) {
> >> +               cmd_buffer->record_result = VK_ERROR_OUT_OF_HOST_MEMORY;
> >> +               return cmd_buffer->record_result;
> >> +       }
> >> +
> >> +       state->num_subpass_sample_locs = sample_locs->postSubpassSampleLocationsCount;
> >> +
> >> +       for (uint32_t i = 0; i < sample_locs->postSubpassSampleLocationsCount; i++) {
> >> +               const VkSubpassSampleLocationsEXT *subpass_sample_locs_info =
> >> +                       &sample_locs->pPostSubpassSampleLocations[i];
> >> +               const VkSampleLocationsInfoEXT *sample_locs_info =
> >> +                       &subpass_sample_locs_info->sampleLocationsInfo;
> >> +
> >> +               state->subpass_sample_locs[i].subpass_idx =
> >> +                       subpass_sample_locs_info->subpassIndex;
> >> +               state->subpass_sample_locs[i].sample_location.per_pixel =
> >> +                       sample_locs_info->sampleLocationsPerPixel;
> >> +               state->subpass_sample_locs[i].sample_location.grid_size =
> >> +                       sample_locs_info->sampleLocationGridSize;
> >> +               state->subpass_sample_locs[i].sample_location.count =
> >> +                       sample_locs_info->sampleLocationsCount;
> >> +               typed_memcpy(&state->subpass_sample_locs[i].sample_location.locations[0],
> >> +                            sample_locs_info->pSampleLocations,
> >> +                            sample_locs_info->sampleLocationsCount);
> >> +       }
> >> +
> >> +       return VK_SUCCESS;
> >> +}
> >> +
> >>   static VkResult
> >>   radv_cmd_state_setup_attachments(struct radv_cmd_buffer *cmd_buffer,
> >>                                   struct radv_render_pass *pass,
> >> @@ -2741,6 +2873,7 @@ radv_cmd_state_setup_attachments(struct radv_cmd_buffer *cmd_buffer,
> >>                  }
> >>
> >>                  state->attachments[i].current_layout = att->initial_layout;
> >> +               state->attachments[i].sample_location.count = 0;
> >>          }
> >>
> >>          return VK_SUCCESS;
> >> @@ -3171,6 +3304,7 @@ VkResult radv_EndCommandBuffer(
> >>          si_cp_dma_wait_for_idle(cmd_buffer);
> >>
> >>          vk_free(&cmd_buffer->pool->alloc, cmd_buffer->state.attachments);
> >> +       vk_free(&cmd_buffer->pool->alloc, cmd_buffer->state.subpass_sample_locs);
> >>
> >>          if (!cmd_buffer->device->ws->cs_finalize(cmd_buffer->cs))
> >>                  return vk_error(cmd_buffer->device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
> >> @@ -3669,19 +3803,6 @@ void radv_TrimCommandPool(
> >>          }
> >>   }
> >>
> >> -static uint32_t
> >> -radv_get_subpass_id(struct radv_cmd_buffer *cmd_buffer)
> >> -{
> >> -       struct radv_cmd_state *state = &cmd_buffer->state;
> >> -       uint32_t subpass_id = state->subpass - state->pass->subpasses;
> >> -
> >> -       /* The id of this subpass shouldn't exceed the number of subpasses in
> >> -        * this render pass minus 1.
> >> -        */
> >> -       assert(subpass_id < state->pass->subpass_count);
> >> -       return subpass_id;
> >> -}
> >> -
> >>   static void
> >>   radv_cmd_buffer_begin_subpass(struct radv_cmd_buffer *cmd_buffer,
> >>                                uint32_t subpass_id)
> >> @@ -3751,6 +3872,10 @@ void radv_CmdBeginRenderPass(
> >>          if (result != VK_SUCCESS)
> >>                  return;
> >>
> >> +       result = radv_cmd_state_setup_sample_locations(cmd_buffer, pass, pRenderPassBegin);
> >> +       if (result != VK_SUCCESS)
> >> +               return;
> >> +
> >>          radv_cmd_buffer_begin_subpass(cmd_buffer, 0);
> >>   }
> >>
> >> @@ -4593,11 +4718,13 @@ void radv_CmdEndRenderPass(
> >>          radv_cmd_buffer_end_subpass(cmd_buffer);
> >>
> >>          vk_free(&cmd_buffer->pool->alloc, cmd_buffer->state.attachments);
> >> +       vk_free(&cmd_buffer->pool->alloc, cmd_buffer->state.subpass_sample_locs);
> >>
> >>          cmd_buffer->state.pass = NULL;
> >>          cmd_buffer->state.subpass = NULL;
> >>          cmd_buffer->state.attachments = NULL;
> >>          cmd_buffer->state.framebuffer = NULL;
> >> +       cmd_buffer->state.subpass_sample_locs = NULL;
> >>   }
> >>
> >>   void radv_CmdEndRenderPass2KHR(
> >> diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
> >> index 180f8b976ab..76cc79b387d 100644
> >> --- a/src/amd/vulkan/radv_private.h
> >> +++ b/src/amd/vulkan/radv_private.h
> >> @@ -1024,6 +1024,7 @@ struct radv_attachment_state {
> >>          uint32_t                                     cleared_views;
> >>          VkClearValue                                 clear_value;
> >>          VkImageLayout                                current_layout;
> >> +       struct radv_sample_locations_state           sample_location;
> >>   };
> >>
> >>   struct radv_descriptor_state {
> >> @@ -1035,6 +1036,11 @@ struct radv_descriptor_state {
> >>          uint32_t dynamic_buffers[4 * MAX_DYNAMIC_BUFFERS];
> >>   };
> >>
> >> +struct radv_subpass_sample_locs_state {
> >> +       uint32_t subpass_idx;
> >> +       struct radv_sample_locations_state sample_location;
> >> +};
> >> +
> >>   struct radv_cmd_state {
> >>          /* Vertex descriptors */
> >>          uint64_t                                      vb_va;
> >> @@ -1057,6 +1063,9 @@ struct radv_cmd_state {
> >>          struct radv_streamout_state                  streamout;
> >>          VkRect2D                                     render_area;
> >>
> >> +       uint32_t                                     num_subpass_sample_locs;
> >> +       struct radv_subpass_sample_locs_state *      subpass_sample_locs;
> >> +
> >>          /* Index buffer */
> >>          struct radv_buffer                           *index_buffer;
> >>          uint64_t                                     index_offset;
> >> --
> >> 2.21.0
> >>
> >> _______________________________________________
> >> mesa-dev mailing list
> >> mesa-dev@lists.freedesktop.org
> >> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
On 6/6/19 3:41 AM, Bas Nieuwenhuizen wrote:
> On Wed, Jun 5, 2019 at 12:04 PM Samuel Pitoiset
> <samuel.pitoiset@gmail.com> wrote:
>>
>> On 6/5/19 2:51 AM, Bas Nieuwenhuizen wrote:
>>> On Thu, May 30, 2019 at 4:02 PM Samuel Pitoiset
>>> <samuel.pitoiset@gmail.com> wrote:
>>>>   From the Vulkan spec 1.1.109:
>>>>
>>>>      "Some implementations may need to evaluate depth image values
>>>>       while performing image layout transitions. To accommodate this,
>>>>       instances of the VkSampleLocationsInfoEXT structure can be
>>>>       specified for each situation where an explicit or automatic
>>>>       layout transition has to take place. [...] and
>>>>       VkRenderPassSampleLocationsBeginInfoEXT can be chained from
>>>>       VkRenderPassBeginInfo to provide sample locations for layout
>>>>       transitions performed implicitly by a render pass instance."
>>>>
>>>> Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
>>>> ---
>>>>    src/amd/vulkan/radv_cmd_buffer.c | 155 ++++++++++++++++++++++++++++---
>>>>    src/amd/vulkan/radv_private.h    |   9 ++
>>>>    2 files changed, 150 insertions(+), 14 deletions(-)
>>>>
>>>> diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
>>>> index 570acaa0905..81b3f5f9886 100644
>>>> --- a/src/amd/vulkan/radv_cmd_buffer.c
>>>> +++ b/src/amd/vulkan/radv_cmd_buffer.c
>>>> @@ -2645,11 +2645,55 @@ void radv_subpass_barrier(struct radv_cmd_buffer *cmd_buffer,
>>>>                                                                 NULL);
>>>>    }
>>>>
>>>> +static uint32_t
>>>> +radv_get_subpass_id(struct radv_cmd_buffer *cmd_buffer)
>>>> +{
>>>> +       struct radv_cmd_state *state = &cmd_buffer->state;
>>>> +       uint32_t subpass_id = state->subpass - state->pass->subpasses;
>>>> +
>>>> +       /* The id of this subpass shouldn't exceed the number of subpasses in
>>>> +        * this render pass minus 1.
>>>> +        */
>>>> +       assert(subpass_id < state->pass->subpass_count);
>>>> +       return subpass_id;
>>>> +}
>>>> +
>>>> +static struct radv_sample_locations_state *
>>>> +radv_get_attachment_sample_locations(struct radv_cmd_buffer *cmd_buffer,
>>>> +                                    uint32_t att_idx)
>>>> +{
>>>> +       struct radv_cmd_state *state = &cmd_buffer->state;
>>>> +       uint32_t subpass_id = radv_get_subpass_id(cmd_buffer);
>>> On the start of the first subpass this may not work as the subpass is
>>> not set yet?
>> Yes, this patch needs https://patchwork.freedesktop.org/series/61387/
>>>> +       struct radv_image_view *view = state->framebuffer->attachments[att_idx].attachment;
>>>> +
>>>> +       if (view->image->info.samples == 1)
>>>> +               return NULL;
>>>> +
>>>> +       if (state->pass->attachments[att_idx].first_subpass_idx == subpass_id) {
>>>> +               /* Return the initial sample locations if this is the initial
>>>> +                * layout transition of the given subpass attachemnt.
>>>> +                */
>>>> +               if (state->attachments[att_idx].sample_location.count > 0)
>>>> +                       return &state->attachments[att_idx].sample_location;
>>>> +       } else {
>>>> +               /* Otherwise return the subpass sample locations if defined. */
>>>> +               if (state->subpass_sample_locs) {
>>>> +                       for (uint32_t i = 0; i < state->num_subpass_sample_locs; i++) {
>>>> +                               if (state->subpass_sample_locs[i].subpass_idx == subpass_id)
>>>> +                                       return &state->subpass_sample_locs[i].sample_location;
> I think there is an off-by-1 here with the subpass patch applied.
>
> For the transition from subpass 0 to subpass 1, we should be using the
> locations from subpass 0, but by setting the subpass before the
> transitions, the subpass_id is 1.

No?

For the transition from subpass 0 to subpass 1, state->subpass should be 
equal to state->pass->subpasses, so radv_get_subpass_id() returns 0?

>
>>>> +                       }
>>>> +               }
>>>> +       }
>>>> +
>>>> +       return NULL;
>>>> +}
>>>> +
>>>>    static void radv_handle_subpass_image_transition(struct radv_cmd_buffer *cmd_buffer,
>>>>                                                    struct radv_subpass_attachment att)
>>>>    {
>>>>           unsigned idx = att.attachment;
>>>>           struct radv_image_view *view = cmd_buffer->state.framebuffer->attachments[idx].attachment;
>>>> +       struct radv_sample_locations_state *sample_locs;
>>>>           VkImageSubresourceRange range;
>>>>           range.aspectMask = 0;
>>>>           range.baseMipLevel = view->base_mip;
>>>> @@ -2668,10 +2712,15 @@ static void radv_handle_subpass_image_transition(struct radv_cmd_buffer *cmd_buf
>>>>                   range.layerCount = util_last_bit(cmd_buffer->state.subpass->view_mask);
>>>>           }
>>>>
>>>> +       /* Get the subpass sample locations for the given attachment, if NULL
>>>> +        * is returned the driver will use the default HW locations.
>>>> +        */
>>>> +       sample_locs = radv_get_attachment_sample_locations(cmd_buffer, idx);
>>>> +
>>>>           radv_handle_image_transition(cmd_buffer,
>>>>                                        view->image,
>>>>                                        cmd_buffer->state.attachments[idx].current_layout,
>>>> -                                    att.layout, 0, 0, &range, NULL);
>>>> +                                    att.layout, 0, 0, &range, sample_locs);
>>>>
>>>>           cmd_buffer->state.attachments[idx].current_layout = att.layout;
>>>>
>>>> @@ -2687,6 +2736,89 @@ radv_cmd_buffer_set_subpass(struct radv_cmd_buffer *cmd_buffer,
>>>>           cmd_buffer->state.dirty |= RADV_CMD_DIRTY_FRAMEBUFFER;
>>>>    }
>>>>
>>>> +static VkResult
>>>> +radv_cmd_state_setup_sample_locations(struct radv_cmd_buffer *cmd_buffer,
>>>> +                                     struct radv_render_pass *pass,
>>>> +                                     const VkRenderPassBeginInfo *info)
>>>> +{
>>>> +       const struct VkRenderPassSampleLocationsBeginInfoEXT *sample_locs =
>>>> +               vk_find_struct_const(info->pNext,
>>>> +                                    RENDER_PASS_SAMPLE_LOCATIONS_BEGIN_INFO_EXT);
>>>> +       struct radv_cmd_state *state = &cmd_buffer->state;
>>>> +       struct radv_framebuffer *framebuffer = state->framebuffer;
>>>> +
>>>> +       if (!sample_locs) {
>>>> +               state->subpass_sample_locs = NULL;
>>>> +               return VK_SUCCESS;
>>>> +       }
>>>> +
>>>> +       for (uint32_t i = 0; i < sample_locs->attachmentInitialSampleLocationsCount; i++) {
>>>> +               const VkAttachmentSampleLocationsEXT *att_sample_locs =
>>>> +                       &sample_locs->pAttachmentInitialSampleLocations[i];
>>>> +               uint32_t att_idx = att_sample_locs->attachmentIndex;
>>>> +               struct radv_attachment_info *att = &framebuffer->attachments[att_idx];
>>>> +               struct radv_image *image = att->attachment->image;
>>>> +
>>>> +               assert(vk_format_is_depth_or_stencil(image->vk_format));
>>>> +
>>>> +               /* From the Vulkan spec 1.1.108:
>>>> +                *
>>>> +                * "If the image referenced by the framebuffer attachment at
>>>> +                *  index attachmentIndex was not created with
>>>> +                *  VK_IMAGE_CREATE_SAMPLE_LOCATIONS_COMPATIBLE_DEPTH_BIT_EXT
>>>> +                *  then the values specified in sampleLocationsInfo are
>>>> +                *  ignored."
>>>> +                */
>>>> +               if (!(image->flags & VK_IMAGE_CREATE_SAMPLE_LOCATIONS_COMPATIBLE_DEPTH_BIT_EXT))
>>>> +                       continue;
>>>> +
>>>> +               const VkSampleLocationsInfoEXT *sample_locs_info =
>>>> +                       &att_sample_locs->sampleLocationsInfo;
>>>> +
>>>> +               state->attachments[att_idx].sample_location.per_pixel =
>>>> +                       sample_locs_info->sampleLocationsPerPixel;
>>>> +               state->attachments[att_idx].sample_location.grid_size =
>>>> +                       sample_locs_info->sampleLocationGridSize;
>>>> +               state->attachments[att_idx].sample_location.count =
>>>> +                       sample_locs_info->sampleLocationsCount;
>>>> +               typed_memcpy(&state->attachments[att_idx].sample_location.locations[0],
>>>> +                            sample_locs_info->pSampleLocations,
>>>> +                            sample_locs_info->sampleLocationsCount);
>>>> +       }
>>>> +
>>>> +       state->subpass_sample_locs = vk_alloc(&cmd_buffer->pool->alloc,
>>>> +                                             sample_locs->postSubpassSampleLocationsCount *
>>>> +                                             sizeof(state->subpass_sample_locs[0]),
>>>> +                                             8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
>>>> +       if (state->subpass_sample_locs == NULL) {
>>>> +               cmd_buffer->record_result = VK_ERROR_OUT_OF_HOST_MEMORY;
>>>> +               return cmd_buffer->record_result;
>>>> +       }
>>>> +
>>>> +       state->num_subpass_sample_locs = sample_locs->postSubpassSampleLocationsCount;
>>>> +
>>>> +       for (uint32_t i = 0; i < sample_locs->postSubpassSampleLocationsCount; i++) {
>>>> +               const VkSubpassSampleLocationsEXT *subpass_sample_locs_info =
>>>> +                       &sample_locs->pPostSubpassSampleLocations[i];
>>>> +               const VkSampleLocationsInfoEXT *sample_locs_info =
>>>> +                       &subpass_sample_locs_info->sampleLocationsInfo;
>>>> +
>>>> +               state->subpass_sample_locs[i].subpass_idx =
>>>> +                       subpass_sample_locs_info->subpassIndex;
>>>> +               state->subpass_sample_locs[i].sample_location.per_pixel =
>>>> +                       sample_locs_info->sampleLocationsPerPixel;
>>>> +               state->subpass_sample_locs[i].sample_location.grid_size =
>>>> +                       sample_locs_info->sampleLocationGridSize;
>>>> +               state->subpass_sample_locs[i].sample_location.count =
>>>> +                       sample_locs_info->sampleLocationsCount;
>>>> +               typed_memcpy(&state->subpass_sample_locs[i].sample_location.locations[0],
>>>> +                            sample_locs_info->pSampleLocations,
>>>> +                            sample_locs_info->sampleLocationsCount);
>>>> +       }
>>>> +
>>>> +       return VK_SUCCESS;
>>>> +}
>>>> +
>>>>    static VkResult
>>>>    radv_cmd_state_setup_attachments(struct radv_cmd_buffer *cmd_buffer,
>>>>                                    struct radv_render_pass *pass,
>>>> @@ -2741,6 +2873,7 @@ radv_cmd_state_setup_attachments(struct radv_cmd_buffer *cmd_buffer,
>>>>                   }
>>>>
>>>>                   state->attachments[i].current_layout = att->initial_layout;
>>>> +               state->attachments[i].sample_location.count = 0;
>>>>           }
>>>>
>>>>           return VK_SUCCESS;
>>>> @@ -3171,6 +3304,7 @@ VkResult radv_EndCommandBuffer(
>>>>           si_cp_dma_wait_for_idle(cmd_buffer);
>>>>
>>>>           vk_free(&cmd_buffer->pool->alloc, cmd_buffer->state.attachments);
>>>> +       vk_free(&cmd_buffer->pool->alloc, cmd_buffer->state.subpass_sample_locs);
>>>>
>>>>           if (!cmd_buffer->device->ws->cs_finalize(cmd_buffer->cs))
>>>>                   return vk_error(cmd_buffer->device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
>>>> @@ -3669,19 +3803,6 @@ void radv_TrimCommandPool(
>>>>           }
>>>>    }
>>>>
>>>> -static uint32_t
>>>> -radv_get_subpass_id(struct radv_cmd_buffer *cmd_buffer)
>>>> -{
>>>> -       struct radv_cmd_state *state = &cmd_buffer->state;
>>>> -       uint32_t subpass_id = state->subpass - state->pass->subpasses;
>>>> -
>>>> -       /* The id of this subpass shouldn't exceed the number of subpasses in
>>>> -        * this render pass minus 1.
>>>> -        */
>>>> -       assert(subpass_id < state->pass->subpass_count);
>>>> -       return subpass_id;
>>>> -}
>>>> -
>>>>    static void
>>>>    radv_cmd_buffer_begin_subpass(struct radv_cmd_buffer *cmd_buffer,
>>>>                                 uint32_t subpass_id)
>>>> @@ -3751,6 +3872,10 @@ void radv_CmdBeginRenderPass(
>>>>           if (result != VK_SUCCESS)
>>>>                   return;
>>>>
>>>> +       result = radv_cmd_state_setup_sample_locations(cmd_buffer, pass, pRenderPassBegin);
>>>> +       if (result != VK_SUCCESS)
>>>> +               return;
>>>> +
>>>>           radv_cmd_buffer_begin_subpass(cmd_buffer, 0);
>>>>    }
>>>>
>>>> @@ -4593,11 +4718,13 @@ void radv_CmdEndRenderPass(
>>>>           radv_cmd_buffer_end_subpass(cmd_buffer);
>>>>
>>>>           vk_free(&cmd_buffer->pool->alloc, cmd_buffer->state.attachments);
>>>> +       vk_free(&cmd_buffer->pool->alloc, cmd_buffer->state.subpass_sample_locs);
>>>>
>>>>           cmd_buffer->state.pass = NULL;
>>>>           cmd_buffer->state.subpass = NULL;
>>>>           cmd_buffer->state.attachments = NULL;
>>>>           cmd_buffer->state.framebuffer = NULL;
>>>> +       cmd_buffer->state.subpass_sample_locs = NULL;
>>>>    }
>>>>
>>>>    void radv_CmdEndRenderPass2KHR(
>>>> diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
>>>> index 180f8b976ab..76cc79b387d 100644
>>>> --- a/src/amd/vulkan/radv_private.h
>>>> +++ b/src/amd/vulkan/radv_private.h
>>>> @@ -1024,6 +1024,7 @@ struct radv_attachment_state {
>>>>           uint32_t                                     cleared_views;
>>>>           VkClearValue                                 clear_value;
>>>>           VkImageLayout                                current_layout;
>>>> +       struct radv_sample_locations_state           sample_location;
>>>>    };
>>>>
>>>>    struct radv_descriptor_state {
>>>> @@ -1035,6 +1036,11 @@ struct radv_descriptor_state {
>>>>           uint32_t dynamic_buffers[4 * MAX_DYNAMIC_BUFFERS];
>>>>    };
>>>>
>>>> +struct radv_subpass_sample_locs_state {
>>>> +       uint32_t subpass_idx;
>>>> +       struct radv_sample_locations_state sample_location;
>>>> +};
>>>> +
>>>>    struct radv_cmd_state {
>>>>           /* Vertex descriptors */
>>>>           uint64_t                                      vb_va;
>>>> @@ -1057,6 +1063,9 @@ struct radv_cmd_state {
>>>>           struct radv_streamout_state                  streamout;
>>>>           VkRect2D                                     render_area;
>>>>
>>>> +       uint32_t                                     num_subpass_sample_locs;
>>>> +       struct radv_subpass_sample_locs_state *      subpass_sample_locs;
>>>> +
>>>>           /* Index buffer */
>>>>           struct radv_buffer                           *index_buffer;
>>>>           uint64_t                                     index_offset;
>>>> --
>>>> 2.21.0
>>>>
>>>> _______________________________________________
>>>> mesa-dev mailing list
>>>> mesa-dev@lists.freedesktop.org
>>>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev