radv: rework the TC-compat HTILE hardware bug with COND_EXEC

Submitted by Samuel Pitoiset on Dec. 3, 2018, 10:15 p.m.

Details

Message ID 20181203221509.2765-1-samuel.pitoiset@gmail.com
State Accepted
Headers show
Series "radv: rework the TC-compat HTILE hardware bug with COND_EXEC" ( rev: 1 ) in Mesa

Browsing this patch as part of:
"radv: rework the TC-compat HTILE hardware bug with COND_EXEC" rev 1 in Mesa
<< prev patch [1/1] next patch >>

Commit Message

Samuel Pitoiset Dec. 3, 2018, 10:15 p.m.
After investigating on this, it appears that COND_WRITE doesn't
work correctly in some situations. I don't know exactly why does
it fail to update DB_Z_INFO.ZRANGE_PRECISION, but as AMDVLK
also uses COND_EXEC I think there is a reason.

Now the driver stores a new metadata value in order to reflect
the last fast depth clear state. If a TC-compat HTILE is fast cleared
with 0.0f, we have to update ZRANGE_PRECISION to 0 in order to
work around that hardware bug.

This fixes rendering issues with The Forest and DXVK and doesn't
seem to introduce any regressions.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=108914
Fixes: 68dead112e7 ("radv: update the ZRANGE_PRECISION value for the TC-compat bug")
Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
---
 src/amd/vulkan/radv_cmd_buffer.c | 91 ++++++++++++++++++++++----------
 src/amd/vulkan/radv_image.c      | 10 +++-
 src/amd/vulkan/radv_private.h    |  2 +
 3 files changed, 75 insertions(+), 28 deletions(-)

Patch hide | download patch | download mbox

diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index ccf762ead46..23909a0f7dd 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -1067,7 +1067,7 @@  static void
 radv_update_zrange_precision(struct radv_cmd_buffer *cmd_buffer,
 			     struct radv_ds_buffer_info *ds,
 			     struct radv_image *image, VkImageLayout layout,
-			     bool requires_cond_write)
+			     bool requires_cond_exec)
 {
 	uint32_t db_z_info = ds->db_z_info;
 	uint32_t db_z_info_reg;
@@ -1091,38 +1091,21 @@  radv_update_zrange_precision(struct radv_cmd_buffer *cmd_buffer,
 	}
 
 	/* When we don't know the last fast clear value we need to emit a
-	 * conditional packet, otherwise we can update DB_Z_INFO directly.
+	 * conditional packet that will eventually skip the following
+	 * SET_CONTEXT_REG packet.
 	 */
-	if (requires_cond_write) {
-		radeon_emit(cmd_buffer->cs, PKT3(PKT3_COND_WRITE, 7, 0));
-
-		const uint32_t write_space = 0 << 8;	/* register */
-		const uint32_t poll_space = 1 << 4;	/* memory */
-		const uint32_t function = 3 << 0;	/* equal to the reference */
-		const uint32_t options = write_space | poll_space | function;
-		radeon_emit(cmd_buffer->cs, options);
-
-		/* poll address - location of the depth clear value */
+	if (requires_cond_exec) {
 		uint64_t va = radv_buffer_get_va(image->bo);
-		va += image->offset + image->clear_value_offset;
-
-		/* In presence of stencil format, we have to adjust the base
-		 * address because the first value is the stencil clear value.
-		 */
-		if (vk_format_is_stencil(image->vk_format))
-			va += 4;
+		va += image->offset + image->tc_compat_zrange_offset;
 
+		radeon_emit(cmd_buffer->cs, PKT3(PKT3_COND_EXEC, 3, 0));
 		radeon_emit(cmd_buffer->cs, va);
 		radeon_emit(cmd_buffer->cs, va >> 32);
-
-		radeon_emit(cmd_buffer->cs, fui(0.0f));		 /* reference value */
-		radeon_emit(cmd_buffer->cs, (uint32_t)-1);	 /* comparison mask */
-		radeon_emit(cmd_buffer->cs, db_z_info_reg >> 2); /* write address low */
-		radeon_emit(cmd_buffer->cs, 0u);		 /* write address high */
-		radeon_emit(cmd_buffer->cs, db_z_info);
-	} else {
-		radeon_set_context_reg(cmd_buffer->cs, db_z_info_reg, db_z_info);
+		radeon_emit(cmd_buffer->cs, 0);
+		radeon_emit(cmd_buffer->cs, 3); /* SET_CONTEXT_REG size */
 	}
+
+	radeon_set_context_reg(cmd_buffer->cs, db_z_info_reg, db_z_info);
 }
 
 static void
@@ -1269,6 +1252,45 @@  radv_set_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer,
 		radeon_emit(cs, fui(ds_clear_value.depth));
 }
 
+/**
+ * Update the TC-compat metadata value for this image.
+ */
+static void
+radv_set_tc_compat_zrange_metadata(struct radv_cmd_buffer *cmd_buffer,
+				   struct radv_image *image,
+				   uint32_t value)
+{
+	struct radeon_cmdbuf *cs = cmd_buffer->cs;
+	uint64_t va = radv_buffer_get_va(image->bo);
+	va += image->offset + image->tc_compat_zrange_offset;
+
+	radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
+	radeon_emit(cs, S_370_DST_SEL(V_370_MEM_ASYNC) |
+			S_370_WR_CONFIRM(1) |
+			S_370_ENGINE_SEL(V_370_PFP));
+	radeon_emit(cs, va);
+	radeon_emit(cs, va >> 32);
+	radeon_emit(cs, value);
+}
+
+static void
+radv_update_tc_compat_zrange_metadata(struct radv_cmd_buffer *cmd_buffer,
+				      struct radv_image *image,
+				      VkClearDepthStencilValue ds_clear_value)
+{
+	struct radeon_cmdbuf *cs = cmd_buffer->cs;
+	uint64_t va = radv_buffer_get_va(image->bo);
+	va += image->offset + image->tc_compat_zrange_offset;
+	uint32_t cond_val;
+
+	/* Conditionally set DB_Z_INFO.ZRANGE_PRECISION to 0 when the last
+	 * depth clear value is 0.0f.
+	 */
+	cond_val = ds_clear_value.depth == 0.0f ? UINT_MAX : 0;
+
+	radv_set_tc_compat_zrange_metadata(cmd_buffer, image, cond_val);
+}
+
 /**
  * Update the clear depth/stencil values for this image.
  */
@@ -1282,6 +1304,12 @@  radv_update_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer,
 
 	radv_set_ds_clear_metadata(cmd_buffer, image, ds_clear_value, aspects);
 
+	if (radv_image_is_tc_compat_htile(image) &&
+	    (aspects & VK_IMAGE_ASPECT_DEPTH_BIT)) {
+		radv_update_tc_compat_zrange_metadata(cmd_buffer, image,
+						      ds_clear_value);
+	}
+
 	radv_update_bound_fast_clear_ds(cmd_buffer, image, ds_clear_value,
 				        aspects);
 }
@@ -4229,6 +4257,15 @@  static void radv_initialize_htile(struct radv_cmd_buffer *cmd_buffer,
 		aspects |= VK_IMAGE_ASPECT_STENCIL_BIT;
 
 	radv_set_ds_clear_metadata(cmd_buffer, image, value, aspects);
+
+	if (radv_image_is_tc_compat_htile(image)) {
+		/* Initialize the TC-compat metada value to 0 because by
+		 * default DB_Z_INFO.RANGE_PRECISION is set to 1, and we only
+		 * need have to conditionally update its value when performing
+		 * a fast depth clear.
+		 */
+		radv_set_tc_compat_zrange_metadata(cmd_buffer, image, 0);
+	}
 }
 
 static void radv_handle_depth_image_transition(struct radv_cmd_buffer *cmd_buffer,
diff --git a/src/amd/vulkan/radv_image.c b/src/amd/vulkan/radv_image.c
index f447166d80c..9fcbaa3184c 100644
--- a/src/amd/vulkan/radv_image.c
+++ b/src/amd/vulkan/radv_image.c
@@ -870,6 +870,14 @@  radv_image_alloc_htile(struct radv_image *image)
 	/* + 8 for storing the clear values */
 	image->clear_value_offset = image->htile_offset + image->surface.htile_size;
 	image->size = image->clear_value_offset + 8;
+	if (radv_image_is_tc_compat_htile(image)) {
+		/* Metadata for the TC-compatible HTILE hardware bug which
+		 * have to be fixed by updating ZRANGE_PRECISION when doing
+		 * fast depth clears to 0.0f.
+		 */
+		image->tc_compat_zrange_offset = image->clear_value_offset + 8;
+		image->size = image->clear_value_offset + 16;
+	}
 	image->alignment = align64(image->alignment, image->surface.htile_alignment);
 }
 
@@ -1014,8 +1022,8 @@  radv_image_create(VkDevice _device,
 			/* Otherwise, try to enable HTILE for depth surfaces. */
 			if (radv_image_can_enable_htile(image) &&
 			    !(device->instance->debug_flags & RADV_DEBUG_NO_HIZ)) {
-				radv_image_alloc_htile(image);
 				image->tc_compatible_htile = image->surface.flags & RADEON_SURF_TC_COMPATIBLE_HTILE;
+				radv_image_alloc_htile(image);
 			} else {
 				image->surface.htile_size = 0;
 			}
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index ac756f2c247..1ef5b215db7 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -1503,6 +1503,8 @@  struct radv_image {
 	uint64_t clear_value_offset;
 	uint64_t fce_pred_offset;
 
+	uint64_t tc_compat_zrange_offset;
+
 	/* For VK_ANDROID_native_buffer, the WSI image owns the memory, */
 	VkDeviceMemory owned_memory;
 };

Comments

On Mon, Dec 3, 2018 at 11:13 PM Samuel Pitoiset
<samuel.pitoiset@gmail.com> wrote:
>
> After investigating on this, it appears that COND_WRITE doesn't
> work correctly in some situations. I don't know exactly why does
> it fail to update DB_Z_INFO.ZRANGE_PRECISION, but as AMDVLK
> also uses COND_EXEC I think there is a reason.
>
> Now the driver stores a new metadata value in order to reflect
> the last fast depth clear state. If a TC-compat HTILE is fast cleared
> with 0.0f, we have to update ZRANGE_PRECISION to 0 in order to
> work around that hardware bug.
>
> This fixes rendering issues with The Forest and DXVK and doesn't
> seem to introduce any regressions.
>
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=108914
> Fixes: 68dead112e7 ("radv: update the ZRANGE_PRECISION value for the TC-compat bug")
> Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
> ---
>  src/amd/vulkan/radv_cmd_buffer.c | 91 ++++++++++++++++++++++----------
>  src/amd/vulkan/radv_image.c      | 10 +++-
>  src/amd/vulkan/radv_private.h    |  2 +
>  3 files changed, 75 insertions(+), 28 deletions(-)
>
> diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
> index ccf762ead46..23909a0f7dd 100644
> --- a/src/amd/vulkan/radv_cmd_buffer.c
> +++ b/src/amd/vulkan/radv_cmd_buffer.c
> @@ -1067,7 +1067,7 @@ static void
>  radv_update_zrange_precision(struct radv_cmd_buffer *cmd_buffer,
>                              struct radv_ds_buffer_info *ds,
>                              struct radv_image *image, VkImageLayout layout,
> -                            bool requires_cond_write)
> +                            bool requires_cond_exec)
>  {
>         uint32_t db_z_info = ds->db_z_info;
>         uint32_t db_z_info_reg;
> @@ -1091,38 +1091,21 @@ radv_update_zrange_precision(struct radv_cmd_buffer *cmd_buffer,
>         }
>
>         /* When we don't know the last fast clear value we need to emit a
> -        * conditional packet, otherwise we can update DB_Z_INFO directly.
> +        * conditional packet that will eventually skip the following
> +        * SET_CONTEXT_REG packet.
>          */
> -       if (requires_cond_write) {
> -               radeon_emit(cmd_buffer->cs, PKT3(PKT3_COND_WRITE, 7, 0));
> -
> -               const uint32_t write_space = 0 << 8;    /* register */
> -               const uint32_t poll_space = 1 << 4;     /* memory */
> -               const uint32_t function = 3 << 0;       /* equal to the reference */
> -               const uint32_t options = write_space | poll_space | function;
> -               radeon_emit(cmd_buffer->cs, options);
> -
> -               /* poll address - location of the depth clear value */
> +       if (requires_cond_exec) {
>                 uint64_t va = radv_buffer_get_va(image->bo);
> -               va += image->offset + image->clear_value_offset;
> -
> -               /* In presence of stencil format, we have to adjust the base
> -                * address because the first value is the stencil clear value.
> -                */
> -               if (vk_format_is_stencil(image->vk_format))
> -                       va += 4;
> +               va += image->offset + image->tc_compat_zrange_offset;
>
> +               radeon_emit(cmd_buffer->cs, PKT3(PKT3_COND_EXEC, 3, 0));
>                 radeon_emit(cmd_buffer->cs, va);
>                 radeon_emit(cmd_buffer->cs, va >> 32);
> -
> -               radeon_emit(cmd_buffer->cs, fui(0.0f));          /* reference value */
> -               radeon_emit(cmd_buffer->cs, (uint32_t)-1);       /* comparison mask */
> -               radeon_emit(cmd_buffer->cs, db_z_info_reg >> 2); /* write address low */
> -               radeon_emit(cmd_buffer->cs, 0u);                 /* write address high */
> -               radeon_emit(cmd_buffer->cs, db_z_info);
> -       } else {
> -               radeon_set_context_reg(cmd_buffer->cs, db_z_info_reg, db_z_info);
> +               radeon_emit(cmd_buffer->cs, 0);
> +               radeon_emit(cmd_buffer->cs, 3); /* SET_CONTEXT_REG size */
>         }
> +
> +       radeon_set_context_reg(cmd_buffer->cs, db_z_info_reg, db_z_info);
>  }
>
>  static void
> @@ -1269,6 +1252,45 @@ radv_set_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer,
>                 radeon_emit(cs, fui(ds_clear_value.depth));
>  }
>
> +/**
> + * Update the TC-compat metadata value for this image.
> + */
> +static void
> +radv_set_tc_compat_zrange_metadata(struct radv_cmd_buffer *cmd_buffer,
> +                                  struct radv_image *image,
> +                                  uint32_t value)
> +{
> +       struct radeon_cmdbuf *cs = cmd_buffer->cs;
> +       uint64_t va = radv_buffer_get_va(image->bo);
> +       va += image->offset + image->tc_compat_zrange_offset;
> +
> +       radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
> +       radeon_emit(cs, S_370_DST_SEL(V_370_MEM_ASYNC) |
> +                       S_370_WR_CONFIRM(1) |
> +                       S_370_ENGINE_SEL(V_370_PFP));
> +       radeon_emit(cs, va);
> +       radeon_emit(cs, va >> 32);
> +       radeon_emit(cs, value);
> +}
> +
> +static void
> +radv_update_tc_compat_zrange_metadata(struct radv_cmd_buffer *cmd_buffer,
> +                                     struct radv_image *image,
> +                                     VkClearDepthStencilValue ds_clear_value)
> +{
> +       struct radeon_cmdbuf *cs = cmd_buffer->cs;
> +       uint64_t va = radv_buffer_get_va(image->bo);
> +       va += image->offset + image->tc_compat_zrange_offset;
> +       uint32_t cond_val;
> +
> +       /* Conditionally set DB_Z_INFO.ZRANGE_PRECISION to 0 when the last
> +        * depth clear value is 0.0f.
> +        */
> +       cond_val = ds_clear_value.depth == 0.0f ? UINT_MAX : 0;
> +
> +       radv_set_tc_compat_zrange_metadata(cmd_buffer, image, cond_val);
> +}
> +
>  /**
>   * Update the clear depth/stencil values for this image.
>   */
> @@ -1282,6 +1304,12 @@ radv_update_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer,
>
>         radv_set_ds_clear_metadata(cmd_buffer, image, ds_clear_value, aspects);
>
> +       if (radv_image_is_tc_compat_htile(image) &&
> +           (aspects & VK_IMAGE_ASPECT_DEPTH_BIT)) {
> +               radv_update_tc_compat_zrange_metadata(cmd_buffer, image,
> +                                                     ds_clear_value);
> +       }
> +
>         radv_update_bound_fast_clear_ds(cmd_buffer, image, ds_clear_value,
>                                         aspects);
>  }
> @@ -4229,6 +4257,15 @@ static void radv_initialize_htile(struct radv_cmd_buffer *cmd_buffer,
>                 aspects |= VK_IMAGE_ASPECT_STENCIL_BIT;
>
>         radv_set_ds_clear_metadata(cmd_buffer, image, value, aspects);
> +
> +       if (radv_image_is_tc_compat_htile(image)) {
> +               /* Initialize the TC-compat metada value to 0 because by
> +                * default DB_Z_INFO.RANGE_PRECISION is set to 1, and we only
> +                * need have to conditionally update its value when performing
> +                * a fast depth clear.
> +                */
> +               radv_set_tc_compat_zrange_metadata(cmd_buffer, image, 0);
> +       }
>  }
>
>  static void radv_handle_depth_image_transition(struct radv_cmd_buffer *cmd_buffer,
> diff --git a/src/amd/vulkan/radv_image.c b/src/amd/vulkan/radv_image.c
> index f447166d80c..9fcbaa3184c 100644
> --- a/src/amd/vulkan/radv_image.c
> +++ b/src/amd/vulkan/radv_image.c
> @@ -870,6 +870,14 @@ radv_image_alloc_htile(struct radv_image *image)
>         /* + 8 for storing the clear values */
>         image->clear_value_offset = image->htile_offset + image->surface.htile_size;
>         image->size = image->clear_value_offset + 8;
> +       if (radv_image_is_tc_compat_htile(image)) {
> +               /* Metadata for the TC-compatible HTILE hardware bug which
> +                * have to be fixed by updating ZRANGE_PRECISION when doing
> +                * fast depth clears to 0.0f.
> +                */
> +               image->tc_compat_zrange_offset = image->clear_value_offset + 8;
> +               image->size = image->clear_value_offset + 16;
> +       }
>         image->alignment = align64(image->alignment, image->surface.htile_alignment);
>  }
>
> @@ -1014,8 +1022,8 @@ radv_image_create(VkDevice _device,
>                         /* Otherwise, try to enable HTILE for depth surfaces. */
>                         if (radv_image_can_enable_htile(image) &&
>                             !(device->instance->debug_flags & RADV_DEBUG_NO_HIZ)) {
> -                               radv_image_alloc_htile(image);
>                                 image->tc_compatible_htile = image->surface.flags & RADEON_SURF_TC_COMPATIBLE_HTILE;
> +                               radv_image_alloc_htile(image);
>                         } else {
>                                 image->surface.htile_size = 0;
>                         }
> diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
> index ac756f2c247..1ef5b215db7 100644
> --- a/src/amd/vulkan/radv_private.h
> +++ b/src/amd/vulkan/radv_private.h
> @@ -1503,6 +1503,8 @@ struct radv_image {
>         uint64_t clear_value_offset;
>         uint64_t fce_pred_offset;
>
> +       uint64_t tc_compat_zrange_offset;

Can we document what the values put into this memory location mean?

Otherwise

Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>

> +
>         /* For VK_ANDROID_native_buffer, the WSI image owns the memory, */
>         VkDeviceMemory owned_memory;
>  };
> --
> 2.19.2
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
On 12/3/18 11:21 PM, Bas Nieuwenhuizen wrote:
> On Mon, Dec 3, 2018 at 11:13 PM Samuel Pitoiset
> <samuel.pitoiset@gmail.com> wrote:
>>
>> After investigating on this, it appears that COND_WRITE doesn't
>> work correctly in some situations. I don't know exactly why does
>> it fail to update DB_Z_INFO.ZRANGE_PRECISION, but as AMDVLK
>> also uses COND_EXEC I think there is a reason.
>>
>> Now the driver stores a new metadata value in order to reflect
>> the last fast depth clear state. If a TC-compat HTILE is fast cleared
>> with 0.0f, we have to update ZRANGE_PRECISION to 0 in order to
>> work around that hardware bug.
>>
>> This fixes rendering issues with The Forest and DXVK and doesn't
>> seem to introduce any regressions.
>>
>> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=108914
>> Fixes: 68dead112e7 ("radv: update the ZRANGE_PRECISION value for the TC-compat bug")
>> Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
>> ---
>>   src/amd/vulkan/radv_cmd_buffer.c | 91 ++++++++++++++++++++++----------
>>   src/amd/vulkan/radv_image.c      | 10 +++-
>>   src/amd/vulkan/radv_private.h    |  2 +
>>   3 files changed, 75 insertions(+), 28 deletions(-)
>>
>> diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
>> index ccf762ead46..23909a0f7dd 100644
>> --- a/src/amd/vulkan/radv_cmd_buffer.c
>> +++ b/src/amd/vulkan/radv_cmd_buffer.c
>> @@ -1067,7 +1067,7 @@ static void
>>   radv_update_zrange_precision(struct radv_cmd_buffer *cmd_buffer,
>>                               struct radv_ds_buffer_info *ds,
>>                               struct radv_image *image, VkImageLayout layout,
>> -                            bool requires_cond_write)
>> +                            bool requires_cond_exec)
>>   {
>>          uint32_t db_z_info = ds->db_z_info;
>>          uint32_t db_z_info_reg;
>> @@ -1091,38 +1091,21 @@ radv_update_zrange_precision(struct radv_cmd_buffer *cmd_buffer,
>>          }
>>
>>          /* When we don't know the last fast clear value we need to emit a
>> -        * conditional packet, otherwise we can update DB_Z_INFO directly.
>> +        * conditional packet that will eventually skip the following
>> +        * SET_CONTEXT_REG packet.
>>           */
>> -       if (requires_cond_write) {
>> -               radeon_emit(cmd_buffer->cs, PKT3(PKT3_COND_WRITE, 7, 0));
>> -
>> -               const uint32_t write_space = 0 << 8;    /* register */
>> -               const uint32_t poll_space = 1 << 4;     /* memory */
>> -               const uint32_t function = 3 << 0;       /* equal to the reference */
>> -               const uint32_t options = write_space | poll_space | function;
>> -               radeon_emit(cmd_buffer->cs, options);
>> -
>> -               /* poll address - location of the depth clear value */
>> +       if (requires_cond_exec) {
>>                  uint64_t va = radv_buffer_get_va(image->bo);
>> -               va += image->offset + image->clear_value_offset;
>> -
>> -               /* In presence of stencil format, we have to adjust the base
>> -                * address because the first value is the stencil clear value.
>> -                */
>> -               if (vk_format_is_stencil(image->vk_format))
>> -                       va += 4;
>> +               va += image->offset + image->tc_compat_zrange_offset;
>>
>> +               radeon_emit(cmd_buffer->cs, PKT3(PKT3_COND_EXEC, 3, 0));
>>                  radeon_emit(cmd_buffer->cs, va);
>>                  radeon_emit(cmd_buffer->cs, va >> 32);
>> -
>> -               radeon_emit(cmd_buffer->cs, fui(0.0f));          /* reference value */
>> -               radeon_emit(cmd_buffer->cs, (uint32_t)-1);       /* comparison mask */
>> -               radeon_emit(cmd_buffer->cs, db_z_info_reg >> 2); /* write address low */
>> -               radeon_emit(cmd_buffer->cs, 0u);                 /* write address high */
>> -               radeon_emit(cmd_buffer->cs, db_z_info);
>> -       } else {
>> -               radeon_set_context_reg(cmd_buffer->cs, db_z_info_reg, db_z_info);
>> +               radeon_emit(cmd_buffer->cs, 0);
>> +               radeon_emit(cmd_buffer->cs, 3); /* SET_CONTEXT_REG size */
>>          }
>> +
>> +       radeon_set_context_reg(cmd_buffer->cs, db_z_info_reg, db_z_info);
>>   }
>>
>>   static void
>> @@ -1269,6 +1252,45 @@ radv_set_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer,
>>                  radeon_emit(cs, fui(ds_clear_value.depth));
>>   }
>>
>> +/**
>> + * Update the TC-compat metadata value for this image.
>> + */
>> +static void
>> +radv_set_tc_compat_zrange_metadata(struct radv_cmd_buffer *cmd_buffer,
>> +                                  struct radv_image *image,
>> +                                  uint32_t value)
>> +{
>> +       struct radeon_cmdbuf *cs = cmd_buffer->cs;
>> +       uint64_t va = radv_buffer_get_va(image->bo);
>> +       va += image->offset + image->tc_compat_zrange_offset;
>> +
>> +       radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
>> +       radeon_emit(cs, S_370_DST_SEL(V_370_MEM_ASYNC) |
>> +                       S_370_WR_CONFIRM(1) |
>> +                       S_370_ENGINE_SEL(V_370_PFP));
>> +       radeon_emit(cs, va);
>> +       radeon_emit(cs, va >> 32);
>> +       radeon_emit(cs, value);
>> +}
>> +
>> +static void
>> +radv_update_tc_compat_zrange_metadata(struct radv_cmd_buffer *cmd_buffer,
>> +                                     struct radv_image *image,
>> +                                     VkClearDepthStencilValue ds_clear_value)
>> +{
>> +       struct radeon_cmdbuf *cs = cmd_buffer->cs;
>> +       uint64_t va = radv_buffer_get_va(image->bo);
>> +       va += image->offset + image->tc_compat_zrange_offset;
>> +       uint32_t cond_val;
>> +
>> +       /* Conditionally set DB_Z_INFO.ZRANGE_PRECISION to 0 when the last
>> +        * depth clear value is 0.0f.
>> +        */
>> +       cond_val = ds_clear_value.depth == 0.0f ? UINT_MAX : 0;
>> +
>> +       radv_set_tc_compat_zrange_metadata(cmd_buffer, image, cond_val);
>> +}
>> +
>>   /**
>>    * Update the clear depth/stencil values for this image.
>>    */
>> @@ -1282,6 +1304,12 @@ radv_update_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer,
>>
>>          radv_set_ds_clear_metadata(cmd_buffer, image, ds_clear_value, aspects);
>>
>> +       if (radv_image_is_tc_compat_htile(image) &&
>> +           (aspects & VK_IMAGE_ASPECT_DEPTH_BIT)) {
>> +               radv_update_tc_compat_zrange_metadata(cmd_buffer, image,
>> +                                                     ds_clear_value);
>> +       }
>> +
>>          radv_update_bound_fast_clear_ds(cmd_buffer, image, ds_clear_value,
>>                                          aspects);
>>   }
>> @@ -4229,6 +4257,15 @@ static void radv_initialize_htile(struct radv_cmd_buffer *cmd_buffer,
>>                  aspects |= VK_IMAGE_ASPECT_STENCIL_BIT;
>>
>>          radv_set_ds_clear_metadata(cmd_buffer, image, value, aspects);
>> +
>> +       if (radv_image_is_tc_compat_htile(image)) {
>> +               /* Initialize the TC-compat metada value to 0 because by
>> +                * default DB_Z_INFO.RANGE_PRECISION is set to 1, and we only
>> +                * need have to conditionally update its value when performing
>> +                * a fast depth clear.
>> +                */
>> +               radv_set_tc_compat_zrange_metadata(cmd_buffer, image, 0);
>> +       }
>>   }
>>
>>   static void radv_handle_depth_image_transition(struct radv_cmd_buffer *cmd_buffer,
>> diff --git a/src/amd/vulkan/radv_image.c b/src/amd/vulkan/radv_image.c
>> index f447166d80c..9fcbaa3184c 100644
>> --- a/src/amd/vulkan/radv_image.c
>> +++ b/src/amd/vulkan/radv_image.c
>> @@ -870,6 +870,14 @@ radv_image_alloc_htile(struct radv_image *image)
>>          /* + 8 for storing the clear values */
>>          image->clear_value_offset = image->htile_offset + image->surface.htile_size;
>>          image->size = image->clear_value_offset + 8;
>> +       if (radv_image_is_tc_compat_htile(image)) {
>> +               /* Metadata for the TC-compatible HTILE hardware bug which
>> +                * have to be fixed by updating ZRANGE_PRECISION when doing
>> +                * fast depth clears to 0.0f.
>> +                */
>> +               image->tc_compat_zrange_offset = image->clear_value_offset + 8;
>> +               image->size = image->clear_value_offset + 16;
>> +       }
>>          image->alignment = align64(image->alignment, image->surface.htile_alignment);
>>   }
>>
>> @@ -1014,8 +1022,8 @@ radv_image_create(VkDevice _device,
>>                          /* Otherwise, try to enable HTILE for depth surfaces. */
>>                          if (radv_image_can_enable_htile(image) &&
>>                              !(device->instance->debug_flags & RADV_DEBUG_NO_HIZ)) {
>> -                               radv_image_alloc_htile(image);
>>                                  image->tc_compatible_htile = image->surface.flags & RADEON_SURF_TC_COMPATIBLE_HTILE;
>> +                               radv_image_alloc_htile(image);
>>                          } else {
>>                                  image->surface.htile_size = 0;
>>                          }
>> diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
>> index ac756f2c247..1ef5b215db7 100644
>> --- a/src/amd/vulkan/radv_private.h
>> +++ b/src/amd/vulkan/radv_private.h
>> @@ -1503,6 +1503,8 @@ struct radv_image {
>>          uint64_t clear_value_offset;
>>          uint64_t fce_pred_offset;
>>
>> +       uint64_t tc_compat_zrange_offset;
> 
> Can we document what the values put into this memory location mean?
> 

Sure, will do before pushing.

> Otherwise
> 
> Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
> 
>> +
>>          /* For VK_ANDROID_native_buffer, the WSI image owns the memory, */
>>          VkDeviceMemory owned_memory;
>>   };
>> --
>> 2.19.2
>>
>> _______________________________________________
>> mesa-dev mailing list
>> mesa-dev@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev