[v3,12/13] anv: Emit the fast clear color address, instead of value.

Submitted by Rafael Antognolli on Feb. 21, 2018, 9:45 p.m.

Details

Message ID 20180221214522.25065-13-rafael.antognolli@intel.com
State New
Headers show
Series "Use clear color address in surface state." ( rev: 2 ) in Mesa

Not browsing as part of any series.

Commit Message

Rafael Antognolli Feb. 21, 2018, 9:45 p.m.
On Gen10+, instead of copying the clear color from the state buffer to
the surface state, just use the address of the state buffer in the
surface state directly. This way we can avoid the copy from state buffer
to surface state.

Signed-off-by: Rafael Antognolli <rafael.antognolli@intel.com>
---
 src/intel/vulkan/anv_image.c       | 19 ++++++++++++++
 src/intel/vulkan/anv_private.h     |  5 ++++
 src/intel/vulkan/genX_cmd_buffer.c | 52 +++++++++++++++++++++++++++++++++++---
 3 files changed, 72 insertions(+), 4 deletions(-)

Patch hide | download patch | download mbox

diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c
index 0dafe03442d..6b7ea32cbb3 100644
--- a/src/intel/vulkan/anv_image.c
+++ b/src/intel/vulkan/anv_image.c
@@ -1023,6 +1023,15 @@  anv_image_fill_surface_state(struct anv_device *device,
    const uint64_t aux_address = aux_usage == ISL_AUX_USAGE_NONE ?
       0 : (image->planes[plane].bo_offset + aux_surface->offset);
 
+   bool use_clear_address = false;
+   struct anv_address clear_address = { .bo = NULL };
+   state_inout->clear_address = 0;
+   if (device->info.gen >= 10 && aux_usage != ISL_AUX_USAGE_NONE &&
+       aux_usage != ISL_AUX_USAGE_HIZ) {
+      clear_address = anv_image_get_clear_color_addr(device, image, aspect);
+      use_clear_address = true;
+   }
+
    if (view_usage == ISL_SURF_USAGE_STORAGE_BIT &&
        !(flags & ANV_IMAGE_VIEW_STATE_STORAGE_WRITE_ONLY) &&
        !isl_has_matching_typed_storage_image_format(&device->info,
@@ -1040,6 +1049,7 @@  anv_image_fill_surface_state(struct anv_device *device,
                             .mocs = device->default_mocs);
       state_inout->address = address,
       state_inout->aux_address = 0;
+      state_inout->clear_address = 0;
    } else {
       if (view_usage == ISL_SURF_USAGE_STORAGE_BIT &&
           !(flags & ANV_IMAGE_VIEW_STATE_STORAGE_WRITE_ONLY)) {
@@ -1113,6 +1123,8 @@  anv_image_fill_surface_state(struct anv_device *device,
                           .aux_surf = &aux_surface->isl,
                           .aux_usage = aux_usage,
                           .aux_address = aux_address,
+                          .clear_address = clear_address.offset,
+                          .use_clear_address = use_clear_address,
                           .mocs = device->default_mocs,
                           .x_offset_sa = tile_x_sa,
                           .y_offset_sa = tile_y_sa);
@@ -1134,6 +1146,13 @@  anv_image_fill_surface_state(struct anv_device *device,
       assert((aux_address & 0xfff) == 0);
       assert(aux_address == (*aux_addr_dw & 0xfffff000));
       state_inout->aux_address = *aux_addr_dw;
+
+      if (device->info.gen >= 10 && clear_address.bo) {
+         uint32_t *clear_addr_dw = state_inout->state.map +
+                                   device->isl_dev.ss.clear_value_offset;
+         assert((clear_address.offset & 0x3f) == 0);
+         state_inout->clear_address = *clear_addr_dw;
+      }
    }
 
    anv_state_flush(device, state_inout->state);
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index b8c381d2665..5c077987cef 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -1674,6 +1674,11 @@  struct anv_surface_state {
     * bits of this address include extra aux information.
     */
    uint64_t aux_address;
+   /* Address of the clear color, if any
+    *
+    * This address is relative to the start of the BO.
+    */
+   uint64_t clear_address;
 };
 
 /**
diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c
index 939a795c2b1..b9e1d50cbe3 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -200,6 +200,16 @@  add_image_view_relocs(struct anv_cmd_buffer *cmd_buffer,
       if (result != VK_SUCCESS)
          anv_batch_set_error(&cmd_buffer->batch, result);
    }
+
+   if (state.clear_address) {
+      VkResult result =
+         anv_reloc_list_add(&cmd_buffer->surface_relocs,
+                            &cmd_buffer->pool->alloc,
+                            state.state.offset + isl_dev->ss.clear_value_offset,
+                            image->planes[image_plane].bo, state.clear_address);
+      if (result != VK_SUCCESS)
+         anv_batch_set_error(&cmd_buffer->batch, result);
+   }
 }
 
 static void
@@ -1056,6 +1066,35 @@  transition_color_buffer(struct anv_cmd_buffer *cmd_buffer,
       ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_CS_STALL_BIT;
 }
 
+static void
+update_fast_clear_color(struct anv_cmd_buffer *cmd_buffer,
+                        const struct anv_attachment_state *att_state,
+                        const struct anv_image *image)
+{
+   assert(GEN_GEN >= 10);
+   assert(image->aspects == VK_IMAGE_ASPECT_COLOR_BIT);
+
+   struct anv_address clear_address =
+      anv_image_get_clear_color_addr(cmd_buffer->device, image,
+                                     VK_IMAGE_ASPECT_COLOR_BIT);
+   union isl_color_value fast_clear_value;
+   memcpy(fast_clear_value.u32, att_state->clear_value.color.uint32,
+          sizeof(fast_clear_value.u32));
+
+   /* Clear values are stored at the same bo as the aux surface, right
+    * after the surface.
+    */
+   for (int i = 0; i < cmd_buffer->device->isl_dev.ss.clear_value_size / 4; i++) {
+      anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_DATA_IMM), sdi) {
+         sdi.Address = (struct anv_address) {
+            .bo = clear_address.bo,
+            .offset = clear_address.offset + i * 4,
+         };
+         sdi.ImmediateData = fast_clear_value.u32[i];
+      }
+   }
+}
+
 /**
  * Setup anv_cmd_state::attachments for vkCmdBeginRenderPass.
  */
@@ -3403,9 +3442,13 @@  cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer,
             base_clear_layer++;
             clear_layer_count--;
 
-            genX(copy_fast_clear_dwords)(cmd_buffer, att_state->color.state,
-                                         image, VK_IMAGE_ASPECT_COLOR_BIT,
-                                         true /* copy from ss */);
+            if (GEN_GEN < 10) {
+               genX(copy_fast_clear_dwords)(cmd_buffer, att_state->color.state,
+                                            image, VK_IMAGE_ASPECT_COLOR_BIT,
+                                            true /* copy from ss */);
+            } else {
+               update_fast_clear_color(cmd_buffer, att_state, image);
+            }
 
             if (att_state->clear_color_is_zero) {
                /* This image has the auxiliary buffer enabled. We can mark the
@@ -3462,7 +3505,8 @@  cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer,
          assert(att_state->pending_clear_aspects == 0);
       }
 
-      if (att_state->pending_load_aspects & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) {
+      if (GEN_GEN < 10 &&
+          (att_state->pending_load_aspects & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV)) {
          if (att_state->aux_usage != ISL_AUX_USAGE_NONE) {
             genX(copy_fast_clear_dwords)(cmd_buffer, att_state->color.state,
                                          image, VK_IMAGE_ASPECT_COLOR_BIT,

Comments

On Wed, Feb 21, 2018 at 1:45 PM, Rafael Antognolli <
rafael.antognolli@intel.com> wrote:

> On Gen10+, instead of copying the clear color from the state buffer to
> the surface state, just use the address of the state buffer in the
> surface state directly. This way we can avoid the copy from state buffer
> to surface state.
>
> Signed-off-by: Rafael Antognolli <rafael.antognolli@intel.com>
> ---
>  src/intel/vulkan/anv_image.c       | 19 ++++++++++++++
>  src/intel/vulkan/anv_private.h     |  5 ++++
>  src/intel/vulkan/genX_cmd_buffer.c | 52 ++++++++++++++++++++++++++++++
> +++++---
>  3 files changed, 72 insertions(+), 4 deletions(-)
>
> diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c
> index 0dafe03442d..6b7ea32cbb3 100644
> --- a/src/intel/vulkan/anv_image.c
> +++ b/src/intel/vulkan/anv_image.c
> @@ -1023,6 +1023,15 @@ anv_image_fill_surface_state(struct anv_device
> *device,
>     const uint64_t aux_address = aux_usage == ISL_AUX_USAGE_NONE ?
>        0 : (image->planes[plane].bo_offset + aux_surface->offset);
>
> +   bool use_clear_address = false;
> +   struct anv_address clear_address = { .bo = NULL };
> +   state_inout->clear_address = 0;
> +   if (device->info.gen >= 10 && aux_usage != ISL_AUX_USAGE_NONE &&
> +       aux_usage != ISL_AUX_USAGE_HIZ) {
> +      clear_address = anv_image_get_clear_color_addr(device, image,
> aspect);
> +      use_clear_address = true;
> +   }
> +
>     if (view_usage == ISL_SURF_USAGE_STORAGE_BIT &&
>         !(flags & ANV_IMAGE_VIEW_STATE_STORAGE_WRITE_ONLY) &&
>         !isl_has_matching_typed_storage_image_format(&device->info,
> @@ -1040,6 +1049,7 @@ anv_image_fill_surface_state(struct anv_device
> *device,
>                              .mocs = device->default_mocs);
>        state_inout->address = address,
>        state_inout->aux_address = 0;
> +      state_inout->clear_address = 0;
>     } else {
>        if (view_usage == ISL_SURF_USAGE_STORAGE_BIT &&
>            !(flags & ANV_IMAGE_VIEW_STATE_STORAGE_WRITE_ONLY)) {
> @@ -1113,6 +1123,8 @@ anv_image_fill_surface_state(struct anv_device
> *device,
>                            .aux_surf = &aux_surface->isl,
>                            .aux_usage = aux_usage,
>                            .aux_address = aux_address,
> +                          .clear_address = clear_address.offset,
> +                          .use_clear_address = use_clear_address,
>                            .mocs = device->default_mocs,
>                            .x_offset_sa = tile_x_sa,
>                            .y_offset_sa = tile_y_sa);
> @@ -1134,6 +1146,13 @@ anv_image_fill_surface_state(struct anv_device
> *device,
>        assert((aux_address & 0xfff) == 0);
>        assert(aux_address == (*aux_addr_dw & 0xfffff000));
>        state_inout->aux_address = *aux_addr_dw;
> +
> +      if (device->info.gen >= 10 && clear_address.bo) {
>

Here you use clear_address.bo != NULL but above you use use_clear_address.
Probably best to just pick one and stick with it.


> +         uint32_t *clear_addr_dw = state_inout->state.map +
> +                                   device->isl_dev.ss.clear_value_offset;
> +         assert((clear_address.offset & 0x3f) == 0);
> +         state_inout->clear_address = *clear_addr_dw;
> +      }
>     }
>
>     anv_state_flush(device, state_inout->state);
> diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_
> private.h
> index b8c381d2665..5c077987cef 100644
> --- a/src/intel/vulkan/anv_private.h
> +++ b/src/intel/vulkan/anv_private.h
> @@ -1674,6 +1674,11 @@ struct anv_surface_state {
>      * bits of this address include extra aux information.
>      */
>     uint64_t aux_address;
> +   /* Address of the clear color, if any
> +    *
> +    * This address is relative to the start of the BO.
> +    */
> +   uint64_t clear_address;
>  };
>
>  /**
> diff --git a/src/intel/vulkan/genX_cmd_buffer.c
> b/src/intel/vulkan/genX_cmd_buffer.c
> index 939a795c2b1..b9e1d50cbe3 100644
> --- a/src/intel/vulkan/genX_cmd_buffer.c
> +++ b/src/intel/vulkan/genX_cmd_buffer.c
> @@ -200,6 +200,16 @@ add_image_view_relocs(struct anv_cmd_buffer
> *cmd_buffer,
>        if (result != VK_SUCCESS)
>           anv_batch_set_error(&cmd_buffer->batch, result);
>     }
> +
> +   if (state.clear_address) {
> +      VkResult result =
> +         anv_reloc_list_add(&cmd_buffer->surface_relocs,
> +                            &cmd_buffer->pool->alloc,
> +                            state.state.offset + isl_dev->ss.clear_value_
> offset,
>

I'm not sure how comfortable I am with ss.clear_value_offset doing
double-duty for inline clear values and clear value addresses.  I suppose
it's probably ok because the only overlap is gen10 and we know it matches
there.


> +                            image->planes[image_plane].bo,
> state.clear_address);
> +      if (result != VK_SUCCESS)
> +         anv_batch_set_error(&cmd_buffer->batch, result);
> +   }
>  }
>
>  static void
> @@ -1056,6 +1066,35 @@ transition_color_buffer(struct anv_cmd_buffer
> *cmd_buffer,
>        ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_CS_STALL_BIT;
>  }
>
> +static void
> +update_fast_clear_color(struct anv_cmd_buffer *cmd_buffer,
> +                        const struct anv_attachment_state *att_state,
> +                        const struct anv_image *image)
> +{
> +   assert(GEN_GEN >= 10);
> +   assert(image->aspects == VK_IMAGE_ASPECT_COLOR_BIT);
> +
> +   struct anv_address clear_address =
> +      anv_image_get_clear_color_addr(cmd_buffer->device, image,
> +                                     VK_IMAGE_ASPECT_COLOR_BIT);
> +   union isl_color_value fast_clear_value;
> +   memcpy(fast_clear_value.u32, att_state->clear_value.color.uint32,
> +          sizeof(fast_clear_value.u32));
>

I think we probably want to break the clear value handling code from
color_attachment_compute_aux_usage into a helper and use that here instead
of the memcpy.  That way things are a bit sanitized.


> +
> +   /* Clear values are stored at the same bo as the aux surface, right
> +    * after the surface.
> +    */
> +   for (int i = 0; i < cmd_buffer->device->isl_dev.ss.clear_value_size /
> 4; i++) {
>

This only gets called on gen10+, just make it 4.  If it's ever anything
else, we'll have to modify the stuff below since it just pulls from
fast_clear_value.u32


> +      anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_DATA_IMM), sdi) {
> +         sdi.Address = (struct anv_address) {
> +            .bo = clear_address.bo,
> +            .offset = clear_address.offset + i * 4,
> +         };
> +         sdi.ImmediateData = fast_clear_value.u32[i];
> +      }
> +   }
> +}
> +
>  /**
>   * Setup anv_cmd_state::attachments for vkCmdBeginRenderPass.
>   */
> @@ -3403,9 +3442,13 @@ cmd_buffer_begin_subpass(struct anv_cmd_buffer
> *cmd_buffer,
>              base_clear_layer++;
>              clear_layer_count--;
>
> -            genX(copy_fast_clear_dwords)(cmd_buffer,
> att_state->color.state,
> -                                         image, VK_IMAGE_ASPECT_COLOR_BIT,
> -                                         true /* copy from ss */);
> +            if (GEN_GEN < 10) {
> +               genX(copy_fast_clear_dwords)(cmd_buffer,
> att_state->color.state,
> +                                            image,
> VK_IMAGE_ASPECT_COLOR_BIT,
> +                                            true /* copy from ss */);
> +            } else {
> +               update_fast_clear_color(cmd_buffer, att_state, image);
> +            }
>
>              if (att_state->clear_color_is_zero) {
>                 /* This image has the auxiliary buffer enabled. We can
> mark the
> @@ -3462,7 +3505,8 @@ cmd_buffer_begin_subpass(struct anv_cmd_buffer
> *cmd_buffer,
>           assert(att_state->pending_clear_aspects == 0);
>        }
>
> -      if (att_state->pending_load_aspects &
> VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) {
> +      if (GEN_GEN < 10 &&
> +          (att_state->pending_load_aspects &
> VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV)) {
>           if (att_state->aux_usage != ISL_AUX_USAGE_NONE) {
>              genX(copy_fast_clear_dwords)(cmd_buffer,
> att_state->color.state,
>                                           image, VK_IMAGE_ASPECT_COLOR_BIT,
> --
> 2.14.3
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
On Tue, Feb 27, 2018 at 02:58:01PM -0800, Jason Ekstrand wrote:
> On Wed, Feb 21, 2018 at 1:45 PM, Rafael Antognolli <rafael.antognolli@intel.com
> > wrote:
> 
>     On Gen10+, instead of copying the clear color from the state buffer to
>     the surface state, just use the address of the state buffer in the
>     surface state directly. This way we can avoid the copy from state buffer
>     to surface state.
> 
>     Signed-off-by: Rafael Antognolli <rafael.antognolli@intel.com>
>     ---
>      src/intel/vulkan/anv_image.c       | 19 ++++++++++++++
>      src/intel/vulkan/anv_private.h     |  5 ++++
>      src/intel/vulkan/genX_cmd_buffer.c | 52 ++++++++++++++++++++++++++++++
>     +++++---
>      3 files changed, 72 insertions(+), 4 deletions(-)
> 
>     diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c
>     index 0dafe03442d..6b7ea32cbb3 100644
>     --- a/src/intel/vulkan/anv_image.c
>     +++ b/src/intel/vulkan/anv_image.c
>     @@ -1023,6 +1023,15 @@ anv_image_fill_surface_state(struct anv_device
>     *device,
>         const uint64_t aux_address = aux_usage == ISL_AUX_USAGE_NONE ?
>            0 : (image->planes[plane].bo_offset + aux_surface->offset);
> 
>     +   bool use_clear_address = false;
>     +   struct anv_address clear_address = { .bo = NULL };
>     +   state_inout->clear_address = 0;
>     +   if (device->info.gen >= 10 && aux_usage != ISL_AUX_USAGE_NONE &&
>     +       aux_usage != ISL_AUX_USAGE_HIZ) {
>     +      clear_address = anv_image_get_clear_color_addr(device, image,
>     aspect);
>     +      use_clear_address = true;
>     +   }
>     +
>         if (view_usage == ISL_SURF_USAGE_STORAGE_BIT &&
>             !(flags & ANV_IMAGE_VIEW_STATE_STORAGE_WRITE_ONLY) &&
>             !isl_has_matching_typed_storage_image_format(&device->info,
>     @@ -1040,6 +1049,7 @@ anv_image_fill_surface_state(struct anv_device
>     *device,
>                                  .mocs = device->default_mocs);
>            state_inout->address = address,
>            state_inout->aux_address = 0;
>     +      state_inout->clear_address = 0;
>         } else {
>            if (view_usage == ISL_SURF_USAGE_STORAGE_BIT &&
>                !(flags & ANV_IMAGE_VIEW_STATE_STORAGE_WRITE_ONLY)) {
>     @@ -1113,6 +1123,8 @@ anv_image_fill_surface_state(struct anv_device
>     *device,
>                                .aux_surf = &aux_surface->isl,
>                                .aux_usage = aux_usage,
>                                .aux_address = aux_address,
>     +                          .clear_address = clear_address.offset,
>     +                          .use_clear_address = use_clear_address,
>                                .mocs = device->default_mocs,
>                                .x_offset_sa = tile_x_sa,
>                                .y_offset_sa = tile_y_sa);
>     @@ -1134,6 +1146,13 @@ anv_image_fill_surface_state(struct anv_device
>     *device,
>            assert((aux_address & 0xfff) == 0);
>            assert(aux_address == (*aux_addr_dw & 0xfffff000));
>            state_inout->aux_address = *aux_addr_dw;
>     +
>     +      if (device->info.gen >= 10 && clear_address.bo) {
> 
> 
> Here you use clear_address.bo != NULL but above you use use_clear_address. 
> Probably best to just pick one and stick with it.
>  
> 
>     +         uint32_t *clear_addr_dw = state_inout->state.map +
>     +                                   device->isl_dev.ss.clear_value_offset;
>     +         assert((clear_address.offset & 0x3f) == 0);
>     +         state_inout->clear_address = *clear_addr_dw;
>     +      }
>         }
> 
>         anv_state_flush(device, state_inout->state);
>     diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_
>     private.h
>     index b8c381d2665..5c077987cef 100644
>     --- a/src/intel/vulkan/anv_private.h
>     +++ b/src/intel/vulkan/anv_private.h
>     @@ -1674,6 +1674,11 @@ struct anv_surface_state {
>          * bits of this address include extra aux information.
>          */
>         uint64_t aux_address;
>     +   /* Address of the clear color, if any
>     +    *
>     +    * This address is relative to the start of the BO.
>     +    */
>     +   uint64_t clear_address;
>      };
> 
>      /**
>     diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/
>     genX_cmd_buffer.c
>     index 939a795c2b1..b9e1d50cbe3 100644
>     --- a/src/intel/vulkan/genX_cmd_buffer.c
>     +++ b/src/intel/vulkan/genX_cmd_buffer.c
>     @@ -200,6 +200,16 @@ add_image_view_relocs(struct anv_cmd_buffer
>     *cmd_buffer,
>            if (result != VK_SUCCESS)
>               anv_batch_set_error(&cmd_buffer->batch, result);
>         }
>     +
>     +   if (state.clear_address) {
>     +      VkResult result =
>     +         anv_reloc_list_add(&cmd_buffer->surface_relocs,
>     +                            &cmd_buffer->pool->alloc,
>     +                            state.state.offset + isl_dev->ss.clear_value_
>     offset,
> 
> 
> I'm not sure how comfortable I am with ss.clear_value_offset doing double-duty
> for inline clear values and clear value addresses.  I suppose it's probably ok
> because the only overlap is gen10 and we know it matches there.
>  
> 
>     +                            image->planes[image_plane].bo,
>     state.clear_address);
>     +      if (result != VK_SUCCESS)
>     +         anv_batch_set_error(&cmd_buffer->batch, result);
>     +   }
>      }
> 
>      static void
>     @@ -1056,6 +1066,35 @@ transition_color_buffer(struct anv_cmd_buffer
>     *cmd_buffer,
>            ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_CS_STALL_BIT;
>      }
> 
>     +static void
>     +update_fast_clear_color(struct anv_cmd_buffer *cmd_buffer,
>     +                        const struct anv_attachment_state *att_state,
>     +                        const struct anv_image *image)
>     +{
>     +   assert(GEN_GEN >= 10);
>     +   assert(image->aspects == VK_IMAGE_ASPECT_COLOR_BIT);
>     +
>     +   struct anv_address clear_address =
>     +      anv_image_get_clear_color_addr(cmd_buffer->device, image,
>     +                                     VK_IMAGE_ASPECT_COLOR_BIT);
>     +   union isl_color_value fast_clear_value;
>     +   memcpy(fast_clear_value.u32, att_state->clear_value.color.uint32,
>     +          sizeof(fast_clear_value.u32));
> 
> 
> I think we probably want to break the clear value handling code from
> color_attachment_compute_aux_usage into a helper and use that here instead of
> the memcpy.  That way things are a bit sanitized.

Hmmm... Reading again this code, I'm trying to remember why I memcpy the
clear color from att_state to fast_clear_value. It looks like I could
just have used it directly, like:

            sdi.ImmediateData = att_state->clear_value.color.uint32[i];

So, do you have a preference between this and the copy done in
color_attachment_compute_aux_usage?

>     +
>     +   /* Clear values are stored at the same bo as the aux surface, right
>     +    * after the surface.
>     +    */
>     +   for (int i = 0; i < cmd_buffer->device->isl_dev.ss.clear_value_size /
>     4; i++) {
> 
> 
> This only gets called on gen10+, just make it 4.  If it's ever anything else,
> we'll have to modify the stuff below since it just pulls from
> fast_clear_value.u32
>  
> 
>     +      anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_DATA_IMM), sdi) {
>     +         sdi.Address = (struct anv_address) {
>     +            .bo = clear_address.bo,
>     +            .offset = clear_address.offset + i * 4,
>     +         };
>     +         sdi.ImmediateData = fast_clear_value.u32[i];
>     +      }
>     +   }
>     +}
>     +
>      /**
>       * Setup anv_cmd_state::attachments for vkCmdBeginRenderPass.
>       */
>     @@ -3403,9 +3442,13 @@ cmd_buffer_begin_subpass(struct anv_cmd_buffer
>     *cmd_buffer,
>                  base_clear_layer++;
>                  clear_layer_count--;
> 
>     -            genX(copy_fast_clear_dwords)(cmd_buffer, att_state->
>     color.state,
>     -                                         image, VK_IMAGE_ASPECT_COLOR_BIT,
>     -                                         true /* copy from ss */);
>     +            if (GEN_GEN < 10) {
>     +               genX(copy_fast_clear_dwords)(cmd_buffer, att_state->
>     color.state,
>     +                                            image,
>     VK_IMAGE_ASPECT_COLOR_BIT,
>     +                                            true /* copy from ss */);
>     +            } else {
>     +               update_fast_clear_color(cmd_buffer, att_state, image);
>     +            }
> 
>                  if (att_state->clear_color_is_zero) {
>                     /* This image has the auxiliary buffer enabled. We can mark
>     the
>     @@ -3462,7 +3505,8 @@ cmd_buffer_begin_subpass(struct anv_cmd_buffer
>     *cmd_buffer,
>               assert(att_state->pending_clear_aspects == 0);
>            }
> 
>     -      if (att_state->pending_load_aspects & VK_IMAGE_ASPECT_ANY_COLOR_BIT_
>     ANV) {
>     +      if (GEN_GEN < 10 &&
>     +          (att_state->pending_load_aspects &
>     VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV)) {
>               if (att_state->aux_usage != ISL_AUX_USAGE_NONE) {
>                  genX(copy_fast_clear_dwords)(cmd_buffer, att_state->
>     color.state,
>                                               image, VK_IMAGE_ASPECT_COLOR_BIT,
>     --
>     2.14.3
> 
>     _______________________________________________
>     mesa-dev mailing list
>     mesa-dev@lists.freedesktop.org
>     https://lists.freedesktop.org/mailman/listinfo/mesa-dev
> 
>
On Tue, Feb 27, 2018 at 4:45 PM, Rafael Antognolli <
rafael.antognolli@intel.com> wrote:

> On Tue, Feb 27, 2018 at 02:58:01PM -0800, Jason Ekstrand wrote:
> > On Wed, Feb 21, 2018 at 1:45 PM, Rafael Antognolli <
> rafael.antognolli@intel.com
> > > wrote:
> >
> >     On Gen10+, instead of copying the clear color from the state buffer
> to
> >     the surface state, just use the address of the state buffer in the
> >     surface state directly. This way we can avoid the copy from state
> buffer
> >     to surface state.
> >
> >     Signed-off-by: Rafael Antognolli <rafael.antognolli@intel.com>
> >     ---
> >      src/intel/vulkan/anv_image.c       | 19 ++++++++++++++
> >      src/intel/vulkan/anv_private.h     |  5 ++++
> >      src/intel/vulkan/genX_cmd_buffer.c | 52
> ++++++++++++++++++++++++++++++
> >     +++++---
> >      3 files changed, 72 insertions(+), 4 deletions(-)
> >
> >     diff --git a/src/intel/vulkan/anv_image.c
> b/src/intel/vulkan/anv_image.c
> >     index 0dafe03442d..6b7ea32cbb3 100644
> >     --- a/src/intel/vulkan/anv_image.c
> >     +++ b/src/intel/vulkan/anv_image.c
> >     @@ -1023,6 +1023,15 @@ anv_image_fill_surface_state(struct
> anv_device
> >     *device,
> >         const uint64_t aux_address = aux_usage == ISL_AUX_USAGE_NONE ?
> >            0 : (image->planes[plane].bo_offset + aux_surface->offset);
> >
> >     +   bool use_clear_address = false;
> >     +   struct anv_address clear_address = { .bo = NULL };
> >     +   state_inout->clear_address = 0;
> >     +   if (device->info.gen >= 10 && aux_usage != ISL_AUX_USAGE_NONE &&
> >     +       aux_usage != ISL_AUX_USAGE_HIZ) {
> >     +      clear_address = anv_image_get_clear_color_addr(device, image,
> >     aspect);
> >     +      use_clear_address = true;
> >     +   }
> >     +
> >         if (view_usage == ISL_SURF_USAGE_STORAGE_BIT &&
> >             !(flags & ANV_IMAGE_VIEW_STATE_STORAGE_WRITE_ONLY) &&
> >             !isl_has_matching_typed_storage_image_format(&device->info,
> >     @@ -1040,6 +1049,7 @@ anv_image_fill_surface_state(struct anv_device
> >     *device,
> >                                  .mocs = device->default_mocs);
> >            state_inout->address = address,
> >            state_inout->aux_address = 0;
> >     +      state_inout->clear_address = 0;
> >         } else {
> >            if (view_usage == ISL_SURF_USAGE_STORAGE_BIT &&
> >                !(flags & ANV_IMAGE_VIEW_STATE_STORAGE_WRITE_ONLY)) {
> >     @@ -1113,6 +1123,8 @@ anv_image_fill_surface_state(struct anv_device
> >     *device,
> >                                .aux_surf = &aux_surface->isl,
> >                                .aux_usage = aux_usage,
> >                                .aux_address = aux_address,
> >     +                          .clear_address = clear_address.offset,
> >     +                          .use_clear_address = use_clear_address,
> >                                .mocs = device->default_mocs,
> >                                .x_offset_sa = tile_x_sa,
> >                                .y_offset_sa = tile_y_sa);
> >     @@ -1134,6 +1146,13 @@ anv_image_fill_surface_state(struct
> anv_device
> >     *device,
> >            assert((aux_address & 0xfff) == 0);
> >            assert(aux_address == (*aux_addr_dw & 0xfffff000));
> >            state_inout->aux_address = *aux_addr_dw;
> >     +
> >     +      if (device->info.gen >= 10 && clear_address.bo) {
> >
> >
> > Here you use clear_address.bo != NULL but above you use
> use_clear_address.
> > Probably best to just pick one and stick with it.
> >
> >
> >     +         uint32_t *clear_addr_dw = state_inout->state.map +
> >     +                                   device->isl_dev.ss.clear_
> value_offset;
> >     +         assert((clear_address.offset & 0x3f) == 0);
> >     +         state_inout->clear_address = *clear_addr_dw;
> >     +      }
> >         }
> >
> >         anv_state_flush(device, state_inout->state);
> >     diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_
> >     private.h
> >     index b8c381d2665..5c077987cef 100644
> >     --- a/src/intel/vulkan/anv_private.h
> >     +++ b/src/intel/vulkan/anv_private.h
> >     @@ -1674,6 +1674,11 @@ struct anv_surface_state {
> >          * bits of this address include extra aux information.
> >          */
> >         uint64_t aux_address;
> >     +   /* Address of the clear color, if any
> >     +    *
> >     +    * This address is relative to the start of the BO.
> >     +    */
> >     +   uint64_t clear_address;
> >      };
> >
> >      /**
> >     diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/
> >     genX_cmd_buffer.c
> >     index 939a795c2b1..b9e1d50cbe3 100644
> >     --- a/src/intel/vulkan/genX_cmd_buffer.c
> >     +++ b/src/intel/vulkan/genX_cmd_buffer.c
> >     @@ -200,6 +200,16 @@ add_image_view_relocs(struct anv_cmd_buffer
> >     *cmd_buffer,
> >            if (result != VK_SUCCESS)
> >               anv_batch_set_error(&cmd_buffer->batch, result);
> >         }
> >     +
> >     +   if (state.clear_address) {
> >     +      VkResult result =
> >     +         anv_reloc_list_add(&cmd_buffer->surface_relocs,
> >     +                            &cmd_buffer->pool->alloc,
> >     +                            state.state.offset +
> isl_dev->ss.clear_value_
> >     offset,
> >
> >
> > I'm not sure how comfortable I am with ss.clear_value_offset doing
> double-duty
> > for inline clear values and clear value addresses.  I suppose it's
> probably ok
> > because the only overlap is gen10 and we know it matches there.
> >
> >
> >     +                            image->planes[image_plane].bo,
> >     state.clear_address);
> >     +      if (result != VK_SUCCESS)
> >     +         anv_batch_set_error(&cmd_buffer->batch, result);
> >     +   }
> >      }
> >
> >      static void
> >     @@ -1056,6 +1066,35 @@ transition_color_buffer(struct anv_cmd_buffer
> >     *cmd_buffer,
> >            ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
> ANV_PIPE_CS_STALL_BIT;
> >      }
> >
> >     +static void
> >     +update_fast_clear_color(struct anv_cmd_buffer *cmd_buffer,
> >     +                        const struct anv_attachment_state
> *att_state,
> >     +                        const struct anv_image *image)
> >     +{
> >     +   assert(GEN_GEN >= 10);
> >     +   assert(image->aspects == VK_IMAGE_ASPECT_COLOR_BIT);
> >     +
> >     +   struct anv_address clear_address =
> >     +      anv_image_get_clear_color_addr(cmd_buffer->device, image,
> >     +                                     VK_IMAGE_ASPECT_COLOR_BIT);
> >     +   union isl_color_value fast_clear_value;
> >     +   memcpy(fast_clear_value.u32, att_state->clear_value.color.
> uint32,
> >     +          sizeof(fast_clear_value.u32));
> >
> >
> > I think we probably want to break the clear value handling code from
> > color_attachment_compute_aux_usage into a helper and use that here
> instead of
> > the memcpy.  That way things are a bit sanitized.
>
> Hmmm... Reading again this code, I'm trying to remember why I memcpy the
> clear color from att_state to fast_clear_value. It looks like I could
> just have used it directly, like:
>
>             sdi.ImmediateData = att_state->clear_value.color.uint32[i];
>
> So, do you have a preference between this and the copy done in
> color_attachment_compute_aux_usage?
>

I think I'd prefer the copy done in color_attachment_compute_aux_usage
because it sanatizes the value and prevents us from writing undefined
garbage into the buffer.


> >     +
> >     +   /* Clear values are stored at the same bo as the aux surface,
> right
> >     +    * after the surface.
> >     +    */
> >     +   for (int i = 0; i < cmd_buffer->device->isl_dev.ss.clear_value_size
> /
> >     4; i++) {
> >
> >
> > This only gets called on gen10+, just make it 4.  If it's ever anything
> else,
> > we'll have to modify the stuff below since it just pulls from
> > fast_clear_value.u32
> >
> >
> >     +      anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_DATA_IMM),
> sdi) {
> >     +         sdi.Address = (struct anv_address) {
> >     +            .bo = clear_address.bo,
> >     +            .offset = clear_address.offset + i * 4,
> >     +         };
> >     +         sdi.ImmediateData = fast_clear_value.u32[i];
> >     +      }
> >     +   }
> >     +}
> >     +
> >      /**
> >       * Setup anv_cmd_state::attachments for vkCmdBeginRenderPass.
> >       */
> >     @@ -3403,9 +3442,13 @@ cmd_buffer_begin_subpass(struct
> anv_cmd_buffer
> >     *cmd_buffer,
> >                  base_clear_layer++;
> >                  clear_layer_count--;
> >
> >     -            genX(copy_fast_clear_dwords)(cmd_buffer, att_state->
> >     color.state,
> >     -                                         image,
> VK_IMAGE_ASPECT_COLOR_BIT,
> >     -                                         true /* copy from ss */);
> >     +            if (GEN_GEN < 10) {
> >     +               genX(copy_fast_clear_dwords)(cmd_buffer, att_state->
> >     color.state,
> >     +                                            image,
> >     VK_IMAGE_ASPECT_COLOR_BIT,
> >     +                                            true /* copy from ss
> */);
> >     +            } else {
> >     +               update_fast_clear_color(cmd_buffer, att_state,
> image);
> >     +            }
> >
> >                  if (att_state->clear_color_is_zero) {
> >                     /* This image has the auxiliary buffer enabled. We
> can mark
> >     the
> >     @@ -3462,7 +3505,8 @@ cmd_buffer_begin_subpass(struct anv_cmd_buffer
> >     *cmd_buffer,
> >               assert(att_state->pending_clear_aspects == 0);
> >            }
> >
> >     -      if (att_state->pending_load_aspects &
> VK_IMAGE_ASPECT_ANY_COLOR_BIT_
> >     ANV) {
> >     +      if (GEN_GEN < 10 &&
> >     +          (att_state->pending_load_aspects &
> >     VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV)) {
> >               if (att_state->aux_usage != ISL_AUX_USAGE_NONE) {
> >                  genX(copy_fast_clear_dwords)(cmd_buffer, att_state->
> >     color.state,
> >                                               image,
> VK_IMAGE_ASPECT_COLOR_BIT,
> >     --
> >     2.14.3
> >
> >     _______________________________________________
> >     mesa-dev mailing list
> >     mesa-dev@lists.freedesktop.org
> >     https://lists.freedesktop.org/mailman/listinfo/mesa-dev
> >
> >
>