[v3,21/25] panfrost: Add new helpers to describe job depencencies on BOs

Submitted by Boris Brezillon on Sept. 5, 2019, 7:41 p.m.

Details

Message ID 20190905194150.17608-22-boris.brezillon@collabora.com
State New
Headers show
Series "panfrost: Rework the batch pipelining logic" ( rev: 1 ) in Mesa

Not browsing as part of any series.

Commit Message

Boris Brezillon Sept. 5, 2019, 7:41 p.m.
Batch ordering is most of the time enforced by the resources they are
reading/writing from/to. This patch adds some new helpers to keep track
of that and modifies the existing add_bo() helper to pass flags encoding
the type of access a batch intends to do on this BO.

Since all resources are backed by BOs, and
given we might want to describe dependencies on BOs that are not
exposed as resources, we decided to use BOs as keys on our hash tables.

Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
---
 src/gallium/drivers/panfrost/pan_allocate.c   |   2 +-
 src/gallium/drivers/panfrost/pan_blend_cso.c  |   2 +-
 src/gallium/drivers/panfrost/pan_context.c    |  10 +-
 src/gallium/drivers/panfrost/pan_context.h    |   5 +
 src/gallium/drivers/panfrost/pan_drm.c        |   6 +-
 src/gallium/drivers/panfrost/pan_fragment.c   |   2 +-
 src/gallium/drivers/panfrost/pan_instancing.c |   2 +-
 src/gallium/drivers/panfrost/pan_job.c        | 124 +++++++++++++++++-
 src/gallium/drivers/panfrost/pan_job.h        |  21 ++-
 src/gallium/drivers/panfrost/pan_varyings.c   |   2 +-
 10 files changed, 159 insertions(+), 17 deletions(-)

Patch hide | download patch | download mbox

diff --git a/src/gallium/drivers/panfrost/pan_allocate.c b/src/gallium/drivers/panfrost/pan_allocate.c
index 7938196e3e4f..7b0a7baa32dc 100644
--- a/src/gallium/drivers/panfrost/pan_allocate.c
+++ b/src/gallium/drivers/panfrost/pan_allocate.c
@@ -67,7 +67,7 @@  panfrost_allocate_transient(struct panfrost_batch *batch, size_t sz)
 
                 /* We can't reuse the current BO, but we can create a new one. */
                 bo = panfrost_bo_create(screen, bo_sz, 0);
-                panfrost_batch_add_bo(batch, bo);
+                panfrost_batch_add_bo(batch, bo, PAN_PRIVATE_BO);
 
                 /* Creating a BO adds a reference, and then the job adds a
                  * second one. So we need to pop back one reference */
diff --git a/src/gallium/drivers/panfrost/pan_blend_cso.c b/src/gallium/drivers/panfrost/pan_blend_cso.c
index 69897be4f007..b27e36a7ce28 100644
--- a/src/gallium/drivers/panfrost/pan_blend_cso.c
+++ b/src/gallium/drivers/panfrost/pan_blend_cso.c
@@ -277,7 +277,7 @@  panfrost_get_blend_for_context(struct panfrost_context *ctx, unsigned rti)
         memcpy(final.shader.bo->cpu, shader->buffer, shader->size);
 
         /* Pass BO ownership to job */
-        panfrost_batch_add_bo(batch, final.shader.bo);
+        panfrost_batch_add_bo(batch, final.shader.bo, PAN_PRIVATE_BO);
         panfrost_bo_unreference(final.shader.bo);
 
         if (shader->patch_index) {
diff --git a/src/gallium/drivers/panfrost/pan_context.c b/src/gallium/drivers/panfrost/pan_context.c
index 3e0a3e9df992..c31dc1580524 100644
--- a/src/gallium/drivers/panfrost/pan_context.c
+++ b/src/gallium/drivers/panfrost/pan_context.c
@@ -160,6 +160,7 @@  panfrost_clear(
         struct panfrost_context *ctx = pan_context(pipe);
         struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
 
+        panfrost_batch_add_fbo_bos(batch);
         panfrost_batch_clear(batch, buffers, color, depth, stencil);
 }
 
@@ -605,7 +606,7 @@  panfrost_upload_tex(
 
         /* Add the BO to the job so it's retained until the job is done. */
         struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
-        panfrost_batch_add_bo(batch, rsrc->bo);
+        panfrost_batch_add_bo(batch, rsrc->bo, PAN_SHARED_BO_RD);
 
         /* Add the usage flags in, since they can change across the CSO
          * lifetime due to layout switches */
@@ -724,7 +725,7 @@  static void panfrost_upload_ssbo_sysval(
         struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
         struct panfrost_bo *bo = pan_resource(sb.buffer)->bo;
 
-        panfrost_batch_add_bo(batch, bo);
+        panfrost_batch_add_bo(batch, bo, PAN_SHARED_BO_RW);
 
         /* Upload address and size as sysval */
         uniform->du[0] = bo->gpu + sb.buffer_offset;
@@ -878,6 +879,7 @@  panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data)
         struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
         struct panfrost_screen *screen = pan_screen(ctx->base.screen);
 
+        panfrost_batch_add_fbo_bos(batch);
         panfrost_attach_vt_framebuffer(ctx);
 
         if (with_vertex_data) {
@@ -929,7 +931,7 @@  panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data)
 
                 panfrost_patch_shader_state(ctx, variant, PIPE_SHADER_FRAGMENT, false);
 
-                panfrost_batch_add_bo(batch, variant->bo);
+                panfrost_batch_add_bo(batch, variant->bo, PAN_PRIVATE_BO);
 
 #define COPY(name) ctx->fragment_shader_core.name = variant->tripipe->name
 
@@ -1389,7 +1391,7 @@  panfrost_get_index_buffer_mapped(struct panfrost_context *ctx, const struct pipe
 
         if (!info->has_user_indices) {
                 /* Only resources can be directly mapped */
-                panfrost_batch_add_bo(batch, rsrc->bo);
+                panfrost_batch_add_bo(batch, rsrc->bo, PAN_SHARED_BO_RD);
                 return rsrc->bo->gpu + offset;
         } else {
                 /* Otherwise, we need to upload to transient memory */
diff --git a/src/gallium/drivers/panfrost/pan_context.h b/src/gallium/drivers/panfrost/pan_context.h
index f0578d6808d2..5e7fa9747a55 100644
--- a/src/gallium/drivers/panfrost/pan_context.h
+++ b/src/gallium/drivers/panfrost/pan_context.h
@@ -114,6 +114,11 @@  struct panfrost_context {
         struct panfrost_batch *batch;
         struct hash_table *batches;
 
+        /* BO -> set contain all jobs reading this BO */
+        struct hash_table *read_bos;
+        /* BO -> job writing this BO (only one allowed at any given time) */
+        struct hash_table *write_bos;
+
         /* Within a launch_grid call.. */
         const struct pipe_grid_info *compute_grid;
 
diff --git a/src/gallium/drivers/panfrost/pan_drm.c b/src/gallium/drivers/panfrost/pan_drm.c
index f76c097345f1..b4b2396fd41e 100644
--- a/src/gallium/drivers/panfrost/pan_drm.c
+++ b/src/gallium/drivers/panfrost/pan_drm.c
@@ -90,9 +90,9 @@  panfrost_drm_submit_vs_fs_batch(struct panfrost_batch *batch, bool has_draws)
         struct panfrost_context *ctx = batch->ctx;
         int ret = 0;
 
-        panfrost_batch_add_bo(batch, ctx->scratchpad);
-        panfrost_batch_add_bo(batch, ctx->tiler_heap);
-        panfrost_batch_add_bo(batch, batch->polygon_list);
+        panfrost_batch_add_bo(batch, ctx->scratchpad, PAN_PRIVATE_BO);
+        panfrost_batch_add_bo(batch, ctx->tiler_heap, PAN_PRIVATE_BO);
+        panfrost_batch_add_bo(batch, batch->polygon_list, PAN_PRIVATE_BO);
 
         if (batch->first_job.gpu) {
                 ret = panfrost_drm_submit_batch(batch, batch->first_job.gpu, 0);
diff --git a/src/gallium/drivers/panfrost/pan_fragment.c b/src/gallium/drivers/panfrost/pan_fragment.c
index 2b6ffd841fe9..cbb95b79f52a 100644
--- a/src/gallium/drivers/panfrost/pan_fragment.c
+++ b/src/gallium/drivers/panfrost/pan_fragment.c
@@ -44,7 +44,7 @@  panfrost_initialize_surface(
         rsrc->slices[level].initialized = true;
 
         assert(rsrc->bo);
-        panfrost_batch_add_bo(batch, rsrc->bo);
+        panfrost_batch_add_bo(batch, rsrc->bo, PAN_SHARED_BO_RW);
 }
 
 /* Generate a fragment job. This should be called once per frame. (According to
diff --git a/src/gallium/drivers/panfrost/pan_instancing.c b/src/gallium/drivers/panfrost/pan_instancing.c
index 806823f0da0d..0057fccc7eb2 100644
--- a/src/gallium/drivers/panfrost/pan_instancing.c
+++ b/src/gallium/drivers/panfrost/pan_instancing.c
@@ -298,7 +298,7 @@  panfrost_emit_vertex_data(struct panfrost_batch *batch)
                 unsigned chopped_addr = raw_addr - addr;
 
                 /* Add a dependency of the batch on the vertex buffer */
-                panfrost_batch_add_bo(batch, rsrc->bo);
+                panfrost_batch_add_bo(batch, rsrc->bo, PAN_SHARED_BO_RD);
 
                 /* Set common fields */
                 attrs[k].elements = addr;
diff --git a/src/gallium/drivers/panfrost/pan_job.c b/src/gallium/drivers/panfrost/pan_job.c
index b384dd74267a..8eda110542c3 100644
--- a/src/gallium/drivers/panfrost/pan_job.c
+++ b/src/gallium/drivers/panfrost/pan_job.c
@@ -63,6 +63,21 @@  panfrost_free_batch(struct panfrost_batch *batch)
 
         set_foreach(batch->bos, entry) {
                 struct panfrost_bo *bo = (struct panfrost_bo *)entry->key;
+                struct hash_entry *entry;
+
+                _mesa_hash_table_remove_key(ctx->write_bos, bo);
+                entry = _mesa_hash_table_search(ctx->read_bos, bo);
+                if (entry) {
+                        struct set *set = entry->data;
+                        if (set) {
+                                _mesa_set_remove_key(set, batch);
+                                if (!set->entries) {
+                                        _mesa_set_destroy(set, NULL);
+                                        _mesa_hash_table_remove(ctx->read_bos, entry);
+                                }
+                        }
+                }
+
                 panfrost_bo_unreference(bo);
         }
 
@@ -70,6 +85,7 @@  panfrost_free_batch(struct panfrost_batch *batch)
         panfrost_bo_unreference(batch->polygon_list);
 
         _mesa_hash_table_remove_key(ctx->batches, &batch->key);
+        util_unreference_framebuffer_state(&batch->key);
 
         if (ctx->batch == batch)
                 ctx->batch = NULL;
@@ -128,17 +144,98 @@  panfrost_get_batch_for_fbo(struct panfrost_context *ctx)
         return batch;
 }
 
+static void
+panfrost_batch_reads_bo(struct panfrost_batch *batch, struct panfrost_bo *bo)
+{
+        struct panfrost_context *ctx = batch->ctx;
+        struct hash_entry *entry;
+        struct set *set;
+
+        entry = _mesa_hash_table_search(ctx->read_bos, bo);
+        if (entry) {
+                set = entry->data;
+        } else {
+                set = _mesa_set_create(ctx, _mesa_hash_pointer,
+                                       _mesa_key_pointer_equal);
+                assert(set);
+                _mesa_hash_table_insert(ctx->read_bos, bo, set);
+        }
+        _mesa_set_add(set, batch);
+}
+
+static void
+panfrost_batch_writes_bo(struct panfrost_batch *batch, struct panfrost_bo *bo)
+{
+        struct panfrost_context *ctx = batch->ctx;
+
+        assert(!_mesa_hash_table_search(ctx->write_bos, bo) ||
+               _mesa_hash_table_search(ctx->write_bos, bo)->data == batch);
+        _mesa_hash_table_insert(ctx->write_bos, bo, batch);
+}
+
 void
-panfrost_batch_add_bo(struct panfrost_batch *batch, struct panfrost_bo *bo)
+panfrost_batch_add_bo(struct panfrost_batch *batch, struct panfrost_bo *bo,
+                      unsigned flags)
 {
         if (!bo)
                 return;
 
-        if (_mesa_set_search(batch->bos, bo))
+        if (flags & PAN_SHARED_BO_RD)
+                panfrost_batch_reads_bo(batch, bo);
+
+        if (flags & PAN_SHARED_BO_WR)
+                panfrost_batch_writes_bo(batch, bo);
+
+        if (!_mesa_set_search(batch->bos, bo)) {
+                panfrost_bo_reference(bo);
+                _mesa_set_add(batch->bos, bo);
+        }
+}
+
+void panfrost_batch_add_fbo_bos(struct panfrost_batch *batch)
+{
+        for (unsigned i = 0; i < batch->key.nr_cbufs; ++i) {
+                struct panfrost_resource *rsrc = pan_resource(batch->key.cbufs[i]->texture);
+                panfrost_batch_add_bo(batch, rsrc->bo, PAN_SHARED_BO_RW);
+	}
+
+        if (batch->key.zsbuf) {
+                struct panfrost_resource *rsrc = pan_resource(batch->key.zsbuf->texture);
+                panfrost_batch_add_bo(batch, rsrc->bo, PAN_SHARED_BO_RW);
+        }
+}
+
+void
+panfrost_flush_batches_reading_bo(struct panfrost_context *ctx,
+                                  struct panfrost_bo *bo, bool skip_cur)
+{
+        struct panfrost_batch *cur = panfrost_get_batch_for_fbo(ctx);
+        struct hash_entry *hentry;
+        hentry = _mesa_hash_table_search(ctx->read_bos, bo);
+        if (!hentry || !hentry->data)
                 return;
 
-        panfrost_bo_reference(bo);
-        _mesa_set_add(batch->bos, bo);
+        struct set *set = hentry->data;
+
+        set_foreach(set, sentry) {
+                if (skip_cur && sentry->key == cur)
+                        continue;
+                panfrost_batch_submit((struct panfrost_batch *)sentry->key);
+        }
+}
+
+void
+panfrost_flush_batch_writing_bo(struct panfrost_context *ctx,
+                                struct panfrost_bo *bo, bool skip_cur)
+{
+        struct panfrost_batch *cur = panfrost_get_batch_for_fbo(ctx);
+        struct hash_entry *entry;
+
+        entry = _mesa_hash_table_search(ctx->write_bos, bo);
+        if (!entry || !entry->data || (skip_cur && entry->data == cur))
+                return;
+
+        panfrost_batch_submit((struct panfrost_batch *)entry->data);
 }
 
 /* Returns the polygon list's GPU address if available, or otherwise allocates
@@ -305,6 +402,21 @@  out:
         panfrost_free_batch(batch);
 }
 
+void
+panfrost_flush_all_batches(struct panfrost_context *ctx, bool wait)
+{
+        hash_table_foreach(ctx->batches, entry) {
+                struct panfrost_batch *batch = entry->data;
+                panfrost_batch_submit(batch);
+        }
+
+        assert(!ctx->batches->entries);
+
+        if (wait)
+                drmSyncobjWait(pan_screen(ctx->base.screen)->fd,
+                               &ctx->out_sync, 1, INT64_MAX, 0, NULL);
+}
+
 void
 panfrost_batch_set_requirements(struct panfrost_batch *batch)
 {
@@ -509,4 +621,8 @@  panfrost_batch_init(struct panfrost_context *ctx)
         ctx->batches = _mesa_hash_table_create(ctx,
                                                panfrost_batch_hash,
                                                panfrost_batch_compare);
+        ctx->read_bos = _mesa_hash_table_create(ctx, _mesa_hash_pointer,
+                                                _mesa_key_pointer_equal);
+        ctx->write_bos = _mesa_hash_table_create(ctx, _mesa_hash_pointer,
+                                                 _mesa_key_pointer_equal);
 }
diff --git a/src/gallium/drivers/panfrost/pan_job.h b/src/gallium/drivers/panfrost/pan_job.h
index 48d483c9a724..db34464908f6 100644
--- a/src/gallium/drivers/panfrost/pan_job.h
+++ b/src/gallium/drivers/panfrost/pan_job.h
@@ -138,8 +138,27 @@  panfrost_get_batch_for_fbo(struct panfrost_context *ctx);
 void
 panfrost_batch_init(struct panfrost_context *ctx);
 
+#define PAN_PRIVATE_BO          0
+#define PAN_SHARED_BO_RD        (1 << 0)
+#define PAN_SHARED_BO_WR        (1 << 1)
+#define PAN_SHARED_BO_RW        (PAN_SHARED_BO_RD | PAN_SHARED_BO_WR)
+
 void
-panfrost_batch_add_bo(struct panfrost_batch *batch, struct panfrost_bo *bo);
+panfrost_batch_add_bo(struct panfrost_batch *batch, struct panfrost_bo *bo,
+                      unsigned flags);
+
+void panfrost_batch_add_fbo_bos(struct panfrost_batch *batch);
+
+void
+panfrost_flush_batches_reading_bo(struct panfrost_context *ctx,
+                                  struct panfrost_bo *bo, bool skip_cur);
+
+void
+panfrost_flush_batch_writing_bo(struct panfrost_context *ctx,
+                                struct panfrost_bo *bo, bool skip_cur);
+
+void
+panfrost_flush_all_batches(struct panfrost_context *ctx, bool wait);
 
 void
 panfrost_batch_submit(struct panfrost_batch *batch);
diff --git a/src/gallium/drivers/panfrost/pan_varyings.c b/src/gallium/drivers/panfrost/pan_varyings.c
index 9dbd1e2ebacf..f501667289b9 100644
--- a/src/gallium/drivers/panfrost/pan_varyings.c
+++ b/src/gallium/drivers/panfrost/pan_varyings.c
@@ -69,7 +69,7 @@  panfrost_emit_streamout(
         /* Grab the BO and bind it to the batch */
         struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
         struct panfrost_bo *bo = pan_resource(target->buffer)->bo;
-        panfrost_batch_add_bo(batch, bo);
+        panfrost_batch_add_bo(batch, bo, PAN_SHARED_BO_WR);
 
         mali_ptr addr = bo->gpu + target->buffer_offset + (offset * slot->stride);
         slot->elements = addr;

Comments

> --- a/src/gallium/drivers/panfrost/pan_fragment.c
> +++ b/src/gallium/drivers/panfrost/pan_fragment.c
> @@ -44,7 +44,7 @@ panfrost_initialize_surface(
>          rsrc->slices[level].initialized = true;
>  
>          assert(rsrc->bo);
> -        panfrost_batch_add_bo(batch, rsrc->bo);
> +        panfrost_batch_add_bo(batch, rsrc->bo, PAN_SHARED_BO_RW);
>  }

This should be write-only. The corresponding read would be iff we're
wallpapering, so add an add_bo with RO in the wallpaper drawing routine.

I don't know if it really matters (since we can only have one write at a
time) but let's be precise.

-------------------------------

On that note, sometimes we stuff multiple related-but-independent
buffers within a single BO, particularly multiple miplevels/cubemap
faces/etc in one BO.  Hypothetically, it is legal to render to multiple
faces independently at once. In practice, I don't know if this case is
it is, we can of course split up the resource into per-face BOs.

>          _mesa_hash_table_remove_key(ctx->batches, &batch->key);
> +        util_unreference_framebuffer_state(&batch->key);

(Remind me where was the corresponding reference..?)

> +void panfrost_batch_add_fbo_bos(struct panfrost_batch *batch)
> +{
> +        for (unsigned i = 0; i < batch->key.nr_cbufs; ++i) {
> +                struct panfrost_resource *rsrc = pan_resource(batch->key.cbufs[i]->texture);
> +                panfrost_batch_add_bo(batch, rsrc->bo, PAN_SHARED_BO_RW);
> +	}
> +
> +        if (batch->key.zsbuf) {
> +                struct panfrost_resource *rsrc = pan_resource(batch->key.zsbuf->texture);
> +                panfrost_batch_add_bo(batch, rsrc->bo, PAN_SHARED_BO_RW);
> +        }
> +}

As per above, these should be write-only. Also, is this duplicate from
the panfrost_batch_add_bo in panfrost_initialize_surface? It feels like
it. Which one is deadcode..?
On Thu, 5 Sep 2019 19:26:45 -0400
Alyssa Rosenzweig <alyssa@rosenzweig.io> wrote:

> > --- a/src/gallium/drivers/panfrost/pan_fragment.c
> > +++ b/src/gallium/drivers/panfrost/pan_fragment.c
> > @@ -44,7 +44,7 @@ panfrost_initialize_surface(
> >          rsrc->slices[level].initialized = true;
> >  
> >          assert(rsrc->bo);
> > -        panfrost_batch_add_bo(batch, rsrc->bo);
> > +        panfrost_batch_add_bo(batch, rsrc->bo, PAN_SHARED_BO_RW);
> >  }  
> 
> This should be write-only. The corresponding read would be iff we're
> wallpapering, so add an add_bo with RO in the wallpaper drawing
> routine.

Actually we can't do that in the wallpaper draw, it's too late (the
wallpaper draw happens at flush time, and adding the BO when we're
already flushing the batch is pointless). 

> 
> I don't know if it really matters (since we can only have one write
> at a time) but let's be precise.

That's true, marking the BO for read access is useless when it's
already flagged for write since a write will anyway force batches that
want to read or write this BO to flush. If we really want to be precise
(for debug purpose I guess), we should probably have:

   panfrost_batch_add_bo(batch, rsrc->bo, PAN_SHARED_BO_WR);
   if (!batch->clear)
      panfrost_batch_add_bo(batch, rsrc->bo, PAN_SHARED_BO_RD);

> 
> -------------------------------
> 
> On that note, sometimes we stuff multiple related-but-independent
> buffers within a single BO, particularly multiple miplevels/cubemap
> faces/etc in one BO.  Hypothetically, it is legal to render to
> multiple faces independently at once. In practice, I don't know if
> this case is it is, we can of course split up the resource into
> per-face BOs.

I guess we'd have to introduce the concept of BO regions and only
force a flush when things overlap, assuming we want to keep those
independent buffers stored in the same BO of course.

> 
> >          _mesa_hash_table_remove_key(ctx->batches, &batch->key);
> > +        util_unreference_framebuffer_state(&batch->key);  
> 
> (Remind me where was the corresponding reference..?)

Duh, should be moved to patch 11 ("panfrost: Use a
pipe_framebuffer_state as the batch key").

> 
> > +void panfrost_batch_add_fbo_bos(struct panfrost_batch *batch)
> > +{
> > +        for (unsigned i = 0; i < batch->key.nr_cbufs; ++i) {
> > +                struct panfrost_resource *rsrc =
> > pan_resource(batch->key.cbufs[i]->texture);
> > +                panfrost_batch_add_bo(batch, rsrc->bo,
> > PAN_SHARED_BO_RW);
> > +	}
> > +
> > +        if (batch->key.zsbuf) {
> > +                struct panfrost_resource *rsrc =
> > pan_resource(batch->key.zsbuf->texture);
> > +                panfrost_batch_add_bo(batch, rsrc->bo,
> > PAN_SHARED_BO_RW);
> > +        }
> > +}  
> 
> As per above, these should be write-only. Also, is this duplicate from
> the panfrost_batch_add_bo in panfrost_initialize_surface? It feels
> like it. Which one is deadcode..?

We only draw the wallpaper on cbufs[0] right now, so I guess we can use
BO_WR here.