[1/2] panfrost: Track BO lifetime with jobs and reference counts

Submitted by Alyssa Rosenzweig on April 16, 2019, 1:49 a.m.

Details

Message ID 20190416014922.4474-2-alyssa@rosenzweig.io
State New
Headers show
Series "Avoid costly vertex staging buffer" ( rev: 1 ) in Mesa

Not browsing as part of any series.

Commit Message

Alyssa Rosenzweig April 16, 2019, 1:49 a.m.
This (fairly large) patch continues work surrounding the panfrost_job
abstraction to improve job lifetime management. In particular, we add
infrastructure to track which BOs are used by a particular job
(currently limited to the vertex buffer BOs), to reference count these
BOs, and to automatically manage the BOs memory based on the reference
count. This set of changes serves as a code cleanup, as a way of future
proofing for allowing flushing BOs, and immediately as a bugfix to
workaround the missing reference counting for vertex buffer BOs.
Meanwhile, there are a few cleanups to vertex buffer handling code
itself, so in the short-term, this allows us to remove the costly VBO
staging workaround, since this patch addresses the underlying causes.

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
---
 src/gallium/drivers/panfrost/pan_context.c  | 37 ++++------
 src/gallium/drivers/panfrost/pan_context.h  |  6 +-
 src/gallium/drivers/panfrost/pan_drm.c      |  6 ++
 src/gallium/drivers/panfrost/pan_job.c      | 82 +++++++++++++++++++++
 src/gallium/drivers/panfrost/pan_job.h      | 16 ++++
 src/gallium/drivers/panfrost/pan_resource.c | 70 +++++++++++++++++-
 src/gallium/drivers/panfrost/pan_resource.h | 13 ++++
 src/gallium/drivers/panfrost/pan_screen.c   |  2 +-
 src/gallium/drivers/panfrost/pan_screen.h   |  5 +-
 9 files changed, 208 insertions(+), 29 deletions(-)

Patch hide | download patch | download mbox

diff --git a/src/gallium/drivers/panfrost/pan_context.c b/src/gallium/drivers/panfrost/pan_context.c
index 0c97c0a2b43..75a975871db 100644
--- a/src/gallium/drivers/panfrost/pan_context.c
+++ b/src/gallium/drivers/panfrost/pan_context.c
@@ -36,6 +36,8 @@ 
 #include "util/u_memory.h"
 #include "util/u_vbuf.h"
 #include "util/half_float.h"
+#include "util/u_helpers.h"
+#include "util/u_format.h"
 #include "indices/u_primconvert.h"
 #include "tgsi/tgsi_parse.h"
 
@@ -737,7 +739,7 @@  panfrost_emit_varying_descriptor(
  * excepting some obscure circumstances */
 
 static void
-panfrost_emit_vertex_data(struct panfrost_context *ctx)
+panfrost_emit_vertex_data(struct panfrost_context *ctx, struct panfrost_job *job)
 {
         /* TODO: Only update the dirtied buffers */
         union mali_attr attrs[PIPE_MAX_ATTRIBS];
@@ -745,6 +747,8 @@  panfrost_emit_vertex_data(struct panfrost_context *ctx)
         unsigned invocation_count = MALI_NEGATIVE(ctx->payload_tiler.prefix.invocation_count);
 
         for (int i = 0; i < ctx->vertex_buffer_count; ++i) {
+                if (!(ctx->vb_mask & (1 << i))) continue;
+
                 struct pipe_vertex_buffer *buf = &ctx->vertex_buffers[i];
                 struct panfrost_resource *rsrc = (struct panfrost_resource *) (buf->buffer.resource);
 
@@ -776,9 +780,10 @@  panfrost_emit_vertex_data(struct panfrost_context *ctx)
 
                 mali_ptr effective_address = rsrc ? (rsrc->bo->gpu + buf->buffer_offset) : 0;
 
-                if (effective_address) {
+                if (effective_address & 63) {
                         attrs[i].elements = panfrost_upload_transient(ctx, rsrc->bo->cpu + buf->buffer_offset, attrs[i].size) | MALI_ATTR_LINEAR;
                 } else if (effective_address) {
+                        panfrost_job_add_bo(job, rsrc->bo);
                         attrs[i].elements = effective_address | MALI_ATTR_LINEAR;
                 } else {
                         /* Leave unset? */
@@ -807,7 +812,7 @@  panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data)
         struct panfrost_job *job = panfrost_get_job_for_fbo(ctx);
 
         if (with_vertex_data) {
-                panfrost_emit_vertex_data(ctx);
+                panfrost_emit_vertex_data(ctx, job);
         }
 
         bool msaa = ctx->rasterizer->base.multisample;
@@ -1252,7 +1257,8 @@  panfrost_link_jobs(struct panfrost_context *ctx)
 
 static void
 panfrost_submit_frame(struct panfrost_context *ctx, bool flush_immediate,
-		      struct pipe_fence_handle **fence)
+		      struct pipe_fence_handle **fence,
+                      struct panfrost_job *job)
 {
         struct pipe_context *gallium = (struct pipe_context *) ctx;
         struct panfrost_screen *screen = pan_screen(gallium->screen);
@@ -1274,15 +1280,15 @@  panfrost_submit_frame(struct panfrost_context *ctx, bool flush_immediate,
 #ifndef DRY_RUN
         
         bool is_scanout = panfrost_is_scanout(ctx);
-        int fragment_id = screen->driver->submit_vs_fs_job(ctx, has_draws, is_scanout);
+        screen->driver->submit_vs_fs_job(ctx, has_draws, is_scanout);
 
         /* If visual, we can stall a frame */
 
         if (!flush_immediate)
                 screen->driver->force_flush_fragment(ctx, fence);
 
-        screen->last_fragment_id = fragment_id;
         screen->last_fragment_flushed = false;
+        screen->last_job = job;
 
         /* If readback, flush now (hurts the pipelined performance) */
         if (flush_immediate)
@@ -1317,7 +1323,7 @@  panfrost_flush(
         bool flush_immediate = flags & PIPE_FLUSH_END_OF_FRAME;
 
         /* Submit the frame itself */
-        panfrost_submit_frame(ctx, flush_immediate, fence);
+        panfrost_submit_frame(ctx, flush_immediate, fence, job);
 
         /* Prepare for the next frame */
         panfrost_invalidate_frame(ctx);
@@ -1793,22 +1799,9 @@  panfrost_set_vertex_buffers(
         const struct pipe_vertex_buffer *buffers)
 {
         struct panfrost_context *ctx = pan_context(pctx);
-        assert(num_buffers <= PIPE_MAX_ATTRIBS);
-
-        /* XXX: Dirty tracking? etc */
-        if (buffers) {
-                size_t sz = sizeof(buffers[0]) * num_buffers;
-                ctx->vertex_buffers = malloc(sz);
-                ctx->vertex_buffer_count = num_buffers;
-                memcpy(ctx->vertex_buffers, buffers, sz);
-        } else {
-                if (ctx->vertex_buffers) {
-                        free(ctx->vertex_buffers);
-                        ctx->vertex_buffers = NULL;
-                }
 
-                ctx->vertex_buffer_count = 0;
-        }
+        util_set_vertex_buffers_mask(ctx->vertex_buffers, &ctx->vb_mask, buffers, start_slot, num_buffers);
+        ctx->vertex_buffer_count = num_buffers;
 }
 
 static void
diff --git a/src/gallium/drivers/panfrost/pan_context.h b/src/gallium/drivers/panfrost/pan_context.h
index d071da1c62f..e45f98c9b77 100644
--- a/src/gallium/drivers/panfrost/pan_context.h
+++ b/src/gallium/drivers/panfrost/pan_context.h
@@ -114,6 +114,9 @@  struct panfrost_context {
         struct panfrost_job *job;
         struct hash_table *jobs;
 
+        /* panfrost_resource -> panfrost_job */
+        struct hash_table *write_jobs;
+
         /* Bit mask for supported PIPE_DRAW for this hardware */
         unsigned draw_modes;
 
@@ -187,8 +190,9 @@  struct panfrost_context {
 
         struct panfrost_vertex_state *vertex;
 
-        struct pipe_vertex_buffer *vertex_buffers;
+        struct pipe_vertex_buffer vertex_buffers[PIPE_MAX_ATTRIBS];
         unsigned vertex_buffer_count;
+        uint32_t vb_mask;
 
         struct panfrost_sampler_state *samplers[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS];
         unsigned sampler_count[PIPE_SHADER_TYPES];
diff --git a/src/gallium/drivers/panfrost/pan_drm.c b/src/gallium/drivers/panfrost/pan_drm.c
index 70d8d7498d4..5fecd4fb27e 100644
--- a/src/gallium/drivers/panfrost/pan_drm.c
+++ b/src/gallium/drivers/panfrost/pan_drm.c
@@ -1,5 +1,6 @@ 
 /*
  * © Copyright 2019 Collabora, Ltd.
+ * Copyright 2019 Alyssa Rosenzweig
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -138,6 +139,7 @@  panfrost_drm_import_bo(struct panfrost_screen *screen, struct winsys_handle *wha
 
 	bo->gem_handle = gem_handle;
         bo->gpu = (mali_ptr) get_bo_offset.offset;
+        panfrost_bo_reference(bo);
 
 	// TODO map and unmap on demand?
 	mmap_bo.handle = gem_handle;
@@ -227,6 +229,7 @@  panfrost_drm_submit_job(struct panfrost_context *ctx, u64 job_desc, int reqs, st
 	}
 
 	/* TODO: Add here the transient pools */
+        /* TODO: Add here the BOs listed in the panfrost_job */
 	bo_handles[submit.bo_handle_count++] = ctx->shaders.gem_handle;
 	bo_handles[submit.bo_handle_count++] = ctx->scratchpad.gem_handle;
 	bo_handles[submit.bo_handle_count++] = ctx->tiler_heap.gem_handle;
@@ -303,6 +306,9 @@  panfrost_drm_force_flush_fragment(struct panfrost_context *ctx,
         if (!screen->last_fragment_flushed) {
 		drmSyncobjWait(drm->fd, &ctx->out_sync, 1, INT64_MAX, 0, NULL);
                 screen->last_fragment_flushed = true;
+
+                /* The job finished up, so we're safe to clean it up now */
+                panfrost_free_job(ctx, screen->last_job);
 	}
 
         if (fence) {
diff --git a/src/gallium/drivers/panfrost/pan_job.c b/src/gallium/drivers/panfrost/pan_job.c
index 66a8b0d4b07..6c68575158f 100644
--- a/src/gallium/drivers/panfrost/pan_job.c
+++ b/src/gallium/drivers/panfrost/pan_job.c
@@ -27,6 +27,13 @@ 
 #include "util/hash_table.h"
 #include "util/ralloc.h"
 
+static void
+remove_from_ht(struct hash_table *ht, void *key)
+{
+        struct hash_entry *entry = _mesa_hash_table_search(ht, key);
+        _mesa_hash_table_remove(ht, entry);
+}
+
 struct panfrost_job *
 panfrost_create_job(struct panfrost_context *ctx)
 {
@@ -35,9 +42,32 @@  panfrost_create_job(struct panfrost_context *ctx)
 
         job->ctx = ctx;
 
+        job->bos = _mesa_set_create(job,
+                                    _mesa_hash_pointer,
+                                    _mesa_key_pointer_equal);
+ 
         return job;
 }
 
+void
+panfrost_free_job(struct panfrost_context *ctx, struct panfrost_job *job)
+{
+        if (!job)
+                return;
+
+        set_foreach(job->bos, entry) {
+                struct panfrost_bo *bo = (struct panfrost_bo *)entry->key;
+                panfrost_bo_unreference(ctx->base.screen, bo);
+        }
+
+        remove_from_ht(ctx->jobs, &job->key);
+
+        if (ctx->job == job)
+                ctx->job = NULL;
+
+        ralloc_free(job);
+}
+
 struct panfrost_job *
 panfrost_get_job(struct panfrost_context *ctx,
                 struct pipe_surface **cbufs, struct pipe_surface *zsbuf)
@@ -90,6 +120,53 @@  panfrost_get_job_for_fbo(struct panfrost_context *ctx)
         return job;
 }
 
+void
+panfrost_job_add_bo(struct panfrost_job *job, struct panfrost_bo *bo)
+{
+        if (!bo)
+                return;
+
+        if (_mesa_set_search(job->bos, bo))
+                return;
+
+        panfrost_bo_reference(bo);
+        _mesa_set_add(job->bos, bo);
+}
+
+void
+panfrost_flush_jobs_writing_resource(struct panfrost_context *panfrost,
+                                struct pipe_resource *prsc)
+{
+#if 0
+        struct hash_entry *entry = _mesa_hash_table_search(panfrost->write_jobs,
+                                                           prsc);
+        if (entry) {
+                struct panfrost_job *job = entry->data;
+                panfrost_job_submit(panfrost, job);
+        }
+#endif
+        /* TODO stub */
+}
+
+void
+panfrost_flush_jobs_reading_resource(struct panfrost_context *panfrost,
+                                struct pipe_resource *prsc)
+{
+        struct panfrost_resource *rsc = pan_resource(prsc);
+
+        panfrost_flush_jobs_writing_resource(panfrost, prsc);
+
+        hash_table_foreach(panfrost->jobs, entry) {
+                struct panfrost_job *job = entry->data;
+
+                if (_mesa_set_search(job->bos, rsc->bo)) {
+                        printf("TODO: submit job for flush\n");
+                        //panfrost_job_submit(panfrost, job);
+                        continue;
+                }
+        }
+}
+
 static bool
 panfrost_job_compare(const void *a, const void *b)
 {
@@ -109,4 +186,9 @@  panfrost_job_init(struct panfrost_context *ctx)
         ctx->jobs = _mesa_hash_table_create(NULL,
                                             panfrost_job_hash,
                                             panfrost_job_compare);
+
+        ctx->write_jobs = _mesa_hash_table_create(NULL,
+                                            _mesa_hash_pointer,
+                                            _mesa_key_pointer_equal);
+
 }
diff --git a/src/gallium/drivers/panfrost/pan_job.h b/src/gallium/drivers/panfrost/pan_job.h
index 30f1cf4bd5c..1b28084c599 100644
--- a/src/gallium/drivers/panfrost/pan_job.h
+++ b/src/gallium/drivers/panfrost/pan_job.h
@@ -55,6 +55,8 @@  struct panfrost_job {
          * bitmask) */
         unsigned requirements;
 
+        /* BOs referenced -- will be used for flushing logic */
+        struct set *bos;
 };
 
 /* Functions for managing the above */
@@ -62,6 +64,9 @@  struct panfrost_job {
 struct panfrost_job *
 panfrost_create_job(struct panfrost_context *ctx);
 
+void
+panfrost_free_job(struct panfrost_context *ctx, struct panfrost_job *job);
+
 struct panfrost_job *
 panfrost_get_job(struct panfrost_context *ctx,
                 struct pipe_surface **cbufs, struct pipe_surface *zsbuf);
@@ -72,4 +77,15 @@  panfrost_get_job_for_fbo(struct panfrost_context *ctx);
 void
 panfrost_job_init(struct panfrost_context *ctx);
 
+void
+panfrost_job_add_bo(struct panfrost_job *job, struct panfrost_bo *bo);
+
+void
+panfrost_flush_jobs_writing_resource(struct panfrost_context *panfrost,
+                                struct pipe_resource *prsc);
+
+void
+panfrost_flush_jobs_reading_resource(struct panfrost_context *panfrost,
+                                struct pipe_resource *prsc);
+
 #endif
diff --git a/src/gallium/drivers/panfrost/pan_resource.c b/src/gallium/drivers/panfrost/pan_resource.c
index 15d522f1963..a0fc64fa1c1 100644
--- a/src/gallium/drivers/panfrost/pan_resource.c
+++ b/src/gallium/drivers/panfrost/pan_resource.c
@@ -228,6 +228,7 @@  static struct panfrost_bo *
 panfrost_create_bo(struct panfrost_screen *screen, const struct pipe_resource *template)
 {
 	struct panfrost_bo *bo = CALLOC_STRUCT(panfrost_bo);
+        panfrost_bo_reference(bo);
 
         /* Based on the usage, figure out what storing will be used. There are
          * various tradeoffs:
@@ -297,6 +298,8 @@  panfrost_resource_create(struct pipe_screen *screen,
                         assert(0);
         }
 
+        util_range_init(&so->valid_buffer_range);
+
         if (template->bind & PIPE_BIND_DISPLAY_TARGET ||
             template->bind & PIPE_BIND_SCANOUT ||
             template->bind & PIPE_BIND_SHARED) {
@@ -359,6 +362,25 @@  panfrost_destroy_bo(struct panfrost_screen *screen, struct panfrost_bo *pbo)
         }
 }
 
+void
+panfrost_bo_reference(struct panfrost_bo *bo)
+{
+        p_atomic_inc(&bo->reference_count);
+}
+
+void
+panfrost_bo_unreference(struct pipe_screen *screen, struct panfrost_bo *bo)
+{
+        /* Check for double free errors */
+        assert(bo->reference_count);
+
+        /* When the reference count goes to zero, we need to cleanup */
+
+        if (p_atomic_dec_zero(&bo->reference_count)) {
+                panfrost_destroy_bo(pan_screen(screen), bo);
+        }
+}
+
 static void
 panfrost_resource_destroy(struct pipe_screen *screen,
                           struct pipe_resource *pt)
@@ -370,8 +392,9 @@  panfrost_resource_destroy(struct pipe_screen *screen,
 		renderonly_scanout_destroy(rsrc->scanout, pscreen->ro);
 
 	if (rsrc->bo)
-		panfrost_destroy_bo(pscreen, rsrc->bo);
+                panfrost_bo_unreference(screen, rsrc->bo);
 
+        util_range_destroy(&rsrc->valid_buffer_range);
 	FREE(rsrc);
 }
 
@@ -384,7 +407,8 @@  panfrost_transfer_map(struct pipe_context *pctx,
                       struct pipe_transfer **out_transfer)
 {
         int bytes_per_pixel = util_format_get_blocksize(resource->format);
-        struct panfrost_bo *bo = pan_resource(resource)->bo;
+        struct panfrost_resource *rsrc = pan_resource(resource);
+        struct panfrost_bo *bo = rsrc->bo;
 
         struct panfrost_gtransfer *transfer = CALLOC_STRUCT(panfrost_gtransfer);
         transfer->base.level = level;
@@ -405,6 +429,25 @@  panfrost_transfer_map(struct pipe_context *pctx,
                 panfrost_flush(pctx, NULL, PIPE_FLUSH_END_OF_FRAME);
         }
 
+        if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) {
+                /* TODO: reallocate */
+                //printf("debug: Missed reallocate\n");
+        } else if ((usage & PIPE_TRANSFER_WRITE)
+                        && resource->target == PIPE_BUFFER
+                        && !util_ranges_intersect(&rsrc->valid_buffer_range, box->x, box->x + box->width)) {
+                /* No flush for writes to uninitialized */
+        } else if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
+                if (usage & PIPE_TRANSFER_WRITE) {
+                        /* STUB: flush reading */
+                        //printf("debug: missed reading flush %d\n", resource->target);
+                } else if (usage & PIPE_TRANSFER_READ) {
+                        /* STUB: flush writing */
+                        //printf("debug: missed writing flush %d (%d-%d)\n", resource->target, box->x, box->x + box->width);
+                } else {
+                        /* Why are you even mapping?! */
+                }
+        }
+
         if (bo->layout != PAN_LINEAR) {
                 /* Non-linear resources need to be indirectly mapped */
 
@@ -460,9 +503,9 @@  panfrost_transfer_unmap(struct pipe_context *pctx,
         /* Gallium expects writeback here, so we tile */
 
         struct panfrost_gtransfer *trans = pan_transfer(transfer);
+        struct panfrost_resource *prsrc = (struct panfrost_resource *) transfer->resource;
 
         if (trans->map) {
-                struct panfrost_resource *prsrc = (struct panfrost_resource *) transfer->resource;
                 struct panfrost_bo *bo = prsrc->bo;
 
                 if (transfer->usage & PIPE_TRANSFER_WRITE) {
@@ -480,6 +523,11 @@  panfrost_transfer_unmap(struct pipe_context *pctx,
                 free(trans->map);
         }
 
+
+	util_range_add(&prsrc->valid_buffer_range,
+                        transfer->box.x,
+                        transfer->box.x + transfer->box.width);
+
         /* Derefence the resource */
         pipe_resource_reference(&transfer->resource, NULL);
 
@@ -487,6 +535,20 @@  panfrost_transfer_unmap(struct pipe_context *pctx,
         free(transfer);
 }
 
+static void
+panfrost_transfer_flush_region(struct pipe_context *pctx,
+		struct pipe_transfer *transfer,
+		const struct pipe_box *box)
+{
+	struct panfrost_resource *rsc = pan_resource(transfer->resource);
+
+	if (transfer->resource->target == PIPE_BUFFER) {
+		util_range_add(&rsc->valid_buffer_range,
+					   transfer->box.x + box->x,
+					   transfer->box.x + box->x + box->width);
+        }
+}
+
 static struct pb_slab *
 panfrost_slab_alloc(void *priv, unsigned heap, unsigned entry_size, unsigned group_index)
 {
@@ -564,7 +626,7 @@  static const struct u_transfer_vtbl transfer_vtbl = {
         .resource_destroy         = panfrost_resource_destroy,
         .transfer_map             = panfrost_transfer_map,
         .transfer_unmap           = panfrost_transfer_unmap,
-        .transfer_flush_region    = u_default_transfer_flush_region,
+        .transfer_flush_region    = panfrost_transfer_flush_region,
         .get_internal_format      = panfrost_resource_get_internal_format,
         .set_stencil              = panfrost_resource_set_stencil,
         .get_stencil              = panfrost_resource_get_stencil,
diff --git a/src/gallium/drivers/panfrost/pan_resource.h b/src/gallium/drivers/panfrost/pan_resource.h
index a1315ab1b43..b9d6cd2b023 100644
--- a/src/gallium/drivers/panfrost/pan_resource.h
+++ b/src/gallium/drivers/panfrost/pan_resource.h
@@ -30,6 +30,7 @@ 
 #include "pan_screen.h"
 #include "pan_allocate.h"
 #include "drm-uapi/drm.h"
+#include "util/u_range.h"
 
 /* Describes the memory layout of a BO */
 
@@ -45,6 +46,10 @@  struct panfrost_slice {
 };
 
 struct panfrost_bo {
+        /* Reference count */
+        unsigned reference_count;
+
+        /* Description of the mip levels */
         struct panfrost_slice slices[MAX_MIP_LEVELS];
 
         /* Mapping for the entire object (all levels) */
@@ -83,6 +88,12 @@  struct panfrost_bo {
         int gem_handle;
 };
 
+void
+panfrost_bo_reference(struct panfrost_bo *bo);
+
+void
+panfrost_bo_unreference(struct pipe_screen *screen, struct panfrost_bo *bo);
+
 struct panfrost_resource {
         struct pipe_resource base;
 
@@ -90,6 +101,8 @@  struct panfrost_resource {
         struct renderonly_scanout *scanout;
 
         struct panfrost_resource *separate_stencil;
+
+        struct util_range valid_buffer_range;
 };
 
 static inline struct panfrost_resource *
diff --git a/src/gallium/drivers/panfrost/pan_screen.c b/src/gallium/drivers/panfrost/pan_screen.c
index 71c6175d069..6d3aca594f1 100644
--- a/src/gallium/drivers/panfrost/pan_screen.c
+++ b/src/gallium/drivers/panfrost/pan_screen.c
@@ -597,8 +597,8 @@  panfrost_create_screen(int fd, struct renderonly *ro)
         screen->base.fence_reference = panfrost_fence_reference;
         screen->base.fence_finish = panfrost_fence_finish;
 
-	screen->last_fragment_id = -1;
 	screen->last_fragment_flushed = true;
+        screen->last_job = NULL;
 
         panfrost_resource_screen_init(screen);
 
diff --git a/src/gallium/drivers/panfrost/pan_screen.h b/src/gallium/drivers/panfrost/pan_screen.h
index cbadf813675..4e5efa91f22 100644
--- a/src/gallium/drivers/panfrost/pan_screen.h
+++ b/src/gallium/drivers/panfrost/pan_screen.h
@@ -92,8 +92,11 @@  struct panfrost_screen {
         /* TODO: Where? */
         struct panfrost_resource *display_target;
 
-	int last_fragment_id;
+        /* While we're busy building up the job for frame N, the GPU is
+         * still busy executing frame N-1. So hold a reference to
+         * yesterjob */
 	int last_fragment_flushed;
+        struct panfrost_job *last_job;
 };
 
 #endif /* PAN_SCREEN_H */

Comments

On Tue, 16 Apr 2019 01:49:21 +0000
Alyssa Rosenzweig <alyssa@rosenzweig.io> wrote:

> @@ -1793,22 +1799,9 @@ panfrost_set_vertex_buffers(
>          const struct pipe_vertex_buffer *buffers)
>  {
>          struct panfrost_context *ctx = pan_context(pctx);
> -        assert(num_buffers <= PIPE_MAX_ATTRIBS);
> -
> -        /* XXX: Dirty tracking? etc */
> -        if (buffers) {
> -                size_t sz = sizeof(buffers[0]) * num_buffers;
> -                ctx->vertex_buffers = malloc(sz);
> -                ctx->vertex_buffer_count = num_buffers;
> -                memcpy(ctx->vertex_buffers, buffers, sz);
> -        } else {
> -                if (ctx->vertex_buffers) {
> -                        free(ctx->vertex_buffers);
> -                        ctx->vertex_buffers = NULL;
> -                }
>  
> -                ctx->vertex_buffer_count = 0;
> -        }
> +        util_set_vertex_buffers_mask(ctx->vertex_buffers, &ctx->vb_mask, buffers, start_slot, num_buffers);
> +        ctx->vertex_buffer_count = num_buffers;

->vertex_buffer_count should be set to fls(ctx->vb_mask) (fls == find
last bit set) if you want the

	for (int i = 0; i < ctx->vertex_buffer_count; ++i)

loop in panfrost_emit_vertex_data() to do the right thing. But I think
we can get rid of ->vertex_buffer_count entirely and just do a

	for (int i = 0; i < ARRAY_SIZE(ctx->vertex_buffers); ++i)

>  }
On Tue, 16 Apr 2019 09:31:46 +0200
Boris Brezillon <boris.brezillon@collabora.com> wrote:

> On Tue, 16 Apr 2019 01:49:21 +0000
> Alyssa Rosenzweig <alyssa@rosenzweig.io> wrote:
> 
> > @@ -1793,22 +1799,9 @@ panfrost_set_vertex_buffers(
> >          const struct pipe_vertex_buffer *buffers)
> >  {
> >          struct panfrost_context *ctx = pan_context(pctx);
> > -        assert(num_buffers <= PIPE_MAX_ATTRIBS);
> > -
> > -        /* XXX: Dirty tracking? etc */
> > -        if (buffers) {
> > -                size_t sz = sizeof(buffers[0]) * num_buffers;
> > -                ctx->vertex_buffers = malloc(sz);
> > -                ctx->vertex_buffer_count = num_buffers;
> > -                memcpy(ctx->vertex_buffers, buffers, sz);
> > -        } else {
> > -                if (ctx->vertex_buffers) {
> > -                        free(ctx->vertex_buffers);
> > -                        ctx->vertex_buffers = NULL;
> > -                }
> >  
> > -                ctx->vertex_buffer_count = 0;
> > -        }
> > +        util_set_vertex_buffers_mask(ctx->vertex_buffers, &ctx->vb_mask, buffers, start_slot, num_buffers);

Had a look at the cso code (more precisely
cso_{save,restore}_vertex_buffer0() and their users) and I'm not sure
util_set_vertex_buffers_mask() does what's expected by the CSO layer
(don't know who's right about the ->set_vertex_buffers() semantic
though).

My understanding is that CSO just saves/restores buffer in slot 0 and
expects other slots to be unchanged, but util_set_vertex_buffers_mask()
marks all buffers outside of the 'start_slot -> start_slot+num_buffers'
range as inactive.

So, we should either make sure ->set_vertex_buffers() only modifies the
status of the bufs in the 'start_slot -> start_slot+num_buffers' range
(and force callers of ->set_vertex_buffers() to explicitly invalidate
bufs if they need to keep only this specific range active) or patch the
cso code to save/restore all active buffer slots.

> > +        ctx->vertex_buffer_count = num_buffers;  
> 
> ->vertex_buffer_count should be set to fls(ctx->vb_mask) (fls == find  
> last bit set) if you want the

Looks like it's called util_last_bit() in mesa.

> 
> 	for (int i = 0; i < ctx->vertex_buffer_count; ++i)
> 
> loop in panfrost_emit_vertex_data() to do the right thing. But I think
> we can get rid of ->vertex_buffer_count entirely and just do a
> 
> 	for (int i = 0; i < ARRAY_SIZE(ctx->vertex_buffers); ++i)
> 
> >  }
On Tue, 16 Apr 2019 01:49:21 +0000
Alyssa Rosenzweig <alyssa@rosenzweig.io> wrote:

> @@ -745,6 +747,8 @@ panfrost_emit_vertex_data(struct panfrost_context *ctx)
>          unsigned invocation_count = MALI_NEGATIVE(ctx->payload_tiler.prefix.invocation_count);
>  
>          for (int i = 0; i < ctx->vertex_buffer_count; ++i) {
> +                if (!(ctx->vb_mask & (1 << i))) continue;
> +
>                  struct pipe_vertex_buffer *buf = &ctx->vertex_buffers[i];
>                  struct panfrost_resource *rsrc = (struct panfrost_resource *) (buf->buffer.resource);
>  

It's outside of the diff context, but we shouldn't use i as the index
for attrs[] as the first active vertex buffers might be != 0 and vb_mask
might be sparse (my patch had the same issue).
On Tue, 2019-04-16 at 01:49 +0000, Alyssa Rosenzweig wrote:
> This (fairly large) patch continues work surrounding the panfrost_job
> abstraction to improve job lifetime management. In particular, we add
> infrastructure to track which BOs are used by a particular job
> (currently limited to the vertex buffer BOs), to reference count
> these
> BOs, and to automatically manage the BOs memory based on the
> reference
> count. This set of changes serves as a code cleanup, as a way of
> future
> proofing for allowing flushing BOs, and immediately as a bugfix to
> workaround the missing reference counting for vertex buffer BOs.
> Meanwhile, there are a few cleanups to vertex buffer handling code
> itself, so in the short-term, this allows us to remove the costly VBO
> staging workaround, since this patch addresses the underlying causes.
> 
> Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
> ---
>  src/gallium/drivers/panfrost/pan_context.c  | 37 ++++------
>  src/gallium/drivers/panfrost/pan_context.h  |  6 +-
>  src/gallium/drivers/panfrost/pan_drm.c      |  6 ++
>  src/gallium/drivers/panfrost/pan_job.c      | 82
> +++++++++++++++++++++
>  src/gallium/drivers/panfrost/pan_job.h      | 16 ++++
>  src/gallium/drivers/panfrost/pan_resource.c | 70 +++++++++++++++++-
>  src/gallium/drivers/panfrost/pan_resource.h | 13 ++++
>  src/gallium/drivers/panfrost/pan_screen.c   |  2 +-
>  src/gallium/drivers/panfrost/pan_screen.h   |  5 +-
>  9 files changed, 208 insertions(+), 29 deletions(-)
> 
> diff --git a/src/gallium/drivers/panfrost/pan_context.c
> b/src/gallium/drivers/panfrost/pan_context.c
> index 0c97c0a2b43..75a975871db 100644
> --- a/src/gallium/drivers/panfrost/pan_context.c
> +++ b/src/gallium/drivers/panfrost/pan_context.c
> @@ -36,6 +36,8 @@
>  #include "util/u_memory.h"
>  #include "util/u_vbuf.h"
>  #include "util/half_float.h"
> +#include "util/u_helpers.h"
> +#include "util/u_format.h"
>  #include "indices/u_primconvert.h"
>  #include "tgsi/tgsi_parse.h"
>  
> @@ -737,7 +739,7 @@ panfrost_emit_varying_descriptor(
>   * excepting some obscure circumstances */
>  
>  static void
> -panfrost_emit_vertex_data(struct panfrost_context *ctx)
> +panfrost_emit_vertex_data(struct panfrost_context *ctx, struct
> panfrost_job *job)
>  {
>          /* TODO: Only update the dirtied buffers */
>          union mali_attr attrs[PIPE_MAX_ATTRIBS];
> @@ -745,6 +747,8 @@ panfrost_emit_vertex_data(struct panfrost_context
> *ctx)
>          unsigned invocation_count = MALI_NEGATIVE(ctx-
> >payload_tiler.prefix.invocation_count);
>  
>          for (int i = 0; i < ctx->vertex_buffer_count; ++i) {
> +                if (!(ctx->vb_mask & (1 << i))) continue;
> +
>                  struct pipe_vertex_buffer *buf = &ctx-
> >vertex_buffers[i];
>                  struct panfrost_resource *rsrc = (struct
> panfrost_resource *) (buf->buffer.resource);
>  
> @@ -776,9 +780,10 @@ panfrost_emit_vertex_data(struct
> panfrost_context *ctx)
>  
>                  mali_ptr effective_address = rsrc ? (rsrc->bo->gpu +
> buf->buffer_offset) : 0;
>  
> -                if (effective_address) {
> +                if (effective_address & 63) {
>                          attrs[i].elements =
> panfrost_upload_transient(ctx, rsrc->bo->cpu + buf->buffer_offset,
> attrs[i].size) | MALI_ATTR_LINEAR;
>                  } else if (effective_address) {
> +                        panfrost_job_add_bo(job, rsrc->bo);
>                          attrs[i].elements = effective_address |
> MALI_ATTR_LINEAR;
>                  } else {
>                          /* Leave unset? */
> @@ -807,7 +812,7 @@ panfrost_emit_for_draw(struct panfrost_context
> *ctx, bool with_vertex_data)
>          struct panfrost_job *job = panfrost_get_job_for_fbo(ctx);
>  
>          if (with_vertex_data) {
> -                panfrost_emit_vertex_data(ctx);
> +                panfrost_emit_vertex_data(ctx, job);
>          }
>  
>          bool msaa = ctx->rasterizer->base.multisample;
> @@ -1252,7 +1257,8 @@ panfrost_link_jobs(struct panfrost_context
> *ctx)
>  
>  static void
>  panfrost_submit_frame(struct panfrost_context *ctx, bool
> flush_immediate,
> -		      struct pipe_fence_handle **fence)
> +		      struct pipe_fence_handle **fence,
> +                      struct panfrost_job *job)
>  {
>          struct pipe_context *gallium = (struct pipe_context *) ctx;
>          struct panfrost_screen *screen = pan_screen(gallium-
> >screen);
> @@ -1274,15 +1280,15 @@ panfrost_submit_frame(struct panfrost_context
> *ctx, bool flush_immediate,
>  #ifndef DRY_RUN
>          
>          bool is_scanout = panfrost_is_scanout(ctx);
> -        int fragment_id = screen->driver->submit_vs_fs_job(ctx,
> has_draws, is_scanout);
> +        screen->driver->submit_vs_fs_job(ctx, has_draws,
> is_scanout);
>  
>          /* If visual, we can stall a frame */
>  
>          if (!flush_immediate)
>                  screen->driver->force_flush_fragment(ctx, fence);
>  
> -        screen->last_fragment_id = fragment_id;
>          screen->last_fragment_flushed = false;
> +        screen->last_job = job;
>  
>          /* If readback, flush now (hurts the pipelined performance)
> */
>          if (flush_immediate)
> @@ -1317,7 +1323,7 @@ panfrost_flush(
>          bool flush_immediate = flags & PIPE_FLUSH_END_OF_FRAME;
>  
>          /* Submit the frame itself */
> -        panfrost_submit_frame(ctx, flush_immediate, fence);
> +        panfrost_submit_frame(ctx, flush_immediate, fence, job);
>  
>          /* Prepare for the next frame */
>          panfrost_invalidate_frame(ctx);
> @@ -1793,22 +1799,9 @@ panfrost_set_vertex_buffers(
>          const struct pipe_vertex_buffer *buffers)
>  {
>          struct panfrost_context *ctx = pan_context(pctx);
> -        assert(num_buffers <= PIPE_MAX_ATTRIBS);
> -
> -        /* XXX: Dirty tracking? etc */
> -        if (buffers) {
> -                size_t sz = sizeof(buffers[0]) * num_buffers;
> -                ctx->vertex_buffers = malloc(sz);
> -                ctx->vertex_buffer_count = num_buffers;
> -                memcpy(ctx->vertex_buffers, buffers, sz);
> -        } else {
> -                if (ctx->vertex_buffers) {
> -                        free(ctx->vertex_buffers);
> -                        ctx->vertex_buffers = NULL;
> -                }
>  
> -                ctx->vertex_buffer_count = 0;
> -        }
> +        util_set_vertex_buffers_mask(ctx->vertex_buffers, &ctx-
> >vb_mask, buffers, start_slot, num_buffers);
> +        ctx->vertex_buffer_count = num_buffers;
>  }
>  
>  static void
> diff --git a/src/gallium/drivers/panfrost/pan_context.h
> b/src/gallium/drivers/panfrost/pan_context.h
> index d071da1c62f..e45f98c9b77 100644
> --- a/src/gallium/drivers/panfrost/pan_context.h
> +++ b/src/gallium/drivers/panfrost/pan_context.h
> @@ -114,6 +114,9 @@ struct panfrost_context {
>          struct panfrost_job *job;
>          struct hash_table *jobs;
>  
> +        /* panfrost_resource -> panfrost_job */
> +        struct hash_table *write_jobs;
> +
>          /* Bit mask for supported PIPE_DRAW for this hardware */
>          unsigned draw_modes;
>  
> @@ -187,8 +190,9 @@ struct panfrost_context {
>  
>          struct panfrost_vertex_state *vertex;
>  
> -        struct pipe_vertex_buffer *vertex_buffers;
> +        struct pipe_vertex_buffer vertex_buffers[PIPE_MAX_ATTRIBS];
>          unsigned vertex_buffer_count;
> +        uint32_t vb_mask;
>  
>          struct panfrost_sampler_state
> *samplers[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS];
>          unsigned sampler_count[PIPE_SHADER_TYPES];
> diff --git a/src/gallium/drivers/panfrost/pan_drm.c
> b/src/gallium/drivers/panfrost/pan_drm.c
> index 70d8d7498d4..5fecd4fb27e 100644
> --- a/src/gallium/drivers/panfrost/pan_drm.c
> +++ b/src/gallium/drivers/panfrost/pan_drm.c
> @@ -1,5 +1,6 @@
>  /*
>   * © Copyright 2019 Collabora, Ltd.
> + * Copyright 2019 Alyssa Rosenzweig
>   *
>   * Permission is hereby granted, free of charge, to any person
> obtaining a
>   * copy of this software and associated documentation files (the
> "Software"),
> @@ -138,6 +139,7 @@ panfrost_drm_import_bo(struct panfrost_screen
> *screen, struct winsys_handle *wha
>  
>  	bo->gem_handle = gem_handle;
>          bo->gpu = (mali_ptr) get_bo_offset.offset;
> +        panfrost_bo_reference(bo);
>  
>  	// TODO map and unmap on demand?
>  	mmap_bo.handle = gem_handle;
> @@ -227,6 +229,7 @@ panfrost_drm_submit_job(struct panfrost_context
> *ctx, u64 job_desc, int reqs, st
>  	}
>  
>  	/* TODO: Add here the transient pools */
> +        /* TODO: Add here the BOs listed in the panfrost_job */
>  	bo_handles[submit.bo_handle_count++] = ctx->shaders.gem_handle;
>  	bo_handles[submit.bo_handle_count++] = ctx-
> >scratchpad.gem_handle;
>  	bo_handles[submit.bo_handle_count++] = ctx-
> >tiler_heap.gem_handle;
> @@ -303,6 +306,9 @@ panfrost_drm_force_flush_fragment(struct
> panfrost_context *ctx,
>          if (!screen->last_fragment_flushed) {
>  		drmSyncobjWait(drm->fd, &ctx->out_sync, 1, INT64_MAX,
> 0, NULL);
>                  screen->last_fragment_flushed = true;
> +
> +                /* The job finished up, so we're safe to clean it up
> now */
> +                panfrost_free_job(ctx, screen->last_job);
>  	}
>  
>          if (fence) {
> diff --git a/src/gallium/drivers/panfrost/pan_job.c
> b/src/gallium/drivers/panfrost/pan_job.c
> index 66a8b0d4b07..6c68575158f 100644
> --- a/src/gallium/drivers/panfrost/pan_job.c
> +++ b/src/gallium/drivers/panfrost/pan_job.c
> @@ -27,6 +27,13 @@
>  #include "util/hash_table.h"
>  #include "util/ralloc.h"
>  
> +static void
> +remove_from_ht(struct hash_table *ht, void *key)
> +{
> +        struct hash_entry *entry = _mesa_hash_table_search(ht, key);
> +        _mesa_hash_table_remove(ht, entry);
> +}
> +

This is the same as _mesa_hash_table_remove_key(), no?

>  struct panfrost_job *
>  panfrost_create_job(struct panfrost_context *ctx)
>  {
> @@ -35,9 +42,32 @@ panfrost_create_job(struct panfrost_context *ctx)
>  
>          job->ctx = ctx;
>  
> +        job->bos = _mesa_set_create(job,
> +                                    _mesa_hash_pointer,
> +                                    _mesa_key_pointer_equal);
> + 
>          return job;
>  }
>  
> +void
> +panfrost_free_job(struct panfrost_context *ctx, struct panfrost_job
> *job)
> +{
> +        if (!job)
> +                return;
> +
> +        set_foreach(job->bos, entry) {
> +                struct panfrost_bo *bo = (struct panfrost_bo
> *)entry->key;
> +                panfrost_bo_unreference(ctx->base.screen, bo);
> +        }
> +
> +        remove_from_ht(ctx->jobs, &job->key);
> +
> +        if (ctx->job == job)
> +                ctx->job = NULL;
> +
> +        ralloc_free(job);
> +}
> +
>  struct panfrost_job *
>  panfrost_get_job(struct panfrost_context *ctx,
>                  struct pipe_surface **cbufs, struct pipe_surface
> *zsbuf)
> @@ -90,6 +120,53 @@ panfrost_get_job_for_fbo(struct panfrost_context
> *ctx)
>          return job;
>  }
>  
> +void
> +panfrost_job_add_bo(struct panfrost_job *job, struct panfrost_bo
> *bo)
> +{
> +        if (!bo)
> +                return;
> +
> +        if (_mesa_set_search(job->bos, bo))
> +                return;
> +
> +        panfrost_bo_reference(bo);
> +        _mesa_set_add(job->bos, bo);
> +}
> +
> +void
> +panfrost_flush_jobs_writing_resource(struct panfrost_context
> *panfrost,
> +                                struct pipe_resource *prsc)
> +{
> +#if 0
> +        struct hash_entry *entry = _mesa_hash_table_search(panfrost-
> >write_jobs,
> +                                                           prsc);
> +        if (entry) {
> +                struct panfrost_job *job = entry->data;
> +                panfrost_job_submit(panfrost, job);
> +        }
> +#endif
> +        /* TODO stub */

Did you mean to leave this #if'ed out?

> +}
> +
> +void
> +panfrost_flush_jobs_reading_resource(struct panfrost_context
> *panfrost,
> +                                struct pipe_resource *prsc)
> +{
> +        struct panfrost_resource *rsc = pan_resource(prsc);
> +
> +        panfrost_flush_jobs_writing_resource(panfrost, prsc);
> +
> +        hash_table_foreach(panfrost->jobs, entry) {
> +                struct panfrost_job *job = entry->data;
> +
> +                if (_mesa_set_search(job->bos, rsc->bo)) {
> +                        printf("TODO: submit job for flush\n");
> +                        //panfrost_job_submit(panfrost, job);
> +                        continue;
> +                }
> +        }
> +}
> +
>  static bool
>  panfrost_job_compare(const void *a, const void *b)
>  {
> @@ -109,4 +186,9 @@ panfrost_job_init(struct panfrost_context *ctx)
>          ctx->jobs = _mesa_hash_table_create(NULL,
>                                              panfrost_job_hash,
>                                              panfrost_job_compare);
> +
> +        ctx->write_jobs = _mesa_hash_table_create(NULL,
> +                                            _mesa_hash_pointer,
> +                                            _mesa_key_pointer_equal)
> ;
> +
>  }
> diff --git a/src/gallium/drivers/panfrost/pan_job.h
> b/src/gallium/drivers/panfrost/pan_job.h
> index 30f1cf4bd5c..1b28084c599 100644
> --- a/src/gallium/drivers/panfrost/pan_job.h
> +++ b/src/gallium/drivers/panfrost/pan_job.h
> @@ -55,6 +55,8 @@ struct panfrost_job {
>           * bitmask) */
>          unsigned requirements;
>  
> +        /* BOs referenced -- will be used for flushing logic */
> +        struct set *bos;
>  };
>  
>  /* Functions for managing the above */
> @@ -62,6 +64,9 @@ struct panfrost_job {
>  struct panfrost_job *
>  panfrost_create_job(struct panfrost_context *ctx);
>  
> +void
> +panfrost_free_job(struct panfrost_context *ctx, struct panfrost_job
> *job);
> +
>  struct panfrost_job *
>  panfrost_get_job(struct panfrost_context *ctx,
>                  struct pipe_surface **cbufs, struct pipe_surface
> *zsbuf);
> @@ -72,4 +77,15 @@ panfrost_get_job_for_fbo(struct panfrost_context
> *ctx);
>  void
>  panfrost_job_init(struct panfrost_context *ctx);
>  
> +void
> +panfrost_job_add_bo(struct panfrost_job *job, struct panfrost_bo
> *bo);
> +
> +void
> +panfrost_flush_jobs_writing_resource(struct panfrost_context
> *panfrost,
> +                                struct pipe_resource *prsc);
> +
> +void
> +panfrost_flush_jobs_reading_resource(struct panfrost_context
> *panfrost,
> +                                struct pipe_resource *prsc);
> +
>  #endif
> diff --git a/src/gallium/drivers/panfrost/pan_resource.c
> b/src/gallium/drivers/panfrost/pan_resource.c
> index 15d522f1963..a0fc64fa1c1 100644
> --- a/src/gallium/drivers/panfrost/pan_resource.c
> +++ b/src/gallium/drivers/panfrost/pan_resource.c
> @@ -228,6 +228,7 @@ static struct panfrost_bo *
>  panfrost_create_bo(struct panfrost_screen *screen, const struct
> pipe_resource *template)
>  {
>  	struct panfrost_bo *bo = CALLOC_STRUCT(panfrost_bo);
> +        panfrost_bo_reference(bo);
>  
>          /* Based on the usage, figure out what storing will be used.
> There are
>           * various tradeoffs:
> @@ -297,6 +298,8 @@ panfrost_resource_create(struct pipe_screen
> *screen,
>                          assert(0);
>          }
>  
> +        util_range_init(&so->valid_buffer_range);
> +
>          if (template->bind & PIPE_BIND_DISPLAY_TARGET ||
>              template->bind & PIPE_BIND_SCANOUT ||
>              template->bind & PIPE_BIND_SHARED) {
> @@ -359,6 +362,25 @@ panfrost_destroy_bo(struct panfrost_screen
> *screen, struct panfrost_bo *pbo)
>          }
>  }
>  
> +void
> +panfrost_bo_reference(struct panfrost_bo *bo)
> +{
> +        p_atomic_inc(&bo->reference_count);
> +}
> +
> +void
> +panfrost_bo_unreference(struct pipe_screen *screen, struct
> panfrost_bo *bo)
> +{
> +        /* Check for double free errors */
> +        assert(bo->reference_count);
> +
> +        /* When the reference count goes to zero, we need to cleanup
> */
> +
> +        if (p_atomic_dec_zero(&bo->reference_count)) {
> +                panfrost_destroy_bo(pan_screen(screen), bo);
> +        }
> +}
> +

Why not use pipe_reference instead of open-coding? That helper contains
some neat debug-helpers etc...

>  static void
>  panfrost_resource_destroy(struct pipe_screen *screen,
>                            struct pipe_resource *pt)
> @@ -370,8 +392,9 @@ panfrost_resource_destroy(struct pipe_screen
> *screen,
>  		renderonly_scanout_destroy(rsrc->scanout, pscreen->ro);
>  
>  	if (rsrc->bo)
> -		panfrost_destroy_bo(pscreen, rsrc->bo);
> +                panfrost_bo_unreference(screen, rsrc->bo);
>  
> +        util_range_destroy(&rsrc->valid_buffer_range);
>  	FREE(rsrc);
>  }
>  
> @@ -384,7 +407,8 @@ panfrost_transfer_map(struct pipe_context *pctx,
>                        struct pipe_transfer **out_transfer)
>  {
>          int bytes_per_pixel = util_format_get_blocksize(resource-
> >format);
> -        struct panfrost_bo *bo = pan_resource(resource)->bo;
> +        struct panfrost_resource *rsrc = pan_resource(resource);
> +        struct panfrost_bo *bo = rsrc->bo;
>  
>          struct panfrost_gtransfer *transfer =
> CALLOC_STRUCT(panfrost_gtransfer);
>          transfer->base.level = level;
> @@ -405,6 +429,25 @@ panfrost_transfer_map(struct pipe_context *pctx,
>                  panfrost_flush(pctx, NULL, PIPE_FLUSH_END_OF_FRAME);
>          }
>  
> +        if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) {
> +                /* TODO: reallocate */
> +                //printf("debug: Missed reallocate\n");
> +        } else if ((usage & PIPE_TRANSFER_WRITE)
> +                        && resource->target == PIPE_BUFFER
> +                        && !util_ranges_intersect(&rsrc-
> >valid_buffer_range, box->x, box->x + box->width)) {
> +                /* No flush for writes to uninitialized */
> +        } else if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
> +                if (usage & PIPE_TRANSFER_WRITE) {
> +                        /* STUB: flush reading */
> +                        //printf("debug: missed reading flush %d\n",
> resource->target);
> +                } else if (usage & PIPE_TRANSFER_READ) {
> +                        /* STUB: flush writing */
> +                        //printf("debug: missed writing flush %d
> (%d-%d)\n", resource->target, box->x, box->x + box->width);
> +                } else {
> +                        /* Why are you even mapping?! */
> +                }
> +        }
> +
>          if (bo->layout != PAN_LINEAR) {
>                  /* Non-linear resources need to be indirectly mapped
> */
>  
> @@ -460,9 +503,9 @@ panfrost_transfer_unmap(struct pipe_context
> *pctx,
>          /* Gallium expects writeback here, so we tile */
>  
>          struct panfrost_gtransfer *trans = pan_transfer(transfer);
> +        struct panfrost_resource *prsrc = (struct panfrost_resource
> *) transfer->resource;
>  
>          if (trans->map) {
> -                struct panfrost_resource *prsrc = (struct
> panfrost_resource *) transfer->resource;
>                  struct panfrost_bo *bo = prsrc->bo;
>  
>                  if (transfer->usage & PIPE_TRANSFER_WRITE) {
> @@ -480,6 +523,11 @@ panfrost_transfer_unmap(struct pipe_context
> *pctx,
>                  free(trans->map);
>          }
>  
> +
> +	util_range_add(&prsrc->valid_buffer_range,
> +                        transfer->box.x,
> +                        transfer->box.x + transfer->box.width);
> +
>          /* Derefence the resource */
>          pipe_resource_reference(&transfer->resource, NULL);
>  
> @@ -487,6 +535,20 @@ panfrost_transfer_unmap(struct pipe_context
> *pctx,
>          free(transfer);
>  }
>  
> +static void
> +panfrost_transfer_flush_region(struct pipe_context *pctx,
> +		struct pipe_transfer *transfer,
> +		const struct pipe_box *box)
> +{
> +	struct panfrost_resource *rsc = pan_resource(transfer-
> >resource);
> +
> +	if (transfer->resource->target == PIPE_BUFFER) {
> +		util_range_add(&rsc->valid_buffer_range,
> +					   transfer->box.x + box->x,
> +					   transfer->box.x + box->x +
> box->width);
> +        }
> +}
> +
>  static struct pb_slab *
>  panfrost_slab_alloc(void *priv, unsigned heap, unsigned entry_size,
> unsigned group_index)
>  {
> @@ -564,7 +626,7 @@ static const struct u_transfer_vtbl transfer_vtbl
> = {
>          .resource_destroy         = panfrost_resource_destroy,
>          .transfer_map             = panfrost_transfer_map,
>          .transfer_unmap           = panfrost_transfer_unmap,
> -        .transfer_flush_region    = u_default_transfer_flush_region,
> +        .transfer_flush_region    = panfrost_transfer_flush_region,
>          .get_internal_format      =
> panfrost_resource_get_internal_format,
>          .set_stencil              = panfrost_resource_set_stencil,
>          .get_stencil              = panfrost_resource_get_stencil,
> diff --git a/src/gallium/drivers/panfrost/pan_resource.h
> b/src/gallium/drivers/panfrost/pan_resource.h
> index a1315ab1b43..b9d6cd2b023 100644
> --- a/src/gallium/drivers/panfrost/pan_resource.h
> +++ b/src/gallium/drivers/panfrost/pan_resource.h
> @@ -30,6 +30,7 @@
>  #include "pan_screen.h"
>  #include "pan_allocate.h"
>  #include "drm-uapi/drm.h"
> +#include "util/u_range.h"
>  
>  /* Describes the memory layout of a BO */
>  
> @@ -45,6 +46,10 @@ struct panfrost_slice {
>  };
>  
>  struct panfrost_bo {
> +        /* Reference count */
> +        unsigned reference_count;
> +
> +        /* Description of the mip levels */
>          struct panfrost_slice slices[MAX_MIP_LEVELS];
>  
>          /* Mapping for the entire object (all levels) */
> @@ -83,6 +88,12 @@ struct panfrost_bo {
>          int gem_handle;
>  };
>  
> +void
> +panfrost_bo_reference(struct panfrost_bo *bo);
> +
> +void
> +panfrost_bo_unreference(struct pipe_screen *screen, struct
> panfrost_bo *bo);
> +
>  struct panfrost_resource {
>          struct pipe_resource base;
>  
> @@ -90,6 +101,8 @@ struct panfrost_resource {
>          struct renderonly_scanout *scanout;
>  
>          struct panfrost_resource *separate_stencil;
> +
> +        struct util_range valid_buffer_range;
>  };
>  
>  static inline struct panfrost_resource *
> diff --git a/src/gallium/drivers/panfrost/pan_screen.c
> b/src/gallium/drivers/panfrost/pan_screen.c
> index 71c6175d069..6d3aca594f1 100644
> --- a/src/gallium/drivers/panfrost/pan_screen.c
> +++ b/src/gallium/drivers/panfrost/pan_screen.c
> @@ -597,8 +597,8 @@ panfrost_create_screen(int fd, struct renderonly
> *ro)
>          screen->base.fence_reference = panfrost_fence_reference;
>          screen->base.fence_finish = panfrost_fence_finish;
>  
> -	screen->last_fragment_id = -1;
>  	screen->last_fragment_flushed = true;
> +        screen->last_job = NULL;
>  
>          panfrost_resource_screen_init(screen);
>  
> diff --git a/src/gallium/drivers/panfrost/pan_screen.h
> b/src/gallium/drivers/panfrost/pan_screen.h
> index cbadf813675..4e5efa91f22 100644
> --- a/src/gallium/drivers/panfrost/pan_screen.h
> +++ b/src/gallium/drivers/panfrost/pan_screen.h
> @@ -92,8 +92,11 @@ struct panfrost_screen {
>          /* TODO: Where? */
>          struct panfrost_resource *display_target;
>  
> -	int last_fragment_id;
> +        /* While we're busy building up the job for frame N, the GPU
> is
> +         * still busy executing frame N-1. So hold a reference to
> +         * yesterjob */
>  	int last_fragment_flushed;
> +        struct panfrost_job *last_job;
>  };
>  
>  #endif /* PAN_SCREEN_H */
> > diff --git a/src/gallium/drivers/panfrost/pan_job.c
> > b/src/gallium/drivers/panfrost/pan_job.c
> > index 66a8b0d4b07..6c68575158f 100644
> > --- a/src/gallium/drivers/panfrost/pan_job.c
> > +++ b/src/gallium/drivers/panfrost/pan_job.c
> > @@ -27,6 +27,13 @@
> >  #include "util/hash_table.h"
> >  #include "util/ralloc.h"
> >  
> > +static void
> > +remove_from_ht(struct hash_table *ht, void *key)
> > +{
> > +        struct hash_entry *entry = _mesa_hash_table_search(ht, key);
> > +        _mesa_hash_table_remove(ht, entry);
> > +}
> > +
> 
> This is the same as _mesa_hash_table_remove_key(), no?

Maybe? I copypasted this from v3d, but maybe we're both duplicating
code.

> Did you mean to leave this #if'ed out?

Yes, job flushing is a lot more complicated / depends on a lot more
code; I just wanted the stub here for now.

> Why not use pipe_reference instead of open-coding? That helper contains
> some neat debug-helpers etc...

pipe_reference kind of scares me... Most of the abstractions here are
based heavily on v3d's; I figure if anholt had a good reason to do it,
that's good enough for me..
> loop in panfrost_emit_vertex_data() to do the right thing. But I think
> we can get rid of ->vertex_buffer_count entirely and just do a
> 
> 	for (int i = 0; i < ARRAY_SIZE(ctx->vertex_buffers); ++i)
> 
> >  }

Sure, that's probably cleaner now that we're testing the mask. Will do
for v2.
> Had a look at the cso code (more precisely
> cso_{save,restore}_vertex_buffer0() and their users) and I'm not sure
> util_set_vertex_buffers_mask() does what's expected by the CSO layer
> (don't know who's right about the ->set_vertex_buffers() semantic
> though).

I'm not sure. Other drivers seem to implement set_vertex_buffers in
precisely this way..?
> It's outside of the diff context, but we shouldn't use i as the index
> for attrs[] as the first active vertex buffers might be != 0 and vb_mask
> might be sparse (my patch had the same issue).

Ack, good catch, thank you. Will address in next version. +1

Gotcha, thank you! I'll fix this over.
On Tue, Apr 16, 2019 at 12:31 AM Boris Brezillon
<boris.brezillon@collabora.com> wrote:
>
> On Tue, 16 Apr 2019 01:49:21 +0000
> Alyssa Rosenzweig <alyssa@rosenzweig.io> wrote:
>
> > @@ -1793,22 +1799,9 @@ panfrost_set_vertex_buffers(
> >          const struct pipe_vertex_buffer *buffers)
> >  {
> >          struct panfrost_context *ctx = pan_context(pctx);
> > -        assert(num_buffers <= PIPE_MAX_ATTRIBS);
> > -
> > -        /* XXX: Dirty tracking? etc */
> > -        if (buffers) {
> > -                size_t sz = sizeof(buffers[0]) * num_buffers;
> > -                ctx->vertex_buffers = malloc(sz);
> > -                ctx->vertex_buffer_count = num_buffers;
> > -                memcpy(ctx->vertex_buffers, buffers, sz);
> > -        } else {
> > -                if (ctx->vertex_buffers) {
> > -                        free(ctx->vertex_buffers);
> > -                        ctx->vertex_buffers = NULL;
> > -                }
> >
> > -                ctx->vertex_buffer_count = 0;
> > -        }
> > +        util_set_vertex_buffers_mask(ctx->vertex_buffers, &ctx->vb_mask, buffers, start_slot, num_buffers);
> > +        ctx->vertex_buffer_count = num_buffers;
>
> ->vertex_buffer_count should be set to fls(ctx->vb_mask) (fls == find
> last bit set) if you want the

or util_last_bit()

BR,
-R

>
>         for (int i = 0; i < ctx->vertex_buffer_count; ++i)
>
> loop in panfrost_emit_vertex_data() to do the right thing. But I think
> we can get rid of ->vertex_buffer_count entirely and just do a
>
>         for (int i = 0; i < ARRAY_SIZE(ctx->vertex_buffers); ++i)
>
> >  }
> _______________________________________________
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
On Tue, Apr 16, 2019 at 8:07 AM Alyssa Rosenzweig <alyssa@rosenzweig.io> wrote:
>
> > > diff --git a/src/gallium/drivers/panfrost/pan_job.c
> > > b/src/gallium/drivers/panfrost/pan_job.c
> > > index 66a8b0d4b07..6c68575158f 100644
> > > --- a/src/gallium/drivers/panfrost/pan_job.c
> > > +++ b/src/gallium/drivers/panfrost/pan_job.c
> > > @@ -27,6 +27,13 @@
> > >  #include "util/hash_table.h"
> > >  #include "util/ralloc.h"
> > >
> > > +static void
> > > +remove_from_ht(struct hash_table *ht, void *key)
> > > +{
> > > +        struct hash_entry *entry = _mesa_hash_table_search(ht, key);
> > > +        _mesa_hash_table_remove(ht, entry);
> > > +}
> > > +
> >
> > This is the same as _mesa_hash_table_remove_key(), no?
>
> Maybe? I copypasted this from v3d, but maybe we're both duplicating
> code.
>
> > Did you mean to leave this #if'ed out?
>
> Yes, job flushing is a lot more complicated / depends on a lot more
> code; I just wanted the stub here for now.
>
> > Why not use pipe_reference instead of open-coding? That helper contains
> > some neat debug-helpers etc...
>
> pipe_reference kind of scares me... Most of the abstractions here are
> based heavily on v3d's; I figure if anholt had a good reason to do it,
> that's good enough for me..

I think the only (valid) reason not to use pipe_reference would be if
it was some code that might someday be useful for a vulkan driver.

(That said, maybe we should move more and more of gallium's helpful
util stuff out of gallium so they can be re-used across the mesa
tree...)

BR,
-R

> _______________________________________________
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
> I think the only (valid) reason not to use pipe_reference would be if
> it was some code that might someday be useful for a vulkan driver.

Good to know :)

> (That said, maybe we should move more and more of gallium's helpful
> util stuff out of gallium so they can be re-used across the mesa
> tree...)

Probably.