[v3,04/25] panfrost: Make transient allocation rely on the BO cache

Submitted by Boris Brezillon on Sept. 5, 2019, 7:41 p.m.

Details

Message ID 20190905194150.17608-5-boris.brezillon@collabora.com
State Accepted
Commit a2bba567aeaad0a0f83f171d5913b54509704e7a
Headers show
Series "panfrost: Rework the batch pipelining logic" ( rev: 1 ) in Mesa

Not browsing as part of any series.

Commit Message

Boris Brezillon Sept. 5, 2019, 7:41 p.m.
Right now, the transient memory allocator implements its own BO caching
mechanism, which is not really needed since we already have a generic
BO cache. Let's simplify things a bit.

Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
---
Changes in v3:
* Collect R-b

Changes in v2:
* None
---
 src/gallium/drivers/panfrost/pan_allocate.c | 80 ++++-----------------
 src/gallium/drivers/panfrost/pan_job.c      | 11 ---
 src/gallium/drivers/panfrost/pan_job.h      |  4 +-
 src/gallium/drivers/panfrost/pan_screen.c   |  4 --
 src/gallium/drivers/panfrost/pan_screen.h   | 21 ------
 5 files changed, 16 insertions(+), 104 deletions(-)

Patch hide | download patch | download mbox

diff --git a/src/gallium/drivers/panfrost/pan_allocate.c b/src/gallium/drivers/panfrost/pan_allocate.c
index d8a594551c76..a22b1a5a88d6 100644
--- a/src/gallium/drivers/panfrost/pan_allocate.c
+++ b/src/gallium/drivers/panfrost/pan_allocate.c
@@ -34,27 +34,6 @@ 
 /* TODO: What does this actually have to be? */
 #define ALIGNMENT 128
 
-/* Allocate a new transient slab */
-
-static struct panfrost_bo *
-panfrost_create_slab(struct panfrost_screen *screen, unsigned *index)
-{
-        /* Allocate a new slab on the screen */
-
-        struct panfrost_bo **new =
-                util_dynarray_grow(&screen->transient_bo,
-                                struct panfrost_bo *, 1);
-
-        struct panfrost_bo *alloc = panfrost_drm_create_bo(screen, TRANSIENT_SLAB_SIZE, 0);
-
-        *new = alloc;
-
-        /* Return the BO as well as the index we just added */
-
-        *index = util_dynarray_num_elements(&screen->transient_bo, void *) - 1;
-        return alloc;
-}
-
 /* Transient command stream pooling: command stream uploads try to simply copy
  * into whereever we left off. If there isn't space, we allocate a new entry
  * into the pool and copy there */
@@ -72,59 +51,32 @@  panfrost_allocate_transient(struct panfrost_context *ctx, size_t sz)
         struct panfrost_bo *bo = NULL;
 
         unsigned offset = 0;
-        bool update_offset = false;
 
-        pthread_mutex_lock(&screen->transient_lock);
-        bool has_current = batch->transient_indices.size;
         bool fits_in_current = (batch->transient_offset + sz) < TRANSIENT_SLAB_SIZE;
 
-        if (likely(has_current && fits_in_current)) {
-                /* We can reuse the topmost BO, so get it */
-                unsigned idx = util_dynarray_top(&batch->transient_indices, unsigned);
-                bo = pan_bo_for_index(screen, idx);
+        if (likely(batch->transient_bo && fits_in_current)) {
+                /* We can reuse the current BO, so get it */
+                bo = batch->transient_bo;
 
                 /* Use the specified offset */
                 offset = batch->transient_offset;
-                update_offset = true;
-        } else if (sz < TRANSIENT_SLAB_SIZE) {
-                /* We can't reuse the topmost BO, but we can get a new one.
-                 * First, look for a free slot */
-
-                unsigned count = util_dynarray_num_elements(&screen->transient_bo, void *);
-                unsigned index = 0;
-
-                unsigned free = __bitset_ffs(
-                                screen->free_transient,
-                                count / BITSET_WORDBITS);
-
-                if (likely(free)) {
-                        /* Use this one */
-                        index = free - 1;
-
-                        /* It's ours, so no longer free */
-                        BITSET_CLEAR(screen->free_transient, index);
-
-                        /* Grab the BO */
-                        bo = pan_bo_for_index(screen, index);
-                } else {
-                        /* Otherwise, create a new BO */
-                        bo = panfrost_create_slab(screen, &index);
-                }
-
-                panfrost_batch_add_bo(batch, bo);
-
-                /* Remember we created this */
-                util_dynarray_append(&batch->transient_indices, unsigned, index);
-
-                update_offset = true;
+                batch->transient_offset = offset + sz;
         } else {
-                /* Create a new BO and reference it */
-                bo = panfrost_drm_create_bo(screen, ALIGN_POT(sz, 4096), 0);
+                size_t bo_sz = sz < TRANSIENT_SLAB_SIZE ?
+                               TRANSIENT_SLAB_SIZE : ALIGN_POT(sz, 4096);
+
+                /* We can't reuse the current BO, but we can create a new one. */
+                bo = panfrost_drm_create_bo(screen, bo_sz, 0);
                 panfrost_batch_add_bo(batch, bo);
 
                 /* Creating a BO adds a reference, and then the job adds a
                  * second one. So we need to pop back one reference */
                 panfrost_bo_unreference(&screen->base, bo);
+
+                if (sz < TRANSIENT_SLAB_SIZE) {
+                        batch->transient_bo = bo;
+                        batch->transient_offset = offset + sz;
+                }
         }
 
         struct panfrost_transfer ret = {
@@ -132,10 +84,6 @@  panfrost_allocate_transient(struct panfrost_context *ctx, size_t sz)
                 .gpu = bo->gpu + offset,
         };
 
-        if (update_offset)
-                batch->transient_offset = offset + sz;
-        pthread_mutex_unlock(&screen->transient_lock);
-
         return ret;
 
 }
diff --git a/src/gallium/drivers/panfrost/pan_job.c b/src/gallium/drivers/panfrost/pan_job.c
index 0d19c2b4c5cd..58b494108e2d 100644
--- a/src/gallium/drivers/panfrost/pan_job.c
+++ b/src/gallium/drivers/panfrost/pan_job.c
@@ -48,7 +48,6 @@  panfrost_create_batch(struct panfrost_context *ctx)
 
         util_dynarray_init(&batch->headers, batch);
         util_dynarray_init(&batch->gpu_headers, batch);
-        util_dynarray_init(&batch->transient_indices, batch);
 
         return batch;
 }
@@ -66,16 +65,6 @@  panfrost_free_batch(struct panfrost_batch *batch)
                 panfrost_bo_unreference(ctx->base.screen, bo);
         }
 
-        /* Free up the transient BOs we're sitting on */
-        struct panfrost_screen *screen = pan_screen(ctx->base.screen);
-
-        pthread_mutex_lock(&screen->transient_lock);
-        util_dynarray_foreach(&batch->transient_indices, unsigned, index) {
-                /* Mark it free */
-                BITSET_SET(screen->free_transient, *index);
-        }
-        pthread_mutex_unlock(&screen->transient_lock);
-
         /* Unreference the polygon list */
         panfrost_bo_unreference(ctx->base.screen, batch->polygon_list);
 
diff --git a/src/gallium/drivers/panfrost/pan_job.h b/src/gallium/drivers/panfrost/pan_job.h
index 7e0b7b3f314e..c9f487871216 100644
--- a/src/gallium/drivers/panfrost/pan_job.h
+++ b/src/gallium/drivers/panfrost/pan_job.h
@@ -107,8 +107,8 @@  struct panfrost_batch {
         /* BOs referenced -- will be used for flushing logic */
         struct set *bos;
 
-        /* Indices of transient BOs referenced */
-        struct util_dynarray transient_indices;
+        /* Current transient BO */
+	struct panfrost_bo *transient_bo;
 
         /* Within the topmost transient BO, how much has been used? */
         unsigned transient_offset;
diff --git a/src/gallium/drivers/panfrost/pan_screen.c b/src/gallium/drivers/panfrost/pan_screen.c
index bd826808fd6f..5dcceac370d5 100644
--- a/src/gallium/drivers/panfrost/pan_screen.c
+++ b/src/gallium/drivers/panfrost/pan_screen.c
@@ -543,7 +543,6 @@  panfrost_destroy_screen(struct pipe_screen *pscreen)
         struct panfrost_screen *screen = pan_screen(pscreen);
         panfrost_bo_cache_evict_all(screen);
         pthread_mutex_destroy(&screen->bo_cache_lock);
-        pthread_mutex_destroy(&screen->transient_lock);
         drmFreeVersion(screen->kernel_version);
         ralloc_free(screen);
 }
@@ -641,9 +640,6 @@  panfrost_create_screen(int fd, struct renderonly *ro)
                 return NULL;
         }
 
-        pthread_mutex_init(&screen->transient_lock, NULL);
-        util_dynarray_init(&screen->transient_bo, screen);
-
         pthread_mutex_init(&screen->bo_cache_lock, NULL);
         for (unsigned i = 0; i < ARRAY_SIZE(screen->bo_cache); ++i)
                 list_inithead(&screen->bo_cache[i]);
diff --git a/src/gallium/drivers/panfrost/pan_screen.h b/src/gallium/drivers/panfrost/pan_screen.h
index 11cbb72075ab..7ed5193277ac 100644
--- a/src/gallium/drivers/panfrost/pan_screen.h
+++ b/src/gallium/drivers/panfrost/pan_screen.h
@@ -105,17 +105,6 @@  struct panfrost_screen {
 
         struct renderonly *ro;
 
-        pthread_mutex_t transient_lock;
-
-        /* Transient memory management is based on borrowing fixed-size slabs
-         * off the screen (loaning them out to the batch). Dynamic array
-         * container of panfrost_bo */
-
-        struct util_dynarray transient_bo;
-
-        /* Set of free transient BOs */
-        BITSET_DECLARE(free_transient, MAX_TRANSIENT_SLABS);
-
         pthread_mutex_t bo_cache_lock;
 
         /* The BO cache is a set of buckets with power-of-two sizes ranging
@@ -131,16 +120,6 @@  pan_screen(struct pipe_screen *p)
         return (struct panfrost_screen *)p;
 }
 
-/* Get a transient BO off the screen given a
- * particular index */
-
-static inline struct panfrost_bo *
-pan_bo_for_index(struct panfrost_screen *screen, unsigned index)
-{
-        return *(util_dynarray_element(&screen->transient_bo,
-                                struct panfrost_bo *, index));
-}
-
 void
 panfrost_drm_allocate_slab(struct panfrost_screen *screen,
                            struct panfrost_memory *mem,

Comments

Reviewed-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>

> Right now, the transient memory allocator implements its own BO caching
> mechanism, which is not really needed since we already have a generic
> BO cache. Let's simplify things a bit.
> 
> Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
> Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
> ---
> Changes in v3:
> * Collect R-b
> 
> Changes in v2:
> * None
> ---
>  src/gallium/drivers/panfrost/pan_allocate.c | 80 ++++-----------------
>  src/gallium/drivers/panfrost/pan_job.c      | 11 ---
>  src/gallium/drivers/panfrost/pan_job.h      |  4 +-
>  src/gallium/drivers/panfrost/pan_screen.c   |  4 --
>  src/gallium/drivers/panfrost/pan_screen.h   | 21 ------
>  5 files changed, 16 insertions(+), 104 deletions(-)
> 
> diff --git a/src/gallium/drivers/panfrost/pan_allocate.c b/src/gallium/drivers/panfrost/pan_allocate.c
> index d8a594551c76..a22b1a5a88d6 100644
> --- a/src/gallium/drivers/panfrost/pan_allocate.c
> +++ b/src/gallium/drivers/panfrost/pan_allocate.c
> @@ -34,27 +34,6 @@
>  /* TODO: What does this actually have to be? */
>  #define ALIGNMENT 128
>  
> -/* Allocate a new transient slab */
> -
> -static struct panfrost_bo *
> -panfrost_create_slab(struct panfrost_screen *screen, unsigned *index)
> -{
> -        /* Allocate a new slab on the screen */
> -
> -        struct panfrost_bo **new =
> -                util_dynarray_grow(&screen->transient_bo,
> -                                struct panfrost_bo *, 1);
> -
> -        struct panfrost_bo *alloc = panfrost_drm_create_bo(screen, TRANSIENT_SLAB_SIZE, 0);
> -
> -        *new = alloc;
> -
> -        /* Return the BO as well as the index we just added */
> -
> -        *index = util_dynarray_num_elements(&screen->transient_bo, void *) - 1;
> -        return alloc;
> -}
> -
>  /* Transient command stream pooling: command stream uploads try to simply copy
>   * into whereever we left off. If there isn't space, we allocate a new entry
>   * into the pool and copy there */
> @@ -72,59 +51,32 @@ panfrost_allocate_transient(struct panfrost_context *ctx, size_t sz)
>          struct panfrost_bo *bo = NULL;
>  
>          unsigned offset = 0;
> -        bool update_offset = false;
>  
> -        pthread_mutex_lock(&screen->transient_lock);
> -        bool has_current = batch->transient_indices.size;
>          bool fits_in_current = (batch->transient_offset + sz) < TRANSIENT_SLAB_SIZE;
>  
> -        if (likely(has_current && fits_in_current)) {
> -                /* We can reuse the topmost BO, so get it */
> -                unsigned idx = util_dynarray_top(&batch->transient_indices, unsigned);
> -                bo = pan_bo_for_index(screen, idx);
> +        if (likely(batch->transient_bo && fits_in_current)) {
> +                /* We can reuse the current BO, so get it */
> +                bo = batch->transient_bo;
>  
>                  /* Use the specified offset */
>                  offset = batch->transient_offset;
> -                update_offset = true;
> -        } else if (sz < TRANSIENT_SLAB_SIZE) {
> -                /* We can't reuse the topmost BO, but we can get a new one.
> -                 * First, look for a free slot */
> -
> -                unsigned count = util_dynarray_num_elements(&screen->transient_bo, void *);
> -                unsigned index = 0;
> -
> -                unsigned free = __bitset_ffs(
> -                                screen->free_transient,
> -                                count / BITSET_WORDBITS);
> -
> -                if (likely(free)) {
> -                        /* Use this one */
> -                        index = free - 1;
> -
> -                        /* It's ours, so no longer free */
> -                        BITSET_CLEAR(screen->free_transient, index);
> -
> -                        /* Grab the BO */
> -                        bo = pan_bo_for_index(screen, index);
> -                } else {
> -                        /* Otherwise, create a new BO */
> -                        bo = panfrost_create_slab(screen, &index);
> -                }
> -
> -                panfrost_batch_add_bo(batch, bo);
> -
> -                /* Remember we created this */
> -                util_dynarray_append(&batch->transient_indices, unsigned, index);
> -
> -                update_offset = true;
> +                batch->transient_offset = offset + sz;
>          } else {
> -                /* Create a new BO and reference it */
> -                bo = panfrost_drm_create_bo(screen, ALIGN_POT(sz, 4096), 0);
> +                size_t bo_sz = sz < TRANSIENT_SLAB_SIZE ?
> +                               TRANSIENT_SLAB_SIZE : ALIGN_POT(sz, 4096);
> +
> +                /* We can't reuse the current BO, but we can create a new one. */
> +                bo = panfrost_drm_create_bo(screen, bo_sz, 0);
>                  panfrost_batch_add_bo(batch, bo);
>  
>                  /* Creating a BO adds a reference, and then the job adds a
>                   * second one. So we need to pop back one reference */
>                  panfrost_bo_unreference(&screen->base, bo);
> +
> +                if (sz < TRANSIENT_SLAB_SIZE) {
> +                        batch->transient_bo = bo;
> +                        batch->transient_offset = offset + sz;
> +                }
>          }
>  
>          struct panfrost_transfer ret = {
> @@ -132,10 +84,6 @@ panfrost_allocate_transient(struct panfrost_context *ctx, size_t sz)
>                  .gpu = bo->gpu + offset,
>          };
>  
> -        if (update_offset)
> -                batch->transient_offset = offset + sz;
> -        pthread_mutex_unlock(&screen->transient_lock);
> -
>          return ret;
>  
>  }
> diff --git a/src/gallium/drivers/panfrost/pan_job.c b/src/gallium/drivers/panfrost/pan_job.c
> index 0d19c2b4c5cd..58b494108e2d 100644
> --- a/src/gallium/drivers/panfrost/pan_job.c
> +++ b/src/gallium/drivers/panfrost/pan_job.c
> @@ -48,7 +48,6 @@ panfrost_create_batch(struct panfrost_context *ctx)
>  
>          util_dynarray_init(&batch->headers, batch);
>          util_dynarray_init(&batch->gpu_headers, batch);
> -        util_dynarray_init(&batch->transient_indices, batch);
>  
>          return batch;
>  }
> @@ -66,16 +65,6 @@ panfrost_free_batch(struct panfrost_batch *batch)
>                  panfrost_bo_unreference(ctx->base.screen, bo);
>          }
>  
> -        /* Free up the transient BOs we're sitting on */
> -        struct panfrost_screen *screen = pan_screen(ctx->base.screen);
> -
> -        pthread_mutex_lock(&screen->transient_lock);
> -        util_dynarray_foreach(&batch->transient_indices, unsigned, index) {
> -                /* Mark it free */
> -                BITSET_SET(screen->free_transient, *index);
> -        }
> -        pthread_mutex_unlock(&screen->transient_lock);
> -
>          /* Unreference the polygon list */
>          panfrost_bo_unreference(ctx->base.screen, batch->polygon_list);
>  
> diff --git a/src/gallium/drivers/panfrost/pan_job.h b/src/gallium/drivers/panfrost/pan_job.h
> index 7e0b7b3f314e..c9f487871216 100644
> --- a/src/gallium/drivers/panfrost/pan_job.h
> +++ b/src/gallium/drivers/panfrost/pan_job.h
> @@ -107,8 +107,8 @@ struct panfrost_batch {
>          /* BOs referenced -- will be used for flushing logic */
>          struct set *bos;
>  
> -        /* Indices of transient BOs referenced */
> -        struct util_dynarray transient_indices;
> +        /* Current transient BO */
> +	struct panfrost_bo *transient_bo;
>  
>          /* Within the topmost transient BO, how much has been used? */
>          unsigned transient_offset;
> diff --git a/src/gallium/drivers/panfrost/pan_screen.c b/src/gallium/drivers/panfrost/pan_screen.c
> index bd826808fd6f..5dcceac370d5 100644
> --- a/src/gallium/drivers/panfrost/pan_screen.c
> +++ b/src/gallium/drivers/panfrost/pan_screen.c
> @@ -543,7 +543,6 @@ panfrost_destroy_screen(struct pipe_screen *pscreen)
>          struct panfrost_screen *screen = pan_screen(pscreen);
>          panfrost_bo_cache_evict_all(screen);
>          pthread_mutex_destroy(&screen->bo_cache_lock);
> -        pthread_mutex_destroy(&screen->transient_lock);
>          drmFreeVersion(screen->kernel_version);
>          ralloc_free(screen);
>  }
> @@ -641,9 +640,6 @@ panfrost_create_screen(int fd, struct renderonly *ro)
>                  return NULL;
>          }
>  
> -        pthread_mutex_init(&screen->transient_lock, NULL);
> -        util_dynarray_init(&screen->transient_bo, screen);
> -
>          pthread_mutex_init(&screen->bo_cache_lock, NULL);
>          for (unsigned i = 0; i < ARRAY_SIZE(screen->bo_cache); ++i)
>                  list_inithead(&screen->bo_cache[i]);
> diff --git a/src/gallium/drivers/panfrost/pan_screen.h b/src/gallium/drivers/panfrost/pan_screen.h
> index 11cbb72075ab..7ed5193277ac 100644
> --- a/src/gallium/drivers/panfrost/pan_screen.h
> +++ b/src/gallium/drivers/panfrost/pan_screen.h
> @@ -105,17 +105,6 @@ struct panfrost_screen {
>  
>          struct renderonly *ro;
>  
> -        pthread_mutex_t transient_lock;
> -
> -        /* Transient memory management is based on borrowing fixed-size slabs
> -         * off the screen (loaning them out to the batch). Dynamic array
> -         * container of panfrost_bo */
> -
> -        struct util_dynarray transient_bo;
> -
> -        /* Set of free transient BOs */
> -        BITSET_DECLARE(free_transient, MAX_TRANSIENT_SLABS);
> -
>          pthread_mutex_t bo_cache_lock;
>  
>          /* The BO cache is a set of buckets with power-of-two sizes ranging
> @@ -131,16 +120,6 @@ pan_screen(struct pipe_screen *p)
>          return (struct panfrost_screen *)p;
>  }
>  
> -/* Get a transient BO off the screen given a
> - * particular index */
> -
> -static inline struct panfrost_bo *
> -pan_bo_for_index(struct panfrost_screen *screen, unsigned index)
> -{
> -        return *(util_dynarray_element(&screen->transient_bo,
> -                                struct panfrost_bo *, index));
> -}
> -
>  void
>  panfrost_drm_allocate_slab(struct panfrost_screen *screen,
>                             struct panfrost_memory *mem,
> -- 
> 2.21.0