[v3,13/17] panfrost: Make sure the BO is 'ready' when picked from the cache

Submitted by Boris Brezillon on Sept. 18, 2019, 1:24 p.m.

Details

Message ID 20190918132439.27705-14-boris.brezillon@collabora.com
State New
Headers show
Series "panfrost: Support batch pipelining" ( rev: 2 ) in Mesa

Not browsing as part of any series.

Commit Message

Boris Brezillon Sept. 18, 2019, 1:24 p.m.
This is needed if we want to free the panfrost_batch object at submit
time in order to not have to GC the batch on the next job submission.

Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
---
Changes in v3:
* Move the patch later in the series and squash "panfrost: Cache GPU
  accesses to BOs" in it
* Add extra comments to explain what we're doing
---
 src/gallium/drivers/panfrost/pan_bo.c  | 112 ++++++++++++++++++++-----
 src/gallium/drivers/panfrost/pan_bo.h  |   9 ++
 src/gallium/drivers/panfrost/pan_job.c |  11 +++
 3 files changed, 109 insertions(+), 23 deletions(-)

Patch hide | download patch | download mbox

diff --git a/src/gallium/drivers/panfrost/pan_bo.c b/src/gallium/drivers/panfrost/pan_bo.c
index 9daddf9d0cc2..37602688d630 100644
--- a/src/gallium/drivers/panfrost/pan_bo.c
+++ b/src/gallium/drivers/panfrost/pan_bo.c
@@ -23,6 +23,7 @@ 
  * Authors (Collabora):
  *   Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
  */
+#include <errno.h>
 #include <stdio.h>
 #include <fcntl.h>
 #include <xf86drm.h>
@@ -101,6 +102,63 @@  panfrost_bo_free(struct panfrost_bo *bo)
         ralloc_free(bo);
 }
 
+/* Returns true if the BO is ready, false otherwise.
+ * access_type is encoding the type of access one wants to ensure is done.
+ * Say you want to make sure all writers are done writing, you should pass
+ * PAN_BO_ACCESS_WRITE.
+ * If you want to wait for all users, you should pass PAN_BO_ACCESS_RW.
+ * PAN_BO_ACCESS_READ would work too as waiting for readers implies
+ * waiting for writers as well, but we want to make things explicit and waiting
+ * only for readers is impossible.
+ */
+bool
+panfrost_bo_wait(struct panfrost_bo *bo, int64_t timeout_ns,
+                 uint32_t access_type)
+{
+        struct drm_panfrost_wait_bo req = {
+                .handle = bo->gem_handle,
+		.timeout_ns = timeout_ns,
+        };
+        int ret;
+
+        assert(access_type == PAN_BO_ACCESS_WRITE ||
+               access_type == PAN_BO_ACCESS_RW);
+
+        /* If the BO has been exported or imported we can't rely on the cached
+         * state, we need to call the WAIT_BO ioctl.
+         */
+        if (!(bo->flags & (PAN_BO_IMPORTED | PAN_BO_EXPORTED))) {
+                /* If ->gpu_access is 0, the BO is idle, no need to wait. */
+                if (!bo->gpu_access)
+                        return true;
+
+                /* If the caller only wants to wait for writers and no
+                 * writes are pending, we don't have to wait.
+                 */
+                if (access_type == PAN_BO_ACCESS_WRITE &&
+                    !(bo->gpu_access & PAN_BO_ACCESS_WRITE))
+                        return true;
+        }
+
+        /* The ioctl returns >= 0 value when the BO we are waiting for is ready
+         * -1 otherwise.
+         */
+        ret = drmIoctl(bo->screen->fd, DRM_IOCTL_PANFROST_WAIT_BO, &req);
+        if (ret != -1) {
+                /* Set gpu_access to 0 so that the next call to bo_wait()
+                 * doesn't have to call the WAIT_BO ioctl.
+                 */
+                bo->gpu_access = 0;
+                return true;
+        }
+
+        /* If errno is not ETIMEDOUT or EBUSY that means the handle we passed
+         * is invalid, which shouldn't happen here.
+         */
+        assert(errno == ETIMEDOUT || errno == EBUSY);
+        return false;
+}
+
 /* Helper to calculate the bucket index of a BO */
 
 static unsigned
@@ -137,9 +195,8 @@  pan_bucket(struct panfrost_screen *screen, unsigned size)
  * BO. */
 
 static struct panfrost_bo *
-panfrost_bo_cache_fetch(
-                struct panfrost_screen *screen,
-                size_t size, uint32_t flags)
+panfrost_bo_cache_fetch(struct panfrost_screen *screen,
+                        size_t size, uint32_t flags, bool dontwait)
 {
         pthread_mutex_lock(&screen->bo_cache_lock);
         struct list_head *bucket = pan_bucket(screen, size);
@@ -147,27 +204,30 @@  panfrost_bo_cache_fetch(
 
         /* Iterate the bucket looking for something suitable */
         list_for_each_entry_safe(struct panfrost_bo, entry, bucket, link) {
-                if (entry->size >= size &&
-                    entry->flags == flags) {
-                        int ret;
-                        struct drm_panfrost_madvise madv;
+                if (entry->size < size || entry->flags != flags)
+                        continue;
 
-                        /* This one works, splice it out of the cache */
-                        list_del(&entry->link);
+                if (!panfrost_bo_wait(entry, dontwait ? 0 : INT64_MAX,
+                                      PAN_BO_ACCESS_RW))
+                        continue;
 
-                        madv.handle = entry->gem_handle;
-                        madv.madv = PANFROST_MADV_WILLNEED;
-                        madv.retained = 0;
+                struct drm_panfrost_madvise madv = {
+                        .handle = entry->gem_handle,
+                        .madv = PANFROST_MADV_WILLNEED,
+                };
+                int ret;
 
-                        ret = drmIoctl(screen->fd, DRM_IOCTL_PANFROST_MADVISE, &madv);
-                        if (!ret && !madv.retained) {
-                                panfrost_bo_free(entry);
-                                continue;
-                        }
-                        /* Let's go! */
-                        bo = entry;
-                        break;
+                /* This one works, splice it out of the cache */
+                list_del(&entry->link);
+
+                ret = drmIoctl(screen->fd, DRM_IOCTL_PANFROST_MADVISE, &madv);
+                if (!ret && !madv.retained) {
+                        panfrost_bo_free(entry);
+                        continue;
                 }
+                /* Let's go! */
+                bo = entry;
+                break;
         }
         pthread_mutex_unlock(&screen->bo_cache_lock);
 
@@ -281,12 +341,18 @@  panfrost_bo_create(struct panfrost_screen *screen, size_t size,
         if (flags & PAN_BO_GROWABLE)
                 assert(flags & PAN_BO_INVISIBLE);
 
-        /* Before creating a BO, we first want to check the cache, otherwise,
-         * the cache misses and we need to allocate a BO fresh from the kernel
+        /* Before creating a BO, we first want to check the cache but without
+         * waiting for BO readiness (BOs in the cache can still be referenced
+         * by jobs that are not finished yet).
+         * If the cached allocation fails we fall back on fresh BO allocation,
+         * and if that fails too, we try one more time to allocate from the
+         * cache, but this time we accept to wait.
          */
-        bo = panfrost_bo_cache_fetch(screen, size, flags);
+        bo = panfrost_bo_cache_fetch(screen, size, flags, true);
         if (!bo)
                 bo = panfrost_bo_alloc(screen, size, flags);
+        if (!bo)
+                bo = panfrost_bo_cache_fetch(screen, size, flags, false);
 
         if (!bo)
                 fprintf(stderr, "BO creation failed\n");
diff --git a/src/gallium/drivers/panfrost/pan_bo.h b/src/gallium/drivers/panfrost/pan_bo.h
index e4743f820aeb..022a56d9ca34 100644
--- a/src/gallium/drivers/panfrost/pan_bo.h
+++ b/src/gallium/drivers/panfrost/pan_bo.h
@@ -100,6 +100,12 @@  struct panfrost_bo {
         int gem_handle;
 
         uint32_t flags;
+
+        /* Combination of PAN_BO_ACCESS_{READ,WRITE} flags encoding pending
+         * GPU accesses to this BO. Useful to avoid calling the WAIT_BO ioctl
+         * when the BO is idle.
+         */
+        uint32_t gpu_access;
 };
 
 static inline uint32_t
@@ -113,6 +119,9 @@  panfrost_bo_access_for_stage(enum pipe_shader_type stage)
                PAN_BO_ACCESS_VERTEX_TILER;
 }
 
+bool
+panfrost_bo_wait(struct panfrost_bo *bo, int64_t timeout_ns,
+                 uint32_t access_type);
 void
 panfrost_bo_reference(struct panfrost_bo *bo);
 void
diff --git a/src/gallium/drivers/panfrost/pan_job.c b/src/gallium/drivers/panfrost/pan_job.c
index 235cb21dc8c8..a56f4044fda0 100644
--- a/src/gallium/drivers/panfrost/pan_job.c
+++ b/src/gallium/drivers/panfrost/pan_job.c
@@ -810,8 +810,19 @@  panfrost_batch_submit_ioctl(struct panfrost_batch *batch,
 
         hash_table_foreach(batch->bos, entry) {
                 struct panfrost_bo *bo = (struct panfrost_bo *)entry->key;
+                uint32_t flags = (uintptr_t)entry->data;
+
                 assert(bo->gem_handle > 0);
                 bo_handles[submit.bo_handle_count++] = bo->gem_handle;
+
+                /* Update the BO access flags so that panfrost_bo_wait() knows
+                 * about all pending accesses.
+                 * We only keep the READ/WRITE info since this is all the BO
+                 * wait logic cares about.
+                 * We also preserve existing flags as this batch might not
+                 * be the first one to access the BO.
+                 */
+                bo->gpu_access |= flags & (PAN_BO_ACCESS_RW);
         }
 
         submit.bo_handles = (u64) (uintptr_t) bo_handles;

Comments

Very nice! I'm quite happy with this version, all things considered, so R-b!

On Wed, Sep 18, 2019 at 03:24:35PM +0200, Boris Brezillon wrote:
> This is needed if we want to free the panfrost_batch object at submit
> time in order to not have to GC the batch on the next job submission.
> 
> Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
> ---
> Changes in v3:
> * Move the patch later in the series and squash "panfrost: Cache GPU
>   accesses to BOs" in it
> * Add extra comments to explain what we're doing
> ---
>  src/gallium/drivers/panfrost/pan_bo.c  | 112 ++++++++++++++++++++-----
>  src/gallium/drivers/panfrost/pan_bo.h  |   9 ++
>  src/gallium/drivers/panfrost/pan_job.c |  11 +++
>  3 files changed, 109 insertions(+), 23 deletions(-)
> 
> diff --git a/src/gallium/drivers/panfrost/pan_bo.c b/src/gallium/drivers/panfrost/pan_bo.c
> index 9daddf9d0cc2..37602688d630 100644
> --- a/src/gallium/drivers/panfrost/pan_bo.c
> +++ b/src/gallium/drivers/panfrost/pan_bo.c
> @@ -23,6 +23,7 @@
>   * Authors (Collabora):
>   *   Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
>   */
> +#include <errno.h>
>  #include <stdio.h>
>  #include <fcntl.h>
>  #include <xf86drm.h>
> @@ -101,6 +102,63 @@ panfrost_bo_free(struct panfrost_bo *bo)
>          ralloc_free(bo);
>  }
>  
> +/* Returns true if the BO is ready, false otherwise.
> + * access_type is encoding the type of access one wants to ensure is done.
> + * Say you want to make sure all writers are done writing, you should pass
> + * PAN_BO_ACCESS_WRITE.
> + * If you want to wait for all users, you should pass PAN_BO_ACCESS_RW.
> + * PAN_BO_ACCESS_READ would work too as waiting for readers implies
> + * waiting for writers as well, but we want to make things explicit and waiting
> + * only for readers is impossible.
> + */
> +bool
> +panfrost_bo_wait(struct panfrost_bo *bo, int64_t timeout_ns,
> +                 uint32_t access_type)
> +{
> +        struct drm_panfrost_wait_bo req = {
> +                .handle = bo->gem_handle,
> +		.timeout_ns = timeout_ns,
> +        };
> +        int ret;
> +
> +        assert(access_type == PAN_BO_ACCESS_WRITE ||
> +               access_type == PAN_BO_ACCESS_RW);
> +
> +        /* If the BO has been exported or imported we can't rely on the cached
> +         * state, we need to call the WAIT_BO ioctl.
> +         */
> +        if (!(bo->flags & (PAN_BO_IMPORTED | PAN_BO_EXPORTED))) {
> +                /* If ->gpu_access is 0, the BO is idle, no need to wait. */
> +                if (!bo->gpu_access)
> +                        return true;
> +
> +                /* If the caller only wants to wait for writers and no
> +                 * writes are pending, we don't have to wait.
> +                 */
> +                if (access_type == PAN_BO_ACCESS_WRITE &&
> +                    !(bo->gpu_access & PAN_BO_ACCESS_WRITE))
> +                        return true;
> +        }
> +
> +        /* The ioctl returns >= 0 value when the BO we are waiting for is ready
> +         * -1 otherwise.
> +         */
> +        ret = drmIoctl(bo->screen->fd, DRM_IOCTL_PANFROST_WAIT_BO, &req);
> +        if (ret != -1) {
> +                /* Set gpu_access to 0 so that the next call to bo_wait()
> +                 * doesn't have to call the WAIT_BO ioctl.
> +                 */
> +                bo->gpu_access = 0;
> +                return true;
> +        }
> +
> +        /* If errno is not ETIMEDOUT or EBUSY that means the handle we passed
> +         * is invalid, which shouldn't happen here.
> +         */
> +        assert(errno == ETIMEDOUT || errno == EBUSY);
> +        return false;
> +}
> +
>  /* Helper to calculate the bucket index of a BO */
>  
>  static unsigned
> @@ -137,9 +195,8 @@ pan_bucket(struct panfrost_screen *screen, unsigned size)
>   * BO. */
>  
>  static struct panfrost_bo *
> -panfrost_bo_cache_fetch(
> -                struct panfrost_screen *screen,
> -                size_t size, uint32_t flags)
> +panfrost_bo_cache_fetch(struct panfrost_screen *screen,
> +                        size_t size, uint32_t flags, bool dontwait)
>  {
>          pthread_mutex_lock(&screen->bo_cache_lock);
>          struct list_head *bucket = pan_bucket(screen, size);
> @@ -147,27 +204,30 @@ panfrost_bo_cache_fetch(
>  
>          /* Iterate the bucket looking for something suitable */
>          list_for_each_entry_safe(struct panfrost_bo, entry, bucket, link) {
> -                if (entry->size >= size &&
> -                    entry->flags == flags) {
> -                        int ret;
> -                        struct drm_panfrost_madvise madv;
> +                if (entry->size < size || entry->flags != flags)
> +                        continue;
>  
> -                        /* This one works, splice it out of the cache */
> -                        list_del(&entry->link);
> +                if (!panfrost_bo_wait(entry, dontwait ? 0 : INT64_MAX,
> +                                      PAN_BO_ACCESS_RW))
> +                        continue;
>  
> -                        madv.handle = entry->gem_handle;
> -                        madv.madv = PANFROST_MADV_WILLNEED;
> -                        madv.retained = 0;
> +                struct drm_panfrost_madvise madv = {
> +                        .handle = entry->gem_handle,
> +                        .madv = PANFROST_MADV_WILLNEED,
> +                };
> +                int ret;
>  
> -                        ret = drmIoctl(screen->fd, DRM_IOCTL_PANFROST_MADVISE, &madv);
> -                        if (!ret && !madv.retained) {
> -                                panfrost_bo_free(entry);
> -                                continue;
> -                        }
> -                        /* Let's go! */
> -                        bo = entry;
> -                        break;
> +                /* This one works, splice it out of the cache */
> +                list_del(&entry->link);
> +
> +                ret = drmIoctl(screen->fd, DRM_IOCTL_PANFROST_MADVISE, &madv);
> +                if (!ret && !madv.retained) {
> +                        panfrost_bo_free(entry);
> +                        continue;
>                  }
> +                /* Let's go! */
> +                bo = entry;
> +                break;
>          }
>          pthread_mutex_unlock(&screen->bo_cache_lock);
>  
> @@ -281,12 +341,18 @@ panfrost_bo_create(struct panfrost_screen *screen, size_t size,
>          if (flags & PAN_BO_GROWABLE)
>                  assert(flags & PAN_BO_INVISIBLE);
>  
> -        /* Before creating a BO, we first want to check the cache, otherwise,
> -         * the cache misses and we need to allocate a BO fresh from the kernel
> +        /* Before creating a BO, we first want to check the cache but without
> +         * waiting for BO readiness (BOs in the cache can still be referenced
> +         * by jobs that are not finished yet).
> +         * If the cached allocation fails we fall back on fresh BO allocation,
> +         * and if that fails too, we try one more time to allocate from the
> +         * cache, but this time we accept to wait.
>           */
> -        bo = panfrost_bo_cache_fetch(screen, size, flags);
> +        bo = panfrost_bo_cache_fetch(screen, size, flags, true);
>          if (!bo)
>                  bo = panfrost_bo_alloc(screen, size, flags);
> +        if (!bo)
> +                bo = panfrost_bo_cache_fetch(screen, size, flags, false);
>  
>          if (!bo)
>                  fprintf(stderr, "BO creation failed\n");
> diff --git a/src/gallium/drivers/panfrost/pan_bo.h b/src/gallium/drivers/panfrost/pan_bo.h
> index e4743f820aeb..022a56d9ca34 100644
> --- a/src/gallium/drivers/panfrost/pan_bo.h
> +++ b/src/gallium/drivers/panfrost/pan_bo.h
> @@ -100,6 +100,12 @@ struct panfrost_bo {
>          int gem_handle;
>  
>          uint32_t flags;
> +
> +        /* Combination of PAN_BO_ACCESS_{READ,WRITE} flags encoding pending
> +         * GPU accesses to this BO. Useful to avoid calling the WAIT_BO ioctl
> +         * when the BO is idle.
> +         */
> +        uint32_t gpu_access;
>  };
>  
>  static inline uint32_t
> @@ -113,6 +119,9 @@ panfrost_bo_access_for_stage(enum pipe_shader_type stage)
>                 PAN_BO_ACCESS_VERTEX_TILER;
>  }
>  
> +bool
> +panfrost_bo_wait(struct panfrost_bo *bo, int64_t timeout_ns,
> +                 uint32_t access_type);
>  void
>  panfrost_bo_reference(struct panfrost_bo *bo);
>  void
> diff --git a/src/gallium/drivers/panfrost/pan_job.c b/src/gallium/drivers/panfrost/pan_job.c
> index 235cb21dc8c8..a56f4044fda0 100644
> --- a/src/gallium/drivers/panfrost/pan_job.c
> +++ b/src/gallium/drivers/panfrost/pan_job.c
> @@ -810,8 +810,19 @@ panfrost_batch_submit_ioctl(struct panfrost_batch *batch,
>  
>          hash_table_foreach(batch->bos, entry) {
>                  struct panfrost_bo *bo = (struct panfrost_bo *)entry->key;
> +                uint32_t flags = (uintptr_t)entry->data;
> +
>                  assert(bo->gem_handle > 0);
>                  bo_handles[submit.bo_handle_count++] = bo->gem_handle;
> +
> +                /* Update the BO access flags so that panfrost_bo_wait() knows
> +                 * about all pending accesses.
> +                 * We only keep the READ/WRITE info since this is all the BO
> +                 * wait logic cares about.
> +                 * We also preserve existing flags as this batch might not
> +                 * be the first one to access the BO.
> +                 */
> +                bo->gpu_access |= flags & (PAN_BO_ACCESS_RW);
>          }
>  
>          submit.bo_handles = (u64) (uintptr_t) bo_handles;
> -- 
> 2.21.0