[v2,24/37] panfrost: Cache GPU accesses to BOs

Submitted by Boris Brezillon on Sept. 16, 2019, 9:37 a.m.

Details

Message ID 20190916093715.32203-25-boris.brezillon@collabora.com
State New
Headers show
Series "panfrost: Support batch pipelining" ( rev: 1 ) in Mesa

Not browsing as part of any series.

Commit Message

Boris Brezillon Sept. 16, 2019, 9:37 a.m.
This way we can avoid calling ioctl(WAIT_BO) when we already know the
BO is idle because it hasn't been touched by a GPU job or because the
previous call to panfrost_bo_wait() returned true.

Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
---
 src/gallium/drivers/panfrost/pan_bo.c  | 40 +++++++++++++++++++++++---
 src/gallium/drivers/panfrost/pan_bo.h  |  9 +++++-
 src/gallium/drivers/panfrost/pan_job.c |  7 +++++
 3 files changed, 51 insertions(+), 5 deletions(-)

Patch hide | download patch | download mbox

diff --git a/src/gallium/drivers/panfrost/pan_bo.c b/src/gallium/drivers/panfrost/pan_bo.c
index 4aabd5fd23ab..c3b62c369f3e 100644
--- a/src/gallium/drivers/panfrost/pan_bo.c
+++ b/src/gallium/drivers/panfrost/pan_bo.c
@@ -102,9 +102,18 @@  panfrost_bo_free(struct panfrost_bo *bo)
         ralloc_free(bo);
 }
 
-/* Returns true if the BO is ready, false otherwise. */
+/* Returns true if the BO is ready, false otherwise.
+ * access_type is encoding the type of access one wants to ensure is done.
+ * Say you want to make sure all writers are done writing, you should pass
+ * PAN_BO_GPU_ACCESS_WRITE.
+ * If you want to wait for all users, you should pass PAN_BO_GPU_ACCESS_RW.
+ * PAN_BO_GPU_ACCESS_READ would work too as waiting for readers implies
+ * waiting for writers as well, but we want to make things explicit and waiting
+ * only for readers is impossible.
+ */
 bool
-panfrost_bo_wait(struct panfrost_bo *bo, int64_t timeout_ns)
+panfrost_bo_wait(struct panfrost_bo *bo, int64_t timeout_ns,
+                 uint32_t access_type)
 {
         struct drm_panfrost_wait_bo req = {
                 .handle = bo->gem_handle,
@@ -112,12 +121,34 @@  panfrost_bo_wait(struct panfrost_bo *bo, int64_t timeout_ns)
         };
         int ret;
 
+        assert(access_type == PAN_BO_GPU_ACCESS_WRITE ||
+               access_type == PAN_BO_GPU_ACCESS_RW);
+
+        /* If the BO has been exported or imported we can't rely on the cached
+         * state, we need to call the WAIT_BO ioctl.
+         */
+        if (!(bo->flags & (PAN_BO_IMPORTED | PAN_BO_EXPORTED))) {
+                /* If ->gpu_access is 0, the BO is idle, and if the WRITE flag
+                 * is cleared, that means we only have readers.
+                 */
+                if (!bo->gpu_access)
+                        return true;
+                else if (!(access_type & PAN_BO_GPU_ACCESS_READ) &&
+                         !(bo->gpu_access & PAN_BO_GPU_ACCESS_WRITE))
+                        return true;
+        }
+
         /* The ioctl returns >= 0 value when the BO we are waiting for is ready
          * -1 otherwise.
          */
         ret = drmIoctl(bo->screen->fd, DRM_IOCTL_PANFROST_WAIT_BO, &req);
-        if (ret != -1)
+        if (ret != -1) {
+                /* Set gpu_access to 0 so that the next call to bo_wait()
+                 * doesn't have to call the WAIT_BO ioctl.
+                 */
+                bo->gpu_access = 0;
                 return true;
+        }
 
         /* If errno is not ETIMEDOUT or EBUSY that means the handle we passed
          * is invalid, which shouldn't happen here.
@@ -174,7 +205,8 @@  panfrost_bo_cache_fetch(struct panfrost_screen *screen,
                 if (entry->size < size || entry->flags != flags)
                         continue;
 
-                if (!panfrost_bo_wait(entry, dontwait ? 0 : INT64_MAX))
+                if (!panfrost_bo_wait(entry, dontwait ? 0 : INT64_MAX,
+                                      PAN_BO_GPU_ACCESS_RW))
                         continue;
 
                 struct drm_panfrost_madvise madv = {
diff --git a/src/gallium/drivers/panfrost/pan_bo.h b/src/gallium/drivers/panfrost/pan_bo.h
index a4b4d05b96e9..60ccb0c075a6 100644
--- a/src/gallium/drivers/panfrost/pan_bo.h
+++ b/src/gallium/drivers/panfrost/pan_bo.h
@@ -100,10 +100,17 @@  struct panfrost_bo {
         int gem_handle;
 
         uint32_t flags;
+
+        /* Combination of PAN_BO_GPU_ACCESS_{READ,WRITE} flags encoding pending
+         * GPU accesses to this BO. Useful to avoid calling the WAIT_BO ioctl
+         * when the BO is idle.
+         */
+        uint32_t gpu_access;
 };
 
 bool
-panfrost_bo_wait(struct panfrost_bo *bo, int64_t timeout_ns);
+panfrost_bo_wait(struct panfrost_bo *bo, int64_t timeout_ns,
+                 uint32_t access_type);
 void
 panfrost_bo_reference(struct panfrost_bo *bo);
 void
diff --git a/src/gallium/drivers/panfrost/pan_job.c b/src/gallium/drivers/panfrost/pan_job.c
index 739f36a593f1..30720ab98bb9 100644
--- a/src/gallium/drivers/panfrost/pan_job.c
+++ b/src/gallium/drivers/panfrost/pan_job.c
@@ -389,8 +389,15 @@  panfrost_batch_submit_ioctl(struct panfrost_batch *batch,
 
         hash_table_foreach(batch->bos, entry) {
                 struct panfrost_bo *bo = (struct panfrost_bo *)entry->key;
+                uint32_t flags = (uintptr_t)entry->data;
+
                 assert(bo->gem_handle > 0);
                 bo_handles[submit.bo_handle_count++] = bo->gem_handle;
+
+                /* Update the BO access flags so that panfrost_bo_wait() knows
+                 * about all pending accesses.
+                 */
+                bo->gpu_access |= flags & (PAN_BO_GPU_ACCESS_RW);
         }
 
         submit.bo_handles = (u64) (uintptr_t) bo_handles;

Comments

> +                /* If ->gpu_access is 0, the BO is idle, and if the WRITE flag
> +                 * is cleared, that means we only have readers.
> +                 */
> +                if (!bo->gpu_access)
> +                        return true;
> +                else if (!(access_type & PAN_BO_GPU_ACCESS_READ) &&
> +                         !(bo->gpu_access & PAN_BO_GPU_ACCESS_WRITE))
> +                        return true;

The second condition is a little confusing, though I think it's correct.
Not sure if there's any way to clarify what's meant but just thought I'd
comment, since inevitably future readers will squint too.

> +                /* Update the BO access flags so that panfrost_bo_wait() knows
> +                 * about all pending accesses.
> +                 */
> +                bo->gpu_access |= flags & (PAN_BO_GPU_ACCESS_RW);

This looks like black magic. Maybe just clarify in the comment why this
& is reasonable (relying on bit mask magic).

---

That aside, as I mentioned it would maybe make more sense to squash this
into the patch introduce the bo_wait ioctl() in the first place? If
that's too complicated with merge conflicts and stuff, don't sweat it,
though :)
On Mon, 16 Sep 2019 10:05:52 -0400
Alyssa Rosenzweig <alyssa@rosenzweig.io> wrote:

> > +                /* If ->gpu_access is 0, the BO is idle, and if the WRITE flag
> > +                 * is cleared, that means we only have readers.
> > +                 */
> > +                if (!bo->gpu_access)
> > +                        return true;
> > +                else if (!(access_type & PAN_BO_GPU_ACCESS_READ) &&
> > +                         !(bo->gpu_access & PAN_BO_GPU_ACCESS_WRITE))
> > +                        return true;  
> 
> The second condition is a little confusing, though I think it's correct.
> Not sure if there's any way to clarify what's meant but just thought I'd
> comment, since inevitably future readers will squint too.

I can do:

		/* If ->gpu_access is 0, the BO is idle, no need to wait. */
		if (!bo->gpu_access)
			return true;

		/* If the caller only wants to wait for writers and no
		 * writes are pending, we don't have to wait.
		 */
		if (access_type == PAN_BO_GPU_ACCESS_WRITE &&
		    !(bo->gpu_access & PAN_BO_GPU_ACCESS_WRITE))
			return true;

instead.

> 
> > +                /* Update the BO access flags so that panfrost_bo_wait() knows
> > +                 * about all pending accesses.
> > +                 */
> > +                bo->gpu_access |= flags & (PAN_BO_GPU_ACCESS_RW);  
> 
> This looks like black magic. Maybe just clarify in the comment why this
> & is reasonable (relying on bit mask magic).

It's just here to clear all non-RW flags (we only care about the read/write
information when it comes to BO idleness). I'll add a comment to explain that
part, and maybe another one to explain why we have a '|=' and not just '='.

> 
> ---
> 
> That aside, as I mentioned it would maybe make more sense to squash this
> into the patch introduce the bo_wait ioctl() in the first place? If
> that's too complicated with merge conflicts and stuff, don't sweat it,
> though :)

I'm fine with that, I'll re-order things to avoid introducing the bo_wait()
infra before we have the access type info.
> > > +                /* If ->gpu_access is 0, the BO is idle, and if the WRITE flag
> > > +                 * is cleared, that means we only have readers.
> > > +                 */
> > > +                if (!bo->gpu_access)
> > > +                        return true;
> > > +                else if (!(access_type & PAN_BO_GPU_ACCESS_READ) &&
> > > +                         !(bo->gpu_access & PAN_BO_GPU_ACCESS_WRITE))
> > > +                        return true;  
> > 
> > The second condition is a little confusing, though I think it's correct.
> > Not sure if there's any way to clarify what's meant but just thought I'd
> > comment, since inevitably future readers will squint too.
> 
> I can do:
> 
> 		/* If ->gpu_access is 0, the BO is idle, no need to wait. */
> 		if (!bo->gpu_access)
> 			return true;
> 
> 		/* If the caller only wants to wait for writers and no
> 		 * writes are pending, we don't have to wait.
> 		 */
> 		if (access_type == PAN_BO_GPU_ACCESS_WRITE &&
> 		    !(bo->gpu_access & PAN_BO_GPU_ACCESS_WRITE))
> 			return true;
> 
> instead.

Perfect, thank you :)

> > > +                /* Update the BO access flags so that panfrost_bo_wait() knows
> > > +                 * about all pending accesses.
> > > +                 */
> > > +                bo->gpu_access |= flags & (PAN_BO_GPU_ACCESS_RW);  
> > 
> > This looks like black magic. Maybe just clarify in the comment why this
> > & is reasonable (relying on bit mask magic).
> 
> It's just here to clear all non-RW flags (we only care about the read/write
> information when it comes to BO idleness). I'll add a comment to explain that
> part, and maybe another one to explain why we have a '|=' and not just '='.

Yeah, I figured it out. Mostly, REing means we have so much unexplained
bit magic in the codebase already, we don't need to add more if we can
help it :) Hence the comment would be appreciated.

> > That aside, as I mentioned it would maybe make more sense to squash this
> > into the patch introduce the bo_wait ioctl() in the first place? If
> > that's too complicated with merge conflicts and stuff, don't sweat it,
> > though :)
> 
> I'm fine with that, I'll re-order things to avoid introducing the bo_wait()
> infra before we have the access type info.

+1