drm/syncobj: add sync obj wait interface. (v6)

Submitted by Xie, AlexBin on July 10, 2017, 3:58 p.m.

Details

Message ID DM5PR12MB12574F31128759A47DA1C7B7F2A90@DM5PR12MB1257.namprd12.prod.outlook.com
State New
Headers show
Series "drm/syncobj: add sync obj wait interface. (v6)" ( rev: 2 ) in AMD X.Org drivers

Not browsing as part of any series.

Commit Message

Xie, AlexBin July 10, 2017, 3:58 p.m.
I understand this discussion from closed source driver terminology.

If a process is killed before it sends out the signaling command, will some part of the GPU be in a waiting situation forever?

Alex Bin Xie
From: amd-gfx [mailto:amd-gfx-bounces@lists.freedesktop.org] On Behalf Of Jason Ekstrand

Sent: Monday, July 10, 2017 11:53 AM
To: Christian König <deathsimple@vodafone.de>
Cc: Dave Airlie <airlied@gmail.com>; Maling list - DRI developers <dri-devel@lists.freedesktop.org>; amd-gfx mailing list <amd-gfx@lists.freedesktop.org>
Subject: Re: [PATCH] drm/syncobj: add sync obj wait interface. (v6)

On Mon, Jul 10, 2017 at 8:45 AM, Christian König <deathsimple@vodafone.de<mailto:deathsimple@vodafone.de>> wrote:
Am 10.07.2017 um 17:28 schrieb Jason Ekstrand:
On Wed, Jul 5, 2017 at 6:04 PM, Dave Airlie <airlied@gmail.com<mailto:airlied@gmail.com>> wrote:
From: Dave Airlie <airlied@redhat.com<mailto:airlied@redhat.com>>


This interface will allow sync object to be used to back
Vulkan fences. This API is pretty much the vulkan fence waiting
API, and I've ported the code from amdgpu.

v2: accept relative timeout, pass remaining time back
to userspace.
v3: return to absolute timeouts.
v4: absolute zero = poll,
    rewrite any/all code to have same operation for arrays
    return -EINVAL for 0 fences.
v4.1: fixup fences allocation check, use u64_to_user_ptr
v5: move to sec/nsec, and use timespec64 for calcs.
v6: use -ETIME and drop the out status flag. (-ETIME
is suggested by ickle, I can feel a shed painting)

Signed-off-by: Dave Airlie <airlied@redhat.com<mailto:airlied@redhat.com>>

---
 drivers/gpu/drm/drm_internal.h |   2 +
 drivers/gpu/drm/drm_ioctl.c    |   2 +
 drivers/gpu/drm/drm_syncobj.c  | 142 +++++++++++++++++++++++++++++++++++++++++
 include/uapi/drm/drm.h         |  13 ++++
 4 files changed, 159 insertions(+)

--
2.9.4

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org<mailto:dri-devel@lists.freedesktop.org>
https://lists.freedesktop.org/mailman/listinfo/dri-devel



_______________________________________________

amd-gfx mailing list

amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org>

https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Patch hide | download patch | download mbox

diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h
index 5cecc97..d71b50d 100644
--- a/drivers/gpu/drm/drm_internal.h
+++ b/drivers/gpu/drm/drm_internal.h
@@ -157,3 +157,5 @@  int drm_syncobj_handle_to_fd_ioctl(struct drm_device *dev, void *data,
                                   struct drm_file *file_private);
 int drm_syncobj_fd_to_handle_ioctl(struct drm_device *dev, void *data,
                                   struct drm_file *file_private);
+int drm_syncobj_wait_ioctl(struct drm_device *dev, void *data,
+                          struct drm_file *file_private);
diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
index f1e5681..385ce74 100644
--- a/drivers/gpu/drm/drm_ioctl.c
+++ b/drivers/gpu/drm/drm_ioctl.c
@@ -657,6 +657,8 @@  static const struct drm_ioctl_desc drm_ioctls[] = {
                      DRM_UNLOCKED|DRM_RENDER_ALLOW),
        DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE, drm_syncobj_fd_to_handle_ioctl,
                      DRM_UNLOCKED|DRM_RENDER_ALLOW),
+       DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_WAIT, drm_syncobj_wait_ioctl,
+                     DRM_UNLOCKED|DRM_RENDER_ALLOW),
 };

 #define DRM_CORE_IOCTL_COUNT   ARRAY_SIZE( drm_ioctls )
diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
index 89441bc..2d5a7a1 100644
--- a/drivers/gpu/drm/drm_syncobj.c
+++ b/drivers/gpu/drm/drm_syncobj.c
@@ -1,5 +1,7 @@ 
 /*
  * Copyright 2017 Red Hat
+ * Parts ported from amdgpu (fence wait code).
+ * Copyright 2016 Advanced Micro Devices, Inc.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -31,6 +33,9 @@ 
  * that contain an optional fence. The fence can be updated with a new
  * fence, or be NULL.
  *
+ * syncobj's can be waited upon, where it will wait for the underlying
+ * fence.
+ *
  * syncobj's can be export to fd's and back, these fd's are opaque and
  * have no other use case, except passing the syncobj between processes.
  *
@@ -451,3 +456,140 @@  drm_syncobj_fd_to_handle_ioctl(struct drm_device *dev, void *data,
        return drm_syncobj_fd_to_handle(file_private, args->fd,
                                        &args->handle);
 }
+
+/**
+ * drm_timeout_abs_to_jiffies - calculate jiffies timeout from absolute value
+ *
+ * @timeout_sec: timeout sec component, 0 for poll
+ * @timeout_nsec: timeout nsec component in ns, 0 for poll
+ * both must be 0 for poll.
+ *
+ * Calculate the timeout in jiffies from an absolute time in sec/nsec.
+ */
+static unsigned long drm_timeout_abs_to_jiffies(int64_t timeout_sec, uint64_t timeout_nsec)
+{
+       struct timespec64 abs_timeout, timeout, max_jiffy_timespec;
+       unsigned long timeout_jiffies;
+
+       /* make 0 timeout means poll - absolute 0 doesn't seem valid */
+       if (timeout_sec == 0 && timeout_nsec == 0)
+               return 0;
+
+       abs_timeout.tv_sec = timeout_sec;
+       abs_timeout.tv_nsec = timeout_nsec;
+
+       /* clamp timeout if it's to large */
+       if (!timespec64_valid_strict(&abs_timeout))
+               return MAX_SCHEDULE_TIMEOUT - 1;
+
+       timeout = timespec64_sub(abs_timeout, ktime_to_timespec64(ktime_get()));
+       if (!timespec64_valid(&timeout))
+               return 0;
+
+       jiffies_to_timespec64(MAX_JIFFY_OFFSET, &max_jiffy_timespec);
+       if (timespec64_compare(&timeout, &max_jiffy_timespec) >= 0)
+               return MAX_SCHEDULE_TIMEOUT - 1;
+
+       timeout_jiffies = timespec64_to_jiffies(&timeout);
+       /*  clamp timeout to avoid infinite timeout */
+       if (timeout_jiffies >= MAX_SCHEDULE_TIMEOUT)
+               return MAX_SCHEDULE_TIMEOUT - 1;
+
+       return timeout_jiffies + 1;
+}
+
+static int drm_syncobj_wait_fences(struct drm_device *dev,
+                                  struct drm_file *file_private,
+                                  struct drm_syncobj_wait *wait,
+                                  struct dma_fence **fences)
+{
+       unsigned long timeout = drm_timeout_abs_to_jiffies(wait->timeout_sec, wait->timeout_nsec);
+       int ret = 0;
+       uint32_t first = ~0;
+
+       if (wait->flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL) {
+               int i;
+               for (i = 0; i < wait->count_handles; i++) {
+                       ret = dma_fence_wait_timeout(fences[i], true, timeout);
+
+                       if (ret < 0)
+                               return ret;
+                       if (ret == 0)
+                               break;
+                       timeout = ret;
+               }
+               first = 0;
+       } else {
+               ret = dma_fence_wait_any_timeout(fences,
+                                                wait->count_handles,
+                                                true, timeout,
+                                                &first);
+       }
+
+       if (ret < 0)
+               return ret;
+
+       wait->first_signaled = first;
+       if (ret == 0)
+               return -ETIME;
+       return 0;
+}
+
+int
+drm_syncobj_wait_ioctl(struct drm_device *dev, void *data,
+                      struct drm_file *file_private)
+{
+       struct drm_syncobj_wait *args = data;
+       uint32_t *handles;
+       struct dma_fence **fences;
+       int ret = 0;
+       int i;
+
+       if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ))
+               return -ENODEV;
+
+       if (args->flags != 0 && args->flags != DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL)
+               return -EINVAL;
+
+       if (args->count_handles == 0)
+               return -EINVAL;
+
+       /* Get the handles from userspace */
+       handles = kmalloc_array(args->count_handles, sizeof(uint32_t),
+                               GFP_KERNEL);
+       if (handles == NULL)
+               return -ENOMEM;
+
+       if (copy_from_user(handles,
+                          u64_to_user_ptr(args->handles),
+                          sizeof(uint32_t) * args->count_handles)) {
+               ret = -EFAULT;
+               goto err_free_handles;
+       }
+
+       fences = kcalloc(args->count_handles,
+                        sizeof(struct dma_fence *), GFP_KERNEL);
+       if (!fences) {
+               ret = -ENOMEM;
+               goto err_free_handles;
+       }
+
+       for (i = 0; i < args->count_handles; i++) {
+               ret = drm_syncobj_fence_get(file_private, handles[i],
+                                           &fences[i]);
+               if (ret)
+                       goto err_free_fence_array;
+       }
+
+       ret = drm_syncobj_wait_fences(dev, file_private,
+                                     args, fences);

So, reading some CTS tests again, and I think we have a problem here.  The Vulkan spec allows you to wait on a fence that is in the unsignaled state.

At least on the closed source driver that would be illegal as far as I know.

Then they are doing workarounds in userspace.  There are definitely CTS tests for this:

https://github.com/KhronosGroup/VK-GL-CTS/blob/master/external/vulkancts/modules/vulkan/synchronization/vktSynchronizationBasicFenceTests.cpp#L74

You can't wait on a semaphore before the signal operation is sent down to the kernel.

We (Intel) deal with this today by tracking whether or not the fence has been submitted and using a condition variable in userspace to sort it all out.  If we ever want to share fences across processes (which we do), then this needs to be sorted in the kernel.
--Jason


Regards,
Christian.



  In theory, you could have thread A start waiting on a fence before thread B submits the work which triggers that fence.  This means that the dma_fence may not exist yet when vkWaitForFences gets called.  If we really want to support the full Vulkan usage, we need to somehow support missing dma_fences by waiting for the dma_fence to show up.  Unfortunately, I don't know enough about the internal kernel APIs to know what that would look like.

+
+err_free_fence_array:
+       for (i = 0; i < args->count_handles; i++)
+               dma_fence_put(fences[i]);
+       kfree(fences);
+err_free_handles:
+       kfree(handles);
+
+       return ret;
+}
diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
index 101593a..91746a7 100644
--- a/include/uapi/drm/drm.h
+++ b/include/uapi/drm/drm.h
@@ -718,6 +718,18 @@  struct drm_syncobj_handle {
        __u32 pad;
 };

+#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL (1 << 0)
+struct drm_syncobj_wait {
+       __u64 handles;
+       /* absolute timeout */
+       __s64 timeout_sec;
+       __s64 timeout_nsec;
+       __u32 count_handles;
+       __u32 flags;
+       __u32 first_signaled; /* only valid when not waiting all */
+       __u32 pad;
+};
+
 #if defined(__cplusplus)
 }
 #endif
@@ -840,6 +852,7 @@  extern "C" {
 #define DRM_IOCTL_SYNCOBJ_DESTROY      DRM_IOWR(0xC0, struct drm_syncobj_destroy)
 #define DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD DRM_IOWR(0xC1, struct drm_syncobj_handle)
 #define DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE DRM_IOWR(0xC2, struct drm_syncobj_handle)
+#define DRM_IOCTL_SYNCOBJ_WAIT         DRM_IOWR(0xC3, struct drm_syncobj_wait)

 /**
  * Device specific ioctls should only be in their respective headers

Comments

Am 10.07.2017 um 17:58 schrieb Xie, AlexBin:
>
> I understand this discussion from closed source driver terminology.
>
> If a process is killed before it sends out the signaling command, will 
> some part of the GPU be in a waiting situation forever?
>

Yes, exactly that's the problem here and the reason why even 
Microsoft forbids that under Windows.

Christian.

> Alex Bin Xie
>
> *From:*amd-gfx [mailto:amd-gfx-bounces@lists.freedesktop.org] *On 
> Behalf Of *Jason Ekstrand
> *Sent:* Monday, July 10, 2017 11:53 AM
> *To:* Christian König <deathsimple@vodafone.de>
> *Cc:* Dave Airlie <airlied@gmail.com>; Maling list - DRI developers 
> <dri-devel@lists.freedesktop.org>; amd-gfx mailing list 
> <amd-gfx@lists.freedesktop.org>
> *Subject:* Re: [PATCH] drm/syncobj: add sync obj wait interface. (v6)
>
> On Mon, Jul 10, 2017 at 8:45 AM, Christian König 
> <deathsimple@vodafone.de <mailto:deathsimple@vodafone.de>> wrote:
>
>     Am 10.07.2017 um 17:28 schrieb Jason Ekstrand:
>
>         On Wed, Jul 5, 2017 at 6:04 PM, Dave Airlie <airlied@gmail.com
>         <mailto:airlied@gmail.com>> wrote:
>
>             From: Dave Airlie <airlied@redhat.com
>             <mailto:airlied@redhat.com>>
>
>             This interface will allow sync object to be used to back
>             Vulkan fences. This API is pretty much the vulkan fence
>             waiting
>             API, and I've ported the code from amdgpu.
>
>             v2: accept relative timeout, pass remaining time back
>             to userspace.
>             v3: return to absolute timeouts.
>             v4: absolute zero = poll,
>                 rewrite any/all code to have same operation for arrays
>                 return -EINVAL for 0 fences.
>             v4.1: fixup fences allocation check, use u64_to_user_ptr
>             v5: move to sec/nsec, and use timespec64 for calcs.
>             v6: use -ETIME and drop the out status flag. (-ETIME
>             is suggested by ickle, I can feel a shed painting)
>
>             Signed-off-by: Dave Airlie <airlied@redhat.com
>             <mailto:airlied@redhat.com>>
>             ---
>              drivers/gpu/drm/drm_internal.h |   2 +
>              drivers/gpu/drm/drm_ioctl.c    |   2 +
>              drivers/gpu/drm/drm_syncobj.c  | 142
>             +++++++++++++++++++++++++++++++++++++++++
>              include/uapi/drm/drm.h         |  13 ++++
>              4 files changed, 159 insertions(+)
>
>             diff --git a/drivers/gpu/drm/drm_internal.h
>             b/drivers/gpu/drm/drm_internal.h
>             index 5cecc97..d71b50d 100644
>             --- a/drivers/gpu/drm/drm_internal.h
>             +++ b/drivers/gpu/drm/drm_internal.h
>             @@ -157,3 +157,5 @@ int
>             drm_syncobj_handle_to_fd_ioctl(struct drm_device *dev,
>             void *data,
>              struct drm_file *file_private);
>              int drm_syncobj_fd_to_handle_ioctl(struct drm_device
>             *dev, void *data,
>              struct drm_file *file_private);
>             +int drm_syncobj_wait_ioctl(struct drm_device *dev, void
>             *data,
>             +                          struct drm_file *file_private);
>             diff --git a/drivers/gpu/drm/drm_ioctl.c
>             b/drivers/gpu/drm/drm_ioctl.c
>             index f1e5681..385ce74 100644
>             --- a/drivers/gpu/drm/drm_ioctl.c
>             +++ b/drivers/gpu/drm/drm_ioctl.c
>             @@ -657,6 +657,8 @@ static const struct drm_ioctl_desc
>             drm_ioctls[] = {
>             DRM_UNLOCKED|DRM_RENDER_ALLOW),
>             DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE,
>             drm_syncobj_fd_to_handle_ioctl,
>             DRM_UNLOCKED|DRM_RENDER_ALLOW),
>             +  DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_WAIT,
>             drm_syncobj_wait_ioctl,
>             +  DRM_UNLOCKED|DRM_RENDER_ALLOW),
>              };
>
>              #define DRM_CORE_IOCTL_COUNT  ARRAY_SIZE( drm_ioctls )
>             diff --git a/drivers/gpu/drm/drm_syncobj.c
>             b/drivers/gpu/drm/drm_syncobj.c
>             index 89441bc..2d5a7a1 100644
>             --- a/drivers/gpu/drm/drm_syncobj.c
>             +++ b/drivers/gpu/drm/drm_syncobj.c
>             @@ -1,5 +1,7 @@
>              /*
>               * Copyright 2017 Red Hat
>             + * Parts ported from amdgpu (fence wait code).
>             + * Copyright 2016 Advanced Micro Devices, Inc.
>               *
>               * Permission is hereby granted, free of charge, to any
>             person obtaining a
>               * copy of this software and associated documentation
>             files (the "Software"),
>             @@ -31,6 +33,9 @@
>               * that contain an optional fence. The fence can be
>             updated with a new
>               * fence, or be NULL.
>               *
>             + * syncobj's can be waited upon, where it will wait for
>             the underlying
>             + * fence.
>             + *
>               * syncobj's can be export to fd's and back, these fd's
>             are opaque and
>               * have no other use case, except passing the syncobj
>             between processes.
>               *
>             @@ -451,3 +456,140 @@
>             drm_syncobj_fd_to_handle_ioctl(struct drm_device *dev,
>             void *data,
>                     return drm_syncobj_fd_to_handle(file_private,
>             args->fd,
>               &args->handle);
>              }
>             +
>             +/**
>             + * drm_timeout_abs_to_jiffies - calculate jiffies timeout
>             from absolute value
>             + *
>             + * @timeout_sec: timeout sec component, 0 for poll
>             + * @timeout_nsec: timeout nsec component in ns, 0 for poll
>             + * both must be 0 for poll.
>             + *
>             + * Calculate the timeout in jiffies from an absolute time
>             in sec/nsec.
>             + */
>             +static unsigned long drm_timeout_abs_to_jiffies(int64_t
>             timeout_sec, uint64_t timeout_nsec)
>             +{
>             +       struct timespec64 abs_timeout, timeout,
>             max_jiffy_timespec;
>             +       unsigned long timeout_jiffies;
>             +
>             +       /* make 0 timeout means poll - absolute 0 doesn't
>             seem valid */
>             +       if (timeout_sec == 0 && timeout_nsec == 0)
>             +               return 0;
>             +
>             +       abs_timeout.tv_sec = timeout_sec;
>             +       abs_timeout.tv_nsec = timeout_nsec;
>             +
>             +       /* clamp timeout if it's to large */
>             +       if (!timespec64_valid_strict(&abs_timeout))
>             +               return MAX_SCHEDULE_TIMEOUT - 1;
>             +
>             +       timeout = timespec64_sub(abs_timeout,
>             ktime_to_timespec64(ktime_get()));
>             +       if (!timespec64_valid(&timeout))
>             +               return 0;
>             +
>             +  jiffies_to_timespec64(MAX_JIFFY_OFFSET,
>             &max_jiffy_timespec);
>             +       if (timespec64_compare(&timeout,
>             &max_jiffy_timespec) >= 0)
>             +               return MAX_SCHEDULE_TIMEOUT - 1;
>             +
>             +       timeout_jiffies = timespec64_to_jiffies(&timeout);
>             +       /*  clamp timeout to avoid infinite timeout */
>             +       if (timeout_jiffies >= MAX_SCHEDULE_TIMEOUT)
>             +               return MAX_SCHEDULE_TIMEOUT - 1;
>             +
>             +       return timeout_jiffies + 1;
>             +}
>             +
>             +static int drm_syncobj_wait_fences(struct drm_device *dev,
>             + struct drm_file *file_private,
>             + struct drm_syncobj_wait *wait,
>             + struct dma_fence **fences)
>             +{
>             +       unsigned long timeout =
>             drm_timeout_abs_to_jiffies(wait->timeout_sec,
>             wait->timeout_nsec);
>             +       int ret = 0;
>             +       uint32_t first = ~0;
>             +
>             +       if (wait->flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL) {
>             +               int i;
>             +               for (i = 0; i < wait->count_handles; i++) {
>             +                       ret =
>             dma_fence_wait_timeout(fences[i], true, timeout);
>             +
>             +                       if (ret < 0)
>             +                               return ret;
>             +                       if (ret == 0)
>             +                               break;
>             +                       timeout = ret;
>             +               }
>             +               first = 0;
>             +       } else {
>             +               ret = dma_fence_wait_any_timeout(fences,
>             +           wait->count_handles,
>             +           true, timeout,
>             +           &first);
>             +       }
>             +
>             +       if (ret < 0)
>             +               return ret;
>             +
>             +       wait->first_signaled = first;
>             +       if (ret == 0)
>             +               return -ETIME;
>             +       return 0;
>             +}
>             +
>             +int
>             +drm_syncobj_wait_ioctl(struct drm_device *dev, void *data,
>             +                      struct drm_file *file_private)
>             +{
>             +       struct drm_syncobj_wait *args = data;
>             +       uint32_t *handles;
>             +       struct dma_fence **fences;
>             +       int ret = 0;
>             +       int i;
>             +
>             +       if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ))
>             +               return -ENODEV;
>             +
>             +       if (args->flags != 0 && args->flags !=
>             DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL)
>             +               return -EINVAL;
>             +
>             +       if (args->count_handles == 0)
>             +               return -EINVAL;
>             +
>             +       /* Get the handles from userspace */
>             +       handles = kmalloc_array(args->count_handles,
>             sizeof(uint32_t),
>             +  GFP_KERNEL);
>             +       if (handles == NULL)
>             +               return -ENOMEM;
>             +
>             +       if (copy_from_user(handles,
>             + u64_to_user_ptr(args->handles),
>             + sizeof(uint32_t) * args->count_handles)) {
>             +               ret = -EFAULT;
>             +               goto err_free_handles;
>             +       }
>             +
>             +       fences = kcalloc(args->count_handles,
>             +                        sizeof(struct dma_fence *),
>             GFP_KERNEL);
>             +       if (!fences) {
>             +               ret = -ENOMEM;
>             +               goto err_free_handles;
>             +       }
>             +
>             +       for (i = 0; i < args->count_handles; i++) {
>             +               ret = drm_syncobj_fence_get(file_private,
>             handles[i],
>             +      &fences[i]);
>             +               if (ret)
>             +                       goto err_free_fence_array;
>             +       }
>             +
>             +       ret = drm_syncobj_wait_fences(dev, file_private,
>             +  args, fences);
>
>         So, reading some CTS tests again, and I think we have a
>         problem here.  The Vulkan spec allows you to wait on a fence
>         that is in the unsignaled state.
>
>     At least on the closed source driver that would be illegal as far
>     as I know.
>
> Then they are doing workarounds in userspace.  There are definitely 
> CTS tests for this:
>
> https://github.com/KhronosGroup/VK-GL-CTS/blob/master/external/vulkancts/modules/vulkan/synchronization/vktSynchronizationBasicFenceTests.cpp#L74
>
>     You can't wait on a semaphore before the signal operation is sent
>     down to the kernel.
>
> We (Intel) deal with this today by tracking whether or not the fence 
> has been submitted and using a condition variable in userspace to sort 
> it all out.  If we ever want to share fences across processes (which 
> we do), then this needs to be sorted in the kernel.
>
> --Jason
>
>     Regards,
>     Christian.
>
>
>
>
>           In theory, you could have thread A start waiting on a fence
>         before thread B submits the work which triggers that fence. 
>         This means that the dma_fence may not exist yet when
>         vkWaitForFences gets called.  If we really want to support the
>         full Vulkan usage, we need to somehow support missing
>         dma_fences by waiting for the dma_fence to show up.
>         Unfortunately, I don't know enough about the internal kernel
>         APIs to know what that would look like.
>
>             +
>             +err_free_fence_array:
>             +       for (i = 0; i < args->count_handles; i++)
>             +  dma_fence_put(fences[i]);
>             +       kfree(fences);
>             +err_free_handles:
>             +       kfree(handles);
>             +
>             +       return ret;
>             +}
>             diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
>             index 101593a..91746a7 100644
>             --- a/include/uapi/drm/drm.h
>             +++ b/include/uapi/drm/drm.h
>             @@ -718,6 +718,18 @@ struct drm_syncobj_handle {
>                     __u32 pad;
>              };
>
>             +#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL (1 << 0)
>             +struct drm_syncobj_wait {
>             +       __u64 handles;
>             +       /* absolute timeout */
>             +       __s64 timeout_sec;
>             +       __s64 timeout_nsec;
>             +       __u32 count_handles;
>             +       __u32 flags;
>             +       __u32 first_signaled; /* only valid when not
>             waiting all */
>             +       __u32 pad;
>             +};
>             +
>              #if defined(__cplusplus)
>              }
>              #endif
>             @@ -840,6 +852,7 @@ extern "C" {
>              #define DRM_IOCTL_SYNCOBJ_DESTROY   DRM_IOWR(0xC0, struct
>             drm_syncobj_destroy)
>              #define DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD DRM_IOWR(0xC1,
>             struct drm_syncobj_handle)
>              #define DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE DRM_IOWR(0xC2,
>             struct drm_syncobj_handle)
>             +#define DRM_IOCTL_SYNCOBJ_WAIT  DRM_IOWR(0xC3, struct
>             drm_syncobj_wait)
>
>              /**
>               * Device specific ioctls should only be in their
>             respective headers
>             --
>             2.9.4
>
>             _______________________________________________
>             dri-devel mailing list
>             dri-devel@lists.freedesktop.org
>             <mailto:dri-devel@lists.freedesktop.org>
>             https://lists.freedesktop.org/mailman/listinfo/dri-devel
>
>         _______________________________________________
>
>         amd-gfx mailing list
>
>         amd-gfx@lists.freedesktop.org
>         <mailto:amd-gfx@lists.freedesktop.org>
>
>         https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>
On Mon, Jul 10, 2017 at 9:13 AM, Christian König <deathsimple@vodafone.de>
wrote:

> Am 10.07.2017 um 17:58 schrieb Xie, AlexBin:
>
> I understand this discussion from closed source driver terminology.
>
>
>
> If a process is killed before it sends out the signaling command, will
> some part of the GPU be in a waiting situation forever?
>
>
> Yes, exactly that's the problem here and the reason why even
> Microsoft forbids that under Windows.
>

To be clear, we are only discussing wait-before-submit for client CPU
waits.  The GPU will not be waiting, only some userspace process.  If that
process is a compositor, it should take measures to avoid getting hung up
by bad clients.  What this will *not* cause is the GPU (or kernel GPU
scheduling) to get hung up on waiting for some unsubmitted thing.


> Christian.
>
>
>
>
> Alex Bin Xie
>
> *From:* amd-gfx [mailto:amd-gfx-bounces@lists.freedesktop.org
> <amd-gfx-bounces@lists.freedesktop.org>] *On Behalf Of *Jason Ekstrand
> *Sent:* Monday, July 10, 2017 11:53 AM
> *To:* Christian König <deathsimple@vodafone.de> <deathsimple@vodafone.de>
> *Cc:* Dave Airlie <airlied@gmail.com> <airlied@gmail.com>; Maling list -
> DRI developers <dri-devel@lists.freedesktop.org>
> <dri-devel@lists.freedesktop.org>; amd-gfx mailing list
> <amd-gfx@lists.freedesktop.org> <amd-gfx@lists.freedesktop.org>
> *Subject:* Re: [PATCH] drm/syncobj: add sync obj wait interface. (v6)
>
>
>
> On Mon, Jul 10, 2017 at 8:45 AM, Christian König <deathsimple@vodafone.de>
> wrote:
>
> Am 10.07.2017 um 17:28 schrieb Jason Ekstrand:
>
> On Wed, Jul 5, 2017 at 6:04 PM, Dave Airlie <airlied@gmail.com> wrote:
>
> From: Dave Airlie <airlied@redhat.com>
>
> This interface will allow sync object to be used to back
> Vulkan fences. This API is pretty much the vulkan fence waiting
> API, and I've ported the code from amdgpu.
>
> v2: accept relative timeout, pass remaining time back
> to userspace.
> v3: return to absolute timeouts.
> v4: absolute zero = poll,
>     rewrite any/all code to have same operation for arrays
>     return -EINVAL for 0 fences.
> v4.1: fixup fences allocation check, use u64_to_user_ptr
> v5: move to sec/nsec, and use timespec64 for calcs.
> v6: use -ETIME and drop the out status flag. (-ETIME
> is suggested by ickle, I can feel a shed painting)
>
> Signed-off-by: Dave Airlie <airlied@redhat.com>
> ---
>  drivers/gpu/drm/drm_internal.h |   2 +
>  drivers/gpu/drm/drm_ioctl.c    |   2 +
>  drivers/gpu/drm/drm_syncobj.c  | 142 ++++++++++++++++++++++++++++++
> +++++++++++
>  include/uapi/drm/drm.h         |  13 ++++
>  4 files changed, 159 insertions(+)
>
> diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_
> internal.h
> index 5cecc97..d71b50d 100644
> --- a/drivers/gpu/drm/drm_internal.h
> +++ b/drivers/gpu/drm/drm_internal.h
> @@ -157,3 +157,5 @@ int drm_syncobj_handle_to_fd_ioctl(struct drm_device
> *dev, void *data,
>                                    struct drm_file *file_private);
>  int drm_syncobj_fd_to_handle_ioctl(struct drm_device *dev, void *data,
>                                    struct drm_file *file_private);
> +int drm_syncobj_wait_ioctl(struct drm_device *dev, void *data,
> +                          struct drm_file *file_private);
> diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
> index f1e5681..385ce74 100644
> --- a/drivers/gpu/drm/drm_ioctl.c
> +++ b/drivers/gpu/drm/drm_ioctl.c
> @@ -657,6 +657,8 @@ static const struct drm_ioctl_desc drm_ioctls[] = {
>                       DRM_UNLOCKED|DRM_RENDER_ALLOW),
>         DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE,
> drm_syncobj_fd_to_handle_ioctl,
>                       DRM_UNLOCKED|DRM_RENDER_ALLOW),
> +       DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_WAIT, drm_syncobj_wait_ioctl,
> +                     DRM_UNLOCKED|DRM_RENDER_ALLOW),
>  };
>
>  #define DRM_CORE_IOCTL_COUNT   ARRAY_SIZE( drm_ioctls )
> diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
> index 89441bc..2d5a7a1 100644
> --- a/drivers/gpu/drm/drm_syncobj.c
> +++ b/drivers/gpu/drm/drm_syncobj.c
> @@ -1,5 +1,7 @@
>  /*
>   * Copyright 2017 Red Hat
> + * Parts ported from amdgpu (fence wait code).
> + * Copyright 2016 Advanced Micro Devices, Inc.
>   *
>   * Permission is hereby granted, free of charge, to any person obtaining a
>   * copy of this software and associated documentation files (the
> "Software"),
> @@ -31,6 +33,9 @@
>   * that contain an optional fence. The fence can be updated with a new
>   * fence, or be NULL.
>   *
> + * syncobj's can be waited upon, where it will wait for the underlying
> + * fence.
> + *
>   * syncobj's can be export to fd's and back, these fd's are opaque and
>   * have no other use case, except passing the syncobj between processes.
>   *
> @@ -451,3 +456,140 @@ drm_syncobj_fd_to_handle_ioctl(struct drm_device
> *dev, void *data,
>         return drm_syncobj_fd_to_handle(file_private, args->fd,
>                                         &args->handle);
>  }
> +
> +/**
> + * drm_timeout_abs_to_jiffies - calculate jiffies timeout from absolute
> value
> + *
> + * @timeout_sec: timeout sec component, 0 for poll
> + * @timeout_nsec: timeout nsec component in ns, 0 for poll
> + * both must be 0 for poll.
> + *
> + * Calculate the timeout in jiffies from an absolute time in sec/nsec.
> + */
> +static unsigned long drm_timeout_abs_to_jiffies(int64_t timeout_sec,
> uint64_t timeout_nsec)
> +{
> +       struct timespec64 abs_timeout, timeout, max_jiffy_timespec;
> +       unsigned long timeout_jiffies;
> +
> +       /* make 0 timeout means poll - absolute 0 doesn't seem valid */
> +       if (timeout_sec == 0 && timeout_nsec == 0)
> +               return 0;
> +
> +       abs_timeout.tv_sec = timeout_sec;
> +       abs_timeout.tv_nsec = timeout_nsec;
> +
> +       /* clamp timeout if it's to large */
> +       if (!timespec64_valid_strict(&abs_timeout))
> +               return MAX_SCHEDULE_TIMEOUT - 1;
> +
> +       timeout = timespec64_sub(abs_timeout,
> ktime_to_timespec64(ktime_get()));
> +       if (!timespec64_valid(&timeout))
> +               return 0;
> +
> +       jiffies_to_timespec64(MAX_JIFFY_OFFSET, &max_jiffy_timespec);
> +       if (timespec64_compare(&timeout, &max_jiffy_timespec) >= 0)
> +               return MAX_SCHEDULE_TIMEOUT - 1;
> +
> +       timeout_jiffies = timespec64_to_jiffies(&timeout);
> +       /*  clamp timeout to avoid infinite timeout */
> +       if (timeout_jiffies >= MAX_SCHEDULE_TIMEOUT)
> +               return MAX_SCHEDULE_TIMEOUT - 1;
> +
> +       return timeout_jiffies + 1;
> +}
> +
> +static int drm_syncobj_wait_fences(struct drm_device *dev,
> +                                  struct drm_file *file_private,
> +                                  struct drm_syncobj_wait *wait,
> +                                  struct dma_fence **fences)
> +{
> +       unsigned long timeout = drm_timeout_abs_to_jiffies(wait->timeout_sec,
> wait->timeout_nsec);
> +       int ret = 0;
> +       uint32_t first = ~0;
> +
> +       if (wait->flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL) {
> +               int i;
> +               for (i = 0; i < wait->count_handles; i++) {
> +                       ret = dma_fence_wait_timeout(fences[i], true,
> timeout);
> +
> +                       if (ret < 0)
> +                               return ret;
> +                       if (ret == 0)
> +                               break;
> +                       timeout = ret;
> +               }
> +               first = 0;
> +       } else {
> +               ret = dma_fence_wait_any_timeout(fences,
> +                                                wait->count_handles,
> +                                                true, timeout,
> +                                                &first);
> +       }
> +
> +       if (ret < 0)
> +               return ret;
> +
> +       wait->first_signaled = first;
> +       if (ret == 0)
> +               return -ETIME;
> +       return 0;
> +}
> +
> +int
> +drm_syncobj_wait_ioctl(struct drm_device *dev, void *data,
> +                      struct drm_file *file_private)
> +{
> +       struct drm_syncobj_wait *args = data;
> +       uint32_t *handles;
> +       struct dma_fence **fences;
> +       int ret = 0;
> +       int i;
> +
> +       if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ))
> +               return -ENODEV;
> +
> +       if (args->flags != 0 && args->flags != DRM_SYNCOBJ_WAIT_FLAGS_WAIT_
> ALL)
> +               return -EINVAL;
> +
> +       if (args->count_handles == 0)
> +               return -EINVAL;
> +
> +       /* Get the handles from userspace */
> +       handles = kmalloc_array(args->count_handles, sizeof(uint32_t),
> +                               GFP_KERNEL);
> +       if (handles == NULL)
> +               return -ENOMEM;
> +
> +       if (copy_from_user(handles,
> +                          u64_to_user_ptr(args->handles),
> +                          sizeof(uint32_t) * args->count_handles)) {
> +               ret = -EFAULT;
> +               goto err_free_handles;
> +       }
> +
> +       fences = kcalloc(args->count_handles,
> +                        sizeof(struct dma_fence *), GFP_KERNEL);
> +       if (!fences) {
> +               ret = -ENOMEM;
> +               goto err_free_handles;
> +       }
> +
> +       for (i = 0; i < args->count_handles; i++) {
> +               ret = drm_syncobj_fence_get(file_private, handles[i],
> +                                           &fences[i]);
> +               if (ret)
> +                       goto err_free_fence_array;
> +       }
> +
> +       ret = drm_syncobj_wait_fences(dev, file_private,
> +                                     args, fences);
>
>
>
> So, reading some CTS tests again, I think we have a problem here.  The
> Vulkan spec allows you to wait on a fence that is in the unsignaled state.
>
>
>
> At least on the closed source driver that would be illegal as far as I
> know.
>
>
>
> Then they are doing workarounds in userspace.  There are definitely CTS
> tests for this:
>
> https://github.com/KhronosGroup/VK-GL-CTS/blob/master/external/vulkancts/
> modules/vulkan/synchronization/vktSynchronizationBasicFenceTests.cpp#L74
>
>
>
> You can't wait on a semaphore before the signal operation is sent down to
> the kernel.
>
>
>
> We (Intel) deal with this today by tracking whether or not the fence has
> been submitted and using a condition variable in userspace to sort it all
> out.  If we ever want to share fences across processes (which we do), then
> this needs to be sorted in the kernel.
>
> --Jason
>
>
>
>
>
> Regards,
> Christian.
>
>
>
>
>   In theory, you could have thread A start waiting on a fence before
> thread B submits the work which triggers that fence.  This means that the
> dma_fence may not exist yet when vkWaitForFences gets called.  If we really
> want to support the full Vulkan usage, we need to somehow support missing
> dma_fences by waiting for the dma_fence to show up.  Unfortunately, I don't
> know enough about the internal kernel APIs to know what that would look
> like.
>
>
>
> +
> +err_free_fence_array:
> +       for (i = 0; i < args->count_handles; i++)
> +               dma_fence_put(fences[i]);
> +       kfree(fences);
> +err_free_handles:
> +       kfree(handles);
> +
> +       return ret;
> +}
> diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
> index 101593a..91746a7 100644
> --- a/include/uapi/drm/drm.h
> +++ b/include/uapi/drm/drm.h
> @@ -718,6 +718,18 @@ struct drm_syncobj_handle {
>         __u32 pad;
>  };
>
> +#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL (1 << 0)
> +struct drm_syncobj_wait {
> +       __u64 handles;
> +       /* absolute timeout */
> +       __s64 timeout_sec;
> +       __s64 timeout_nsec;
> +       __u32 count_handles;
> +       __u32 flags;
> +       __u32 first_signaled; /* only valid when not waiting all */
> +       __u32 pad;
> +};
> +
>  #if defined(__cplusplus)
>  }
>  #endif
> @@ -840,6 +852,7 @@ extern "C" {
>  #define DRM_IOCTL_SYNCOBJ_DESTROY      DRM_IOWR(0xC0, struct
> drm_syncobj_destroy)
>  #define DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD DRM_IOWR(0xC1, struct
> drm_syncobj_handle)
>  #define DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE DRM_IOWR(0xC2, struct
> drm_syncobj_handle)
> +#define DRM_IOCTL_SYNCOBJ_WAIT         DRM_IOWR(0xC3, struct
> drm_syncobj_wait)
>
>  /**
>   * Device specific ioctls should only be in their respective headers
> --
> 2.9.4
>
> _______________________________________________
> dri-devel mailing list
> dri-devel@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel
>
>
>
>
>
> _______________________________________________
>
> amd-gfx mailing list
>
> amd-gfx@lists.freedesktop.org
>
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>
>
>
>
>
>
>