drm/syncobj: add sync obj wait interface. (v6)

Submitted by Dave Airlie on July 6, 2017, 1:04 a.m.

Details

Message ID 20170706010409.9373-1-airlied@gmail.com
State New
Headers show
Series "drm/syncobj: add sync obj wait interface. (v6)" ( rev: 1 ) in AMD X.Org drivers

Not browsing as part of any series.

Commit Message

Dave Airlie July 6, 2017, 1:04 a.m.
From: Dave Airlie <airlied@redhat.com>

This interface will allow sync object to be used to back
Vulkan fences. This API is pretty much the vulkan fence waiting
API, and I've ported the code from amdgpu.

v2: accept relative timeout, pass remaining time back
to userspace.
v3: return to absolute timeouts.
v4: absolute zero = poll,
    rewrite any/all code to have same operation for arrays
    return -EINVAL for 0 fences.
v4.1: fixup fences allocation check, use u64_to_user_ptr
v5: move to sec/nsec, and use timespec64 for calcs.
v6: use -ETIME and drop the out status flag. (-ETIME
is suggested by ickle, I can feel a shed painting)

Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/drm_internal.h |   2 +
 drivers/gpu/drm/drm_ioctl.c    |   2 +
 drivers/gpu/drm/drm_syncobj.c  | 142 +++++++++++++++++++++++++++++++++++++++++
 include/uapi/drm/drm.h         |  13 ++++
 4 files changed, 159 insertions(+)

Patch hide | download patch | download mbox

diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h
index 5cecc97..d71b50d 100644
--- a/drivers/gpu/drm/drm_internal.h
+++ b/drivers/gpu/drm/drm_internal.h
@@ -157,3 +157,5 @@  int drm_syncobj_handle_to_fd_ioctl(struct drm_device *dev, void *data,
 				   struct drm_file *file_private);
 int drm_syncobj_fd_to_handle_ioctl(struct drm_device *dev, void *data,
 				   struct drm_file *file_private);
+int drm_syncobj_wait_ioctl(struct drm_device *dev, void *data,
+			   struct drm_file *file_private);
diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
index f1e5681..385ce74 100644
--- a/drivers/gpu/drm/drm_ioctl.c
+++ b/drivers/gpu/drm/drm_ioctl.c
@@ -657,6 +657,8 @@  static const struct drm_ioctl_desc drm_ioctls[] = {
 		      DRM_UNLOCKED|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE, drm_syncobj_fd_to_handle_ioctl,
 		      DRM_UNLOCKED|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_WAIT, drm_syncobj_wait_ioctl,
+		      DRM_UNLOCKED|DRM_RENDER_ALLOW),
 };
 
 #define DRM_CORE_IOCTL_COUNT	ARRAY_SIZE( drm_ioctls )
diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
index 89441bc..2d5a7a1 100644
--- a/drivers/gpu/drm/drm_syncobj.c
+++ b/drivers/gpu/drm/drm_syncobj.c
@@ -1,5 +1,7 @@ 
 /*
  * Copyright 2017 Red Hat
+ * Parts ported from amdgpu (fence wait code).
+ * Copyright 2016 Advanced Micro Devices, Inc.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -31,6 +33,9 @@ 
  * that contain an optional fence. The fence can be updated with a new
  * fence, or be NULL.
  *
+ * syncobj's can be waited upon, where it will wait for the underlying
+ * fence.
+ *
  * syncobj's can be export to fd's and back, these fd's are opaque and
  * have no other use case, except passing the syncobj between processes.
  *
@@ -451,3 +456,140 @@  drm_syncobj_fd_to_handle_ioctl(struct drm_device *dev, void *data,
 	return drm_syncobj_fd_to_handle(file_private, args->fd,
 					&args->handle);
 }
+
+/**
+ * drm_timeout_abs_to_jiffies - calculate jiffies timeout from absolute value
+ *
+ * @timeout_sec: timeout sec component, 0 for poll
+ * @timeout_nsec: timeout nsec component in ns, 0 for poll
+ * both must be 0 for poll.
+ *
+ * Calculate the timeout in jiffies from an absolute time in sec/nsec.
+ */
+static unsigned long drm_timeout_abs_to_jiffies(int64_t timeout_sec, uint64_t timeout_nsec)
+{
+	struct timespec64 abs_timeout, timeout, max_jiffy_timespec;
+	unsigned long timeout_jiffies;
+
+	/* make 0 timeout means poll - absolute 0 doesn't seem valid */
+	if (timeout_sec == 0 && timeout_nsec == 0)
+		return 0;
+
+	abs_timeout.tv_sec = timeout_sec;
+	abs_timeout.tv_nsec = timeout_nsec;
+
+	/* clamp timeout if it's to large */
+	if (!timespec64_valid_strict(&abs_timeout))
+		return MAX_SCHEDULE_TIMEOUT - 1;
+
+	timeout = timespec64_sub(abs_timeout, ktime_to_timespec64(ktime_get()));
+	if (!timespec64_valid(&timeout))
+		return 0;
+
+	jiffies_to_timespec64(MAX_JIFFY_OFFSET, &max_jiffy_timespec);
+	if (timespec64_compare(&timeout, &max_jiffy_timespec) >= 0)
+		return MAX_SCHEDULE_TIMEOUT - 1;
+
+	timeout_jiffies = timespec64_to_jiffies(&timeout);
+	/*  clamp timeout to avoid infinite timeout */
+	if (timeout_jiffies >= MAX_SCHEDULE_TIMEOUT)
+		return MAX_SCHEDULE_TIMEOUT - 1;
+
+	return timeout_jiffies + 1;
+}
+
+static int drm_syncobj_wait_fences(struct drm_device *dev,
+				   struct drm_file *file_private,
+				   struct drm_syncobj_wait *wait,
+				   struct dma_fence **fences)
+{
+	unsigned long timeout = drm_timeout_abs_to_jiffies(wait->timeout_sec, wait->timeout_nsec);
+	int ret = 0;
+	uint32_t first = ~0;
+
+	if (wait->flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL) {
+		int i;
+		for (i = 0; i < wait->count_handles; i++) {
+			ret = dma_fence_wait_timeout(fences[i], true, timeout);
+
+			if (ret < 0)
+				return ret;
+			if (ret == 0)
+				break;
+			timeout = ret;
+		}
+		first = 0;
+	} else {
+		ret = dma_fence_wait_any_timeout(fences,
+						 wait->count_handles,
+						 true, timeout,
+						 &first);
+	}
+
+	if (ret < 0)
+		return ret;
+
+	wait->first_signaled = first;
+	if (ret == 0)
+		return -ETIME;
+	return 0;
+}
+
+int
+drm_syncobj_wait_ioctl(struct drm_device *dev, void *data,
+		       struct drm_file *file_private)
+{
+	struct drm_syncobj_wait *args = data;
+	uint32_t *handles;
+	struct dma_fence **fences;
+	int ret = 0;
+	int i;
+
+	if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ))
+		return -ENODEV;
+
+	if (args->flags != 0 && args->flags != DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL)
+		return -EINVAL;
+
+	if (args->count_handles == 0)
+		return -EINVAL;
+
+	/* Get the handles from userspace */
+	handles = kmalloc_array(args->count_handles, sizeof(uint32_t),
+				GFP_KERNEL);
+	if (handles == NULL)
+		return -ENOMEM;
+
+	if (copy_from_user(handles,
+			   u64_to_user_ptr(args->handles),
+			   sizeof(uint32_t) * args->count_handles)) {
+		ret = -EFAULT;
+		goto err_free_handles;
+	}
+
+	fences = kcalloc(args->count_handles,
+			 sizeof(struct dma_fence *), GFP_KERNEL);
+	if (!fences) {
+		ret = -ENOMEM;
+		goto err_free_handles;
+	}
+
+	for (i = 0; i < args->count_handles; i++) {
+		ret = drm_syncobj_fence_get(file_private, handles[i],
+					    &fences[i]);
+		if (ret)
+			goto err_free_fence_array;
+	}
+
+	ret = drm_syncobj_wait_fences(dev, file_private,
+				      args, fences);
+
+err_free_fence_array:
+	for (i = 0; i < args->count_handles; i++)
+		dma_fence_put(fences[i]);
+	kfree(fences);
+err_free_handles:
+	kfree(handles);
+
+	return ret;
+}
diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
index 101593a..91746a7 100644
--- a/include/uapi/drm/drm.h
+++ b/include/uapi/drm/drm.h
@@ -718,6 +718,18 @@  struct drm_syncobj_handle {
 	__u32 pad;
 };
 
+#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL (1 << 0)
+struct drm_syncobj_wait {
+	__u64 handles;
+	/* absolute timeout */
+	__s64 timeout_sec;
+	__s64 timeout_nsec;
+	__u32 count_handles;
+	__u32 flags;
+	__u32 first_signaled; /* only valid when not waiting all */
+	__u32 pad;
+};
+
 #if defined(__cplusplus)
 }
 #endif
@@ -840,6 +852,7 @@  extern "C" {
 #define DRM_IOCTL_SYNCOBJ_DESTROY	DRM_IOWR(0xC0, struct drm_syncobj_destroy)
 #define DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD	DRM_IOWR(0xC1, struct drm_syncobj_handle)
 #define DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE	DRM_IOWR(0xC2, struct drm_syncobj_handle)
+#define DRM_IOCTL_SYNCOBJ_WAIT		DRM_IOWR(0xC3, struct drm_syncobj_wait)
 
 /**
  * Device specific ioctls should only be in their respective headers

Comments

On Wed, Jul 5, 2017 at 6:04 PM, Dave Airlie <airlied@gmail.com> wrote:

> From: Dave Airlie <airlied@redhat.com>
>
> This interface will allow sync object to be used to back
> Vulkan fences. This API is pretty much the vulkan fence waiting
> API, and I've ported the code from amdgpu.
>
> v2: accept relative timeout, pass remaining time back
> to userspace.
> v3: return to absolute timeouts.
> v4: absolute zero = poll,
>     rewrite any/all code to have same operation for arrays
>     return -EINVAL for 0 fences.
> v4.1: fixup fences allocation check, use u64_to_user_ptr
> v5: move to sec/nsec, and use timespec64 for calcs.
> v6: use -ETIME and drop the out status flag. (-ETIME
> is suggested by ickle, I can feel a shed painting)
>
> Signed-off-by: Dave Airlie <airlied@redhat.com>
> ---
>  drivers/gpu/drm/drm_internal.h |   2 +
>  drivers/gpu/drm/drm_ioctl.c    |   2 +
>  drivers/gpu/drm/drm_syncobj.c  | 142 ++++++++++++++++++++++++++++++
> +++++++++++
>  include/uapi/drm/drm.h         |  13 ++++
>  4 files changed, 159 insertions(+)
>
> diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_
> internal.h
> index 5cecc97..d71b50d 100644
> --- a/drivers/gpu/drm/drm_internal.h
> +++ b/drivers/gpu/drm/drm_internal.h
> @@ -157,3 +157,5 @@ int drm_syncobj_handle_to_fd_ioctl(struct drm_device
> *dev, void *data,
>                                    struct drm_file *file_private);
>  int drm_syncobj_fd_to_handle_ioctl(struct drm_device *dev, void *data,
>                                    struct drm_file *file_private);
> +int drm_syncobj_wait_ioctl(struct drm_device *dev, void *data,
> +                          struct drm_file *file_private);
> diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
> index f1e5681..385ce74 100644
> --- a/drivers/gpu/drm/drm_ioctl.c
> +++ b/drivers/gpu/drm/drm_ioctl.c
> @@ -657,6 +657,8 @@ static const struct drm_ioctl_desc drm_ioctls[] = {
>                       DRM_UNLOCKED|DRM_RENDER_ALLOW),
>         DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE,
> drm_syncobj_fd_to_handle_ioctl,
>                       DRM_UNLOCKED|DRM_RENDER_ALLOW),
> +       DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_WAIT, drm_syncobj_wait_ioctl,
> +                     DRM_UNLOCKED|DRM_RENDER_ALLOW),
>  };
>
>  #define DRM_CORE_IOCTL_COUNT   ARRAY_SIZE( drm_ioctls )
> diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
> index 89441bc..2d5a7a1 100644
> --- a/drivers/gpu/drm/drm_syncobj.c
> +++ b/drivers/gpu/drm/drm_syncobj.c
> @@ -1,5 +1,7 @@
>  /*
>   * Copyright 2017 Red Hat
> + * Parts ported from amdgpu (fence wait code).
> + * Copyright 2016 Advanced Micro Devices, Inc.
>   *
>   * Permission is hereby granted, free of charge, to any person obtaining a
>   * copy of this software and associated documentation files (the
> "Software"),
> @@ -31,6 +33,9 @@
>   * that contain an optional fence. The fence can be updated with a new
>   * fence, or be NULL.
>   *
> + * syncobj's can be waited upon, where it will wait for the underlying
> + * fence.
> + *
>   * syncobj's can be export to fd's and back, these fd's are opaque and
>   * have no other use case, except passing the syncobj between processes.
>   *
> @@ -451,3 +456,140 @@ drm_syncobj_fd_to_handle_ioctl(struct drm_device
> *dev, void *data,
>         return drm_syncobj_fd_to_handle(file_private, args->fd,
>                                         &args->handle);
>  }
> +
> +/**
> + * drm_timeout_abs_to_jiffies - calculate jiffies timeout from absolute
> value
> + *
> + * @timeout_sec: timeout sec component, 0 for poll
> + * @timeout_nsec: timeout nsec component in ns, 0 for poll
> + * both must be 0 for poll.
> + *
> + * Calculate the timeout in jiffies from an absolute time in sec/nsec.
> + */
> +static unsigned long drm_timeout_abs_to_jiffies(int64_t timeout_sec,
> uint64_t timeout_nsec)
> +{
> +       struct timespec64 abs_timeout, timeout, max_jiffy_timespec;
> +       unsigned long timeout_jiffies;
> +
> +       /* make 0 timeout means poll - absolute 0 doesn't seem valid */
> +       if (timeout_sec == 0 && timeout_nsec == 0)
> +               return 0;
> +
> +       abs_timeout.tv_sec = timeout_sec;
> +       abs_timeout.tv_nsec = timeout_nsec;
> +
> +       /* clamp timeout if it's to large */
> +       if (!timespec64_valid_strict(&abs_timeout))
> +               return MAX_SCHEDULE_TIMEOUT - 1;
> +
> +       timeout = timespec64_sub(abs_timeout,
> ktime_to_timespec64(ktime_get()));
> +       if (!timespec64_valid(&timeout))
> +               return 0;
> +
> +       jiffies_to_timespec64(MAX_JIFFY_OFFSET, &max_jiffy_timespec);
> +       if (timespec64_compare(&timeout, &max_jiffy_timespec) >= 0)
> +               return MAX_SCHEDULE_TIMEOUT - 1;
> +
> +       timeout_jiffies = timespec64_to_jiffies(&timeout);
> +       /*  clamp timeout to avoid infinite timeout */
> +       if (timeout_jiffies >= MAX_SCHEDULE_TIMEOUT)
> +               return MAX_SCHEDULE_TIMEOUT - 1;
> +
> +       return timeout_jiffies + 1;
> +}
> +
> +static int drm_syncobj_wait_fences(struct drm_device *dev,
> +                                  struct drm_file *file_private,
> +                                  struct drm_syncobj_wait *wait,
> +                                  struct dma_fence **fences)
> +{
> +       unsigned long timeout = drm_timeout_abs_to_jiffies(wait->timeout_sec,
> wait->timeout_nsec);
> +       int ret = 0;
> +       uint32_t first = ~0;
> +
> +       if (wait->flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL) {
> +               int i;
> +               for (i = 0; i < wait->count_handles; i++) {
> +                       ret = dma_fence_wait_timeout(fences[i], true,
> timeout);
> +
> +                       if (ret < 0)
> +                               return ret;
> +                       if (ret == 0)
> +                               break;
> +                       timeout = ret;
> +               }
> +               first = 0;
> +       } else {
> +               ret = dma_fence_wait_any_timeout(fences,
> +                                                wait->count_handles,
> +                                                true, timeout,
> +                                                &first);
> +       }
> +
> +       if (ret < 0)
> +               return ret;
> +
> +       wait->first_signaled = first;
> +       if (ret == 0)
> +               return -ETIME;
> +       return 0;
> +}
> +
> +int
> +drm_syncobj_wait_ioctl(struct drm_device *dev, void *data,
> +                      struct drm_file *file_private)
> +{
> +       struct drm_syncobj_wait *args = data;
> +       uint32_t *handles;
> +       struct dma_fence **fences;
> +       int ret = 0;
> +       int i;
> +
> +       if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ))
> +               return -ENODEV;
> +
> +       if (args->flags != 0 && args->flags != DRM_SYNCOBJ_WAIT_FLAGS_WAIT_
> ALL)
> +               return -EINVAL;
> +
> +       if (args->count_handles == 0)
> +               return -EINVAL;
> +
> +       /* Get the handles from userspace */
> +       handles = kmalloc_array(args->count_handles, sizeof(uint32_t),
> +                               GFP_KERNEL);
> +       if (handles == NULL)
> +               return -ENOMEM;
> +
> +       if (copy_from_user(handles,
> +                          u64_to_user_ptr(args->handles),
> +                          sizeof(uint32_t) * args->count_handles)) {
> +               ret = -EFAULT;
> +               goto err_free_handles;
> +       }
> +
> +       fences = kcalloc(args->count_handles,
> +                        sizeof(struct dma_fence *), GFP_KERNEL);
> +       if (!fences) {
> +               ret = -ENOMEM;
> +               goto err_free_handles;
> +       }
> +
> +       for (i = 0; i < args->count_handles; i++) {
> +               ret = drm_syncobj_fence_get(file_private, handles[i],
> +                                           &fences[i]);
> +               if (ret)
> +                       goto err_free_fence_array;
> +       }
> +
> +       ret = drm_syncobj_wait_fences(dev, file_private,
> +                                     args, fences);
>

So, reading some CTS tests again, and I think we have a problem here.  The
Vulkan spec allows you to wait on a fence that is in the unsignaled state.
In theory, you could have thread A start waiting on a fence before thread B
submits the work which triggers that fence.  This means that the dma_fence
may not exist yet when vkWaitForFences gets called.  If we really want to
support the full Vulkan usage, we need to somehow support missing
dma_fences by waiting for the dma_fence to show up.  Unfortunately, I don't
know enough about the internal kernel APIs to know what that would look
like.


> +
> +err_free_fence_array:
> +       for (i = 0; i < args->count_handles; i++)
> +               dma_fence_put(fences[i]);
> +       kfree(fences);
> +err_free_handles:
> +       kfree(handles);
> +
> +       return ret;
> +}
> diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
> index 101593a..91746a7 100644
> --- a/include/uapi/drm/drm.h
> +++ b/include/uapi/drm/drm.h
> @@ -718,6 +718,18 @@ struct drm_syncobj_handle {
>         __u32 pad;
>  };
>
> +#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL (1 << 0)
> +struct drm_syncobj_wait {
> +       __u64 handles;
> +       /* absolute timeout */
> +       __s64 timeout_sec;
> +       __s64 timeout_nsec;
> +       __u32 count_handles;
> +       __u32 flags;
> +       __u32 first_signaled; /* only valid when not waiting all */
> +       __u32 pad;
> +};
> +
>  #if defined(__cplusplus)
>  }
>  #endif
> @@ -840,6 +852,7 @@ extern "C" {
>  #define DRM_IOCTL_SYNCOBJ_DESTROY      DRM_IOWR(0xC0, struct
> drm_syncobj_destroy)
>  #define DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD DRM_IOWR(0xC1, struct
> drm_syncobj_handle)
>  #define DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE DRM_IOWR(0xC2, struct
> drm_syncobj_handle)
> +#define DRM_IOCTL_SYNCOBJ_WAIT         DRM_IOWR(0xC3, struct
> drm_syncobj_wait)
>
>  /**
>   * Device specific ioctls should only be in their respective headers
> --
> 2.9.4
>
> _______________________________________________
> dri-devel mailing list
> dri-devel@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel
>
Am 10.07.2017 um 17:28 schrieb Jason Ekstrand:
> On Wed, Jul 5, 2017 at 6:04 PM, Dave Airlie <airlied@gmail.com 
> <mailto:airlied@gmail.com>> wrote:
>
>     From: Dave Airlie <airlied@redhat.com <mailto:airlied@redhat.com>>
>
>     This interface will allow sync object to be used to back
>     Vulkan fences. This API is pretty much the vulkan fence waiting
>     API, and I've ported the code from amdgpu.
>
>     v2: accept relative timeout, pass remaining time back
>     to userspace.
>     v3: return to absolute timeouts.
>     v4: absolute zero = poll,
>         rewrite any/all code to have same operation for arrays
>         return -EINVAL for 0 fences.
>     v4.1: fixup fences allocation check, use u64_to_user_ptr
>     v5: move to sec/nsec, and use timespec64 for calcs.
>     v6: use -ETIME and drop the out status flag. (-ETIME
>     is suggested by ickle, I can feel a shed painting)
>
>     Signed-off-by: Dave Airlie <airlied@redhat.com
>     <mailto:airlied@redhat.com>>
>     ---
>      drivers/gpu/drm/drm_internal.h |   2 +
>      drivers/gpu/drm/drm_ioctl.c    |   2 +
>      drivers/gpu/drm/drm_syncobj.c  | 142
>     +++++++++++++++++++++++++++++++++++++++++
>      include/uapi/drm/drm.h         |  13 ++++
>      4 files changed, 159 insertions(+)
>
>     diff --git a/drivers/gpu/drm/drm_internal.h
>     b/drivers/gpu/drm/drm_internal.h
>     index 5cecc97..d71b50d 100644
>     --- a/drivers/gpu/drm/drm_internal.h
>     +++ b/drivers/gpu/drm/drm_internal.h
>     @@ -157,3 +157,5 @@ int drm_syncobj_handle_to_fd_ioctl(struct
>     drm_device *dev, void *data,
>                                        struct drm_file *file_private);
>      int drm_syncobj_fd_to_handle_ioctl(struct drm_device *dev, void
>     *data,
>                                        struct drm_file *file_private);
>     +int drm_syncobj_wait_ioctl(struct drm_device *dev, void *data,
>     +                          struct drm_file *file_private);
>     diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
>     index f1e5681..385ce74 100644
>     --- a/drivers/gpu/drm/drm_ioctl.c
>     +++ b/drivers/gpu/drm/drm_ioctl.c
>     @@ -657,6 +657,8 @@ static const struct drm_ioctl_desc
>     drm_ioctls[] = {
>                           DRM_UNLOCKED|DRM_RENDER_ALLOW),
>             DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE,
>     drm_syncobj_fd_to_handle_ioctl,
>                           DRM_UNLOCKED|DRM_RENDER_ALLOW),
>     +       DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_WAIT, drm_syncobj_wait_ioctl,
>     +                     DRM_UNLOCKED|DRM_RENDER_ALLOW),
>      };
>
>      #define DRM_CORE_IOCTL_COUNT   ARRAY_SIZE( drm_ioctls )
>     diff --git a/drivers/gpu/drm/drm_syncobj.c
>     b/drivers/gpu/drm/drm_syncobj.c
>     index 89441bc..2d5a7a1 100644
>     --- a/drivers/gpu/drm/drm_syncobj.c
>     +++ b/drivers/gpu/drm/drm_syncobj.c
>     @@ -1,5 +1,7 @@
>      /*
>       * Copyright 2017 Red Hat
>     + * Parts ported from amdgpu (fence wait code).
>     + * Copyright 2016 Advanced Micro Devices, Inc.
>       *
>       * Permission is hereby granted, free of charge, to any person
>     obtaining a
>       * copy of this software and associated documentation files (the
>     "Software"),
>     @@ -31,6 +33,9 @@
>       * that contain an optional fence. The fence can be updated with
>     a new
>       * fence, or be NULL.
>       *
>     + * syncobj's can be waited upon, where it will wait for the
>     underlying
>     + * fence.
>     + *
>       * syncobj's can be export to fd's and back, these fd's are
>     opaque and
>       * have no other use case, except passing the syncobj between
>     processes.
>       *
>     @@ -451,3 +456,140 @@ drm_syncobj_fd_to_handle_ioctl(struct
>     drm_device *dev, void *data,
>             return drm_syncobj_fd_to_handle(file_private, args->fd,
>     &args->handle);
>      }
>     +
>     +/**
>     + * drm_timeout_abs_to_jiffies - calculate jiffies timeout from
>     absolute value
>     + *
>     + * @timeout_sec: timeout sec component, 0 for poll
>     + * @timeout_nsec: timeout nsec component in ns, 0 for poll
>     + * both must be 0 for poll.
>     + *
>     + * Calculate the timeout in jiffies from an absolute time in
>     sec/nsec.
>     + */
>     +static unsigned long drm_timeout_abs_to_jiffies(int64_t
>     timeout_sec, uint64_t timeout_nsec)
>     +{
>     +       struct timespec64 abs_timeout, timeout, max_jiffy_timespec;
>     +       unsigned long timeout_jiffies;
>     +
>     +       /* make 0 timeout means poll - absolute 0 doesn't seem
>     valid */
>     +       if (timeout_sec == 0 && timeout_nsec == 0)
>     +               return 0;
>     +
>     +       abs_timeout.tv_sec = timeout_sec;
>     +       abs_timeout.tv_nsec = timeout_nsec;
>     +
>     +       /* clamp timeout if it's to large */
>     +       if (!timespec64_valid_strict(&abs_timeout))
>     +               return MAX_SCHEDULE_TIMEOUT - 1;
>     +
>     +       timeout = timespec64_sub(abs_timeout,
>     ktime_to_timespec64(ktime_get()));
>     +       if (!timespec64_valid(&timeout))
>     +               return 0;
>     +
>     +       jiffies_to_timespec64(MAX_JIFFY_OFFSET, &max_jiffy_timespec);
>     +       if (timespec64_compare(&timeout, &max_jiffy_timespec) >= 0)
>     +               return MAX_SCHEDULE_TIMEOUT - 1;
>     +
>     +       timeout_jiffies = timespec64_to_jiffies(&timeout);
>     +       /*  clamp timeout to avoid infinite timeout */
>     +       if (timeout_jiffies >= MAX_SCHEDULE_TIMEOUT)
>     +               return MAX_SCHEDULE_TIMEOUT - 1;
>     +
>     +       return timeout_jiffies + 1;
>     +}
>     +
>     +static int drm_syncobj_wait_fences(struct drm_device *dev,
>     +                                  struct drm_file *file_private,
>     +                                  struct drm_syncobj_wait *wait,
>     +                                  struct dma_fence **fences)
>     +{
>     +       unsigned long timeout =
>     drm_timeout_abs_to_jiffies(wait->timeout_sec, wait->timeout_nsec);
>     +       int ret = 0;
>     +       uint32_t first = ~0;
>     +
>     +       if (wait->flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL) {
>     +               int i;
>     +               for (i = 0; i < wait->count_handles; i++) {
>     +                       ret = dma_fence_wait_timeout(fences[i],
>     true, timeout);
>     +
>     +                       if (ret < 0)
>     +                               return ret;
>     +                       if (ret == 0)
>     +                               break;
>     +                       timeout = ret;
>     +               }
>     +               first = 0;
>     +       } else {
>     +               ret = dma_fence_wait_any_timeout(fences,
>     + wait->count_handles,
>     +                                                true, timeout,
>     + &first);
>     +       }
>     +
>     +       if (ret < 0)
>     +               return ret;
>     +
>     +       wait->first_signaled = first;
>     +       if (ret == 0)
>     +               return -ETIME;
>     +       return 0;
>     +}
>     +
>     +int
>     +drm_syncobj_wait_ioctl(struct drm_device *dev, void *data,
>     +                      struct drm_file *file_private)
>     +{
>     +       struct drm_syncobj_wait *args = data;
>     +       uint32_t *handles;
>     +       struct dma_fence **fences;
>     +       int ret = 0;
>     +       int i;
>     +
>     +       if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ))
>     +               return -ENODEV;
>     +
>     +       if (args->flags != 0 && args->flags !=
>     DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL)
>     +               return -EINVAL;
>     +
>     +       if (args->count_handles == 0)
>     +               return -EINVAL;
>     +
>     +       /* Get the handles from userspace */
>     +       handles = kmalloc_array(args->count_handles, sizeof(uint32_t),
>     +                               GFP_KERNEL);
>     +       if (handles == NULL)
>     +               return -ENOMEM;
>     +
>     +       if (copy_from_user(handles,
>     + u64_to_user_ptr(args->handles),
>     +                          sizeof(uint32_t) * args->count_handles)) {
>     +               ret = -EFAULT;
>     +               goto err_free_handles;
>     +       }
>     +
>     +       fences = kcalloc(args->count_handles,
>     +                        sizeof(struct dma_fence *), GFP_KERNEL);
>     +       if (!fences) {
>     +               ret = -ENOMEM;
>     +               goto err_free_handles;
>     +       }
>     +
>     +       for (i = 0; i < args->count_handles; i++) {
>     +               ret = drm_syncobj_fence_get(file_private, handles[i],
>     +  &fences[i]);
>     +               if (ret)
>     +                       goto err_free_fence_array;
>     +       }
>     +
>     +       ret = drm_syncobj_wait_fences(dev, file_private,
>     +                                     args, fences);
>
>
> So, reading some CTS tests again, and I think we have a problem here.  
> The Vulkan spec allows you to wait on a fence that is in the 
> unsignaled state.

At least on the closed source driver that would be illegal as far as I know.

You can't wait on a semaphore before the signal operation is sent down 
to the kernel.

Regards,
Christian.

>   In theory, you could have thread A start waiting on a fence before 
> thread B submits the work which triggers that fence.  This means that 
> the dma_fence may not exist yet when vkWaitForFences gets called.  If 
> we really want to support the full Vulkan usage, we need to somehow 
> support missing dma_fences by waiting for the dma_fence to show up.  
> Unfortunately, I don't know enough about the internal kernel APIs to 
> know what that would look like.
>
>     +
>     +err_free_fence_array:
>     +       for (i = 0; i < args->count_handles; i++)
>     +               dma_fence_put(fences[i]);
>     +       kfree(fences);
>     +err_free_handles:
>     +       kfree(handles);
>     +
>     +       return ret;
>     +}
>     diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
>     index 101593a..91746a7 100644
>     --- a/include/uapi/drm/drm.h
>     +++ b/include/uapi/drm/drm.h
>     @@ -718,6 +718,18 @@ struct drm_syncobj_handle {
>             __u32 pad;
>      };
>
>     +#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL (1 << 0)
>     +struct drm_syncobj_wait {
>     +       __u64 handles;
>     +       /* absolute timeout */
>     +       __s64 timeout_sec;
>     +       __s64 timeout_nsec;
>     +       __u32 count_handles;
>     +       __u32 flags;
>     +       __u32 first_signaled; /* only valid when not waiting all */
>     +       __u32 pad;
>     +};
>     +
>      #if defined(__cplusplus)
>      }
>      #endif
>     @@ -840,6 +852,7 @@ extern "C" {
>      #define DRM_IOCTL_SYNCOBJ_DESTROY      DRM_IOWR(0xC0, struct
>     drm_syncobj_destroy)
>      #define DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD DRM_IOWR(0xC1, struct
>     drm_syncobj_handle)
>      #define DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE DRM_IOWR(0xC2, struct
>     drm_syncobj_handle)
>     +#define DRM_IOCTL_SYNCOBJ_WAIT         DRM_IOWR(0xC3, struct
>     drm_syncobj_wait)
>
>      /**
>       * Device specific ioctls should only be in their respective headers
>     --
>     2.9.4
>
>     _______________________________________________
>     dri-devel mailing list
>     dri-devel@lists.freedesktop.org
>     <mailto:dri-devel@lists.freedesktop.org>
>     https://lists.freedesktop.org/mailman/listinfo/dri-devel
>     <https://lists.freedesktop.org/mailman/listinfo/dri-devel>
>
>
>
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
On Mon, Jul 10, 2017 at 8:45 AM, Christian König <deathsimple@vodafone.de>
wrote:

> Am 10.07.2017 um 17:28 schrieb Jason Ekstrand:
>
> On Wed, Jul 5, 2017 at 6:04 PM, Dave Airlie <airlied@gmail.com> wrote:
>
>> From: Dave Airlie <airlied@redhat.com>
>>
>> This interface will allow sync object to be used to back
>> Vulkan fences. This API is pretty much the vulkan fence waiting
>> API, and I've ported the code from amdgpu.
>>
>> v2: accept relative timeout, pass remaining time back
>> to userspace.
>> v3: return to absolute timeouts.
>> v4: absolute zero = poll,
>>     rewrite any/all code to have same operation for arrays
>>     return -EINVAL for 0 fences.
>> v4.1: fixup fences allocation check, use u64_to_user_ptr
>> v5: move to sec/nsec, and use timespec64 for calcs.
>> v6: use -ETIME and drop the out status flag. (-ETIME
>> is suggested by ickle, I can feel a shed painting)
>>
>> Signed-off-by: Dave Airlie <airlied@redhat.com>
>> ---
>>  drivers/gpu/drm/drm_internal.h |   2 +
>>  drivers/gpu/drm/drm_ioctl.c    |   2 +
>>  drivers/gpu/drm/drm_syncobj.c  | 142 ++++++++++++++++++++++++++++++
>> +++++++++++
>>  include/uapi/drm/drm.h         |  13 ++++
>>  4 files changed, 159 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/drm_internal.h
>> b/drivers/gpu/drm/drm_internal.h
>> index 5cecc97..d71b50d 100644
>> --- a/drivers/gpu/drm/drm_internal.h
>> +++ b/drivers/gpu/drm/drm_internal.h
>> @@ -157,3 +157,5 @@ int drm_syncobj_handle_to_fd_ioctl(struct drm_device
>> *dev, void *data,
>>                                    struct drm_file *file_private);
>>  int drm_syncobj_fd_to_handle_ioctl(struct drm_device *dev, void *data,
>>                                    struct drm_file *file_private);
>> +int drm_syncobj_wait_ioctl(struct drm_device *dev, void *data,
>> +                          struct drm_file *file_private);
>> diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
>> index f1e5681..385ce74 100644
>> --- a/drivers/gpu/drm/drm_ioctl.c
>> +++ b/drivers/gpu/drm/drm_ioctl.c
>> @@ -657,6 +657,8 @@ static const struct drm_ioctl_desc drm_ioctls[] = {
>>                       DRM_UNLOCKED|DRM_RENDER_ALLOW),
>>         DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE,
>> drm_syncobj_fd_to_handle_ioctl,
>>                       DRM_UNLOCKED|DRM_RENDER_ALLOW),
>> +       DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_WAIT, drm_syncobj_wait_ioctl,
>> +                     DRM_UNLOCKED|DRM_RENDER_ALLOW),
>>  };
>>
>>  #define DRM_CORE_IOCTL_COUNT   ARRAY_SIZE( drm_ioctls )
>> diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.
>> c
>> index 89441bc..2d5a7a1 100644
>> --- a/drivers/gpu/drm/drm_syncobj.c
>> +++ b/drivers/gpu/drm/drm_syncobj.c
>> @@ -1,5 +1,7 @@
>>  /*
>>   * Copyright 2017 Red Hat
>> + * Parts ported from amdgpu (fence wait code).
>> + * Copyright 2016 Advanced Micro Devices, Inc.
>>   *
>>   * Permission is hereby granted, free of charge, to any person obtaining
>> a
>>   * copy of this software and associated documentation files (the
>> "Software"),
>> @@ -31,6 +33,9 @@
>>   * that contain an optional fence. The fence can be updated with a new
>>   * fence, or be NULL.
>>   *
>> + * syncobj's can be waited upon, where it will wait for the underlying
>> + * fence.
>> + *
>>   * syncobj's can be export to fd's and back, these fd's are opaque and
>>   * have no other use case, except passing the syncobj between processes.
>>   *
>> @@ -451,3 +456,140 @@ drm_syncobj_fd_to_handle_ioctl(struct drm_device
>> *dev, void *data,
>>         return drm_syncobj_fd_to_handle(file_private, args->fd,
>>                                         &args->handle);
>>  }
>> +
>> +/**
>> + * drm_timeout_abs_to_jiffies - calculate jiffies timeout from absolute
>> value
>> + *
>> + * @timeout_sec: timeout sec component, 0 for poll
>> + * @timeout_nsec: timeout nsec component in ns, 0 for poll
>> + * both must be 0 for poll.
>> + *
>> + * Calculate the timeout in jiffies from an absolute time in sec/nsec.
>> + */
>> +static unsigned long drm_timeout_abs_to_jiffies(int64_t timeout_sec,
>> uint64_t timeout_nsec)
>> +{
>> +       struct timespec64 abs_timeout, timeout, max_jiffy_timespec;
>> +       unsigned long timeout_jiffies;
>> +
>> +       /* make 0 timeout means poll - absolute 0 doesn't seem valid */
>> +       if (timeout_sec == 0 && timeout_nsec == 0)
>> +               return 0;
>> +
>> +       abs_timeout.tv_sec = timeout_sec;
>> +       abs_timeout.tv_nsec = timeout_nsec;
>> +
>> +       /* clamp timeout if it's to large */
>> +       if (!timespec64_valid_strict(&abs_timeout))
>> +               return MAX_SCHEDULE_TIMEOUT - 1;
>> +
>> +       timeout = timespec64_sub(abs_timeout,
>> ktime_to_timespec64(ktime_get()));
>> +       if (!timespec64_valid(&timeout))
>> +               return 0;
>> +
>> +       jiffies_to_timespec64(MAX_JIFFY_OFFSET, &max_jiffy_timespec);
>> +       if (timespec64_compare(&timeout, &max_jiffy_timespec) >= 0)
>> +               return MAX_SCHEDULE_TIMEOUT - 1;
>> +
>> +       timeout_jiffies = timespec64_to_jiffies(&timeout);
>> +       /*  clamp timeout to avoid infinite timeout */
>> +       if (timeout_jiffies >= MAX_SCHEDULE_TIMEOUT)
>> +               return MAX_SCHEDULE_TIMEOUT - 1;
>> +
>> +       return timeout_jiffies + 1;
>> +}
>> +
>> +static int drm_syncobj_wait_fences(struct drm_device *dev,
>> +                                  struct drm_file *file_private,
>> +                                  struct drm_syncobj_wait *wait,
>> +                                  struct dma_fence **fences)
>> +{
>> +       unsigned long timeout = drm_timeout_abs_to_jiffies(wait->timeout_sec,
>> wait->timeout_nsec);
>> +       int ret = 0;
>> +       uint32_t first = ~0;
>> +
>> +       if (wait->flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL) {
>> +               int i;
>> +               for (i = 0; i < wait->count_handles; i++) {
>> +                       ret = dma_fence_wait_timeout(fences[i], true,
>> timeout);
>> +
>> +                       if (ret < 0)
>> +                               return ret;
>> +                       if (ret == 0)
>> +                               break;
>> +                       timeout = ret;
>> +               }
>> +               first = 0;
>> +       } else {
>> +               ret = dma_fence_wait_any_timeout(fences,
>> +                                                wait->count_handles,
>> +                                                true, timeout,
>> +                                                &first);
>> +       }
>> +
>> +       if (ret < 0)
>> +               return ret;
>> +
>> +       wait->first_signaled = first;
>> +       if (ret == 0)
>> +               return -ETIME;
>> +       return 0;
>> +}
>> +
>> +int
>> +drm_syncobj_wait_ioctl(struct drm_device *dev, void *data,
>> +                      struct drm_file *file_private)
>> +{
>> +       struct drm_syncobj_wait *args = data;
>> +       uint32_t *handles;
>> +       struct dma_fence **fences;
>> +       int ret = 0;
>> +       int i;
>> +
>> +       if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ))
>> +               return -ENODEV;
>> +
>> +       if (args->flags != 0 && args->flags !=
>> DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL)
>> +               return -EINVAL;
>> +
>> +       if (args->count_handles == 0)
>> +               return -EINVAL;
>> +
>> +       /* Get the handles from userspace */
>> +       handles = kmalloc_array(args->count_handles, sizeof(uint32_t),
>> +                               GFP_KERNEL);
>> +       if (handles == NULL)
>> +               return -ENOMEM;
>> +
>> +       if (copy_from_user(handles,
>> +                          u64_to_user_ptr(args->handles),
>> +                          sizeof(uint32_t) * args->count_handles)) {
>> +               ret = -EFAULT;
>> +               goto err_free_handles;
>> +       }
>> +
>> +       fences = kcalloc(args->count_handles,
>> +                        sizeof(struct dma_fence *), GFP_KERNEL);
>> +       if (!fences) {
>> +               ret = -ENOMEM;
>> +               goto err_free_handles;
>> +       }
>> +
>> +       for (i = 0; i < args->count_handles; i++) {
>> +               ret = drm_syncobj_fence_get(file_private, handles[i],
>> +                                           &fences[i]);
>> +               if (ret)
>> +                       goto err_free_fence_array;
>> +       }
>> +
>> +       ret = drm_syncobj_wait_fences(dev, file_private,
>> +                                     args, fences);
>>
>
> So, reading some CTS tests again, and I think we have a problem here.  The
> Vulkan spec allows you to wait on a fence that is in the unsignaled state.
>
>
> At least on the closed source driver that would be illegal as far as I
> know.
>

Then they are doing workarounds in userspace.  There are definitely CTS
tests for this:

https://github.com/KhronosGroup/VK-GL-CTS/blob/master/external/vulkancts/modules/vulkan/synchronization/vktSynchronizationBasicFenceTests.cpp#L74


> You can't wait on a semaphore before the signal operation is send down to
> the kerel.
>

We (Intel) deal with this today by tracking whether or not the fence has
been submitted and using a condition variable in userspace to sort it all
out.  If we ever want to share fences across processes (which we do), then
this needs to be sorted in the kernel.

--Jason



> Regards,
> Christian.
>
>
>   In theory, you could have thread A start waiting on a fence before
> thread B submits the work which triggers that fence.  This means that the
> dma_fence may not exist yet when vkWaitForFences gets called.  If we really
> want to support the full Vulkan usage, we need to somehow support missing
> dma_fences by waiting for the dma_fence to show up.  Unfortunately, I don't
> know enough about the internal kernel APIs to know what that would look
> like.
>
>
>
>> +
>> +err_free_fence_array:
>> +       for (i = 0; i < args->count_handles; i++)
>> +               dma_fence_put(fences[i]);
>> +       kfree(fences);
>> +err_free_handles:
>> +       kfree(handles);
>> +
>> +       return ret;
>> +}
>> diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
>> index 101593a..91746a7 100644
>> --- a/include/uapi/drm/drm.h
>> +++ b/include/uapi/drm/drm.h
>> @@ -718,6 +718,18 @@ struct drm_syncobj_handle {
>>         __u32 pad;
>>  };
>>
>> +#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL (1 << 0)
>> +struct drm_syncobj_wait {
>> +       __u64 handles;
>> +       /* absolute timeout */
>> +       __s64 timeout_sec;
>> +       __s64 timeout_nsec;
>> +       __u32 count_handles;
>> +       __u32 flags;
>> +       __u32 first_signaled; /* only valid when not waiting all */
>> +       __u32 pad;
>> +};
>> +
>>  #if defined(__cplusplus)
>>  }
>>  #endif
>> @@ -840,6 +852,7 @@ extern "C" {
>>  #define DRM_IOCTL_SYNCOBJ_DESTROY      DRM_IOWR(0xC0, struct
>> drm_syncobj_destroy)
>>  #define DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD DRM_IOWR(0xC1, struct
>> drm_syncobj_handle)
>>  #define DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE DRM_IOWR(0xC2, struct
>> drm_syncobj_handle)
>> +#define DRM_IOCTL_SYNCOBJ_WAIT         DRM_IOWR(0xC3, struct
>> drm_syncobj_wait)
>>
>>  /**
>>   * Device specific ioctls should only be in their respective headers
>> --
>> 2.9.4
>>
>> _______________________________________________
>> dri-devel mailing list
>> dri-devel@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/dri-devel
>>
>
>
>
> _______________________________________________
> amd-gfx mailing listamd-gfx@lists.freedesktop.orghttps://lists.freedesktop.org/mailman/listinfo/amd-gfx
>
>
>
Am 10.07.2017 um 17:52 schrieb Jason Ekstrand:
> On Mon, Jul 10, 2017 at 8:45 AM, Christian König 
> <deathsimple@vodafone.de <mailto:deathsimple@vodafone.de>> wrote:
>
>     Am 10.07.2017 um 17:28 schrieb Jason Ekstrand:
>>     On Wed, Jul 5, 2017 at 6:04 PM, Dave Airlie <airlied@gmail.com
>>     <mailto:airlied@gmail.com>> wrote:
>>     [SNIP]
>>     So, reading some CTS tests again, and I think we have a problem
>>     here.  The Vulkan spec allows you to wait on a fence that is in
>>     the unsignaled state.
>
>     At least on the closed source driver that would be illegal as far
>     as I know.
>
>
> Then they are doing workarounds in userspace.  There are definitely 
> CTS tests for this:
>
> https://github.com/KhronosGroup/VK-GL-CTS/blob/master/external/vulkancts/modules/vulkan/synchronization/vktSynchronizationBasicFenceTests.cpp#L74
>
>     You can't wait on a semaphore before the signal operation is send
>     down to the kerel.
>
>
> We (Intel) deal with this today by tracking whether or not the fence 
> has been submitted and using a condition variable in userspace to sort 
> it all out.

Which sounds exactly like what AMD is doing in its drivers as well.

> If we ever want to share fences across processes (which we do), then 
> this needs to be sorted in the kernel.

That would clearly get a NAK from my side, even Microsoft forbids wait 
before signal because you can easily end up in deadlock situations.

Regards,
Christian.
On Mon, Jul 10, 2017 at 9:15 AM, Christian König <deathsimple@vodafone.de>
wrote:

> Am 10.07.2017 um 17:52 schrieb Jason Ekstrand:
>
> On Mon, Jul 10, 2017 at 8:45 AM, Christian König <deathsimple@vodafone.de>
> wrote:
>
>> Am 10.07.2017 um 17:28 schrieb Jason Ekstrand:
>>
>> On Wed, Jul 5, 2017 at 6:04 PM, Dave Airlie <airlied@gmail.com> wrote:
>> [SNIP]
>> So, reading some CTS tests again, and I think we have a problem here.
>> The Vulkan spec allows you to wait on a fence that is in the unsignaled
>> state.
>>
>>
>> At least on the closed source driver that would be illegal as far as I
>> know.
>>
>
> Then they are doing workarounds in userspace.  There are definitely CTS
> tests for this:
>
> https://github.com/KhronosGroup/VK-GL-CTS/blob/master/external/vulkancts/
> modules/vulkan/synchronization/vktSynchronizationBasicFenceTests.cpp#L74
>
>
>> You can't wait on a semaphore before the signal operation is send down to
>> the kerel.
>>
>
> We (Intel) deal with this today by tracking whether or not the fence has
> been submitted and using a condition variable in userspace to sort it all
> out.
>
>
> Which sounds exactly like what AMD is doing in it's drivers as well.
>

Which doesn't work cross-process so...

> If we ever want to share fences across processes (which we do), then this
> needs to be sorted in the kernel.
>
>
> That would clearly get a NAK from my side, even Microsoft forbids wait
> before signal because you can easily end up in deadlock situations.
>

Please don't NAK things that are required by the API specification and CTS
tests.  That makes it very hard for people like me to get their jobs done.
:-)

Now, as for whether or not it's a good idea.  First off, we do have
timeouts and a status querying mechanism so an application can just set a
timeout of 1s and do something if it times out.  Second, if the application
is a compositor or something else that doesn't trust its client, it
shouldn't be using the OPAQUE_FD mechanism of Vulkan semaphore/fence
sharing anyway.  For those scenarios, they can require the untrusted client
to use FENCE_FD (sync file) and they have all of the usual guarantees about
when the work got submitted, etc.

Also, I'm more than happy to put this all behind a flag so it's not the
default behavior.

--Jason
On 11/07/17 06:09 AM, Jason Ekstrand wrote:
> On Mon, Jul 10, 2017 at 9:15 AM, Christian König
> <deathsimple@vodafone.de <mailto:deathsimple@vodafone.de>> wrote:
> 
>     Am 10.07.2017 um 17:52 schrieb Jason Ekstrand:
>>     On Mon, Jul 10, 2017 at 8:45 AM, Christian König
>>     <deathsimple@vodafone.de <mailto:deathsimple@vodafone.de>> wrote:
>>
>>         Am 10.07.2017 um 17:28 schrieb Jason Ekstrand:
>>>         On Wed, Jul 5, 2017 at 6:04 PM, Dave Airlie
>>>         <airlied@gmail.com <mailto:airlied@gmail.com>> wrote:
>>>         [SNIP]
>>>         So, reading some CTS tests again, and I think we have a
>>>         problem here.  The Vulkan spec allows you to wait on a fence
>>>         that is in the unsignaled state.
>>
>>         At least on the closed source driver that would be illegal as
>>         far as I know.
>>
>>
>>     Then they are doing workarounds in userspace.  There are
>>     definitely CTS tests for this:
>>
>>     https://github.com/KhronosGroup/VK-GL-CTS/blob/master/external/vulkancts/modules/vulkan/synchronization/vktSynchronizationBasicFenceTests.cpp#L74
>>     <https://github.com/KhronosGroup/VK-GL-CTS/blob/master/external/vulkancts/modules/vulkan/synchronization/vktSynchronizationBasicFenceTests.cpp#L74>
>>      
>>
>>         You can't wait on a semaphore before the signal operation is
>>         send down to the kerel.
>>
>>
>>     We (Intel) deal with this today by tracking whether or not the
>>     fence has been submitted and using a condition variable in
>>     userspace to sort it all out.
> 
>     Which sounds exactly like what AMD is doing in it's drivers as well.
> 
> 
> Which doesn't work cross-process so...

Surely it can be made to work by providing suitable kernel APIs to
userspace?


>>     If we ever want to share fences across processes (which we do),
>>     then this needs to be sorted in the kernel.
> 
>     That would clearly get a NAK from my side, even Microsoft forbids
>     wait before signal because you can easily end up in deadlock situations.
> 
> Please don't NAK things that are required by the API specification and
> CTS tests.

There is no requirement for every aspect of the Vulkan API specification
to be mirrored 1:1 in the kernel <-> userspace API. We have to work out
what makes sense at each level.


> That makes it very hard for people like me to get their jobs done. :-)

Jason, that's uncalled for. Christian is also just doing his job here.
Am 11.07.2017 um 04:36 schrieb Michel Dänzer:
> On 11/07/17 06:09 AM, Jason Ekstrand wrote:
>> On Mon, Jul 10, 2017 at 9:15 AM, Christian König
>> <deathsimple@vodafone.de <mailto:deathsimple@vodafone.de>> wrote:
>>
>>      Am 10.07.2017 um 17:52 schrieb Jason Ekstrand:
>>>      On Mon, Jul 10, 2017 at 8:45 AM, Christian König
>>>      <deathsimple@vodafone.de <mailto:deathsimple@vodafone.de>> wrote:
>>>
>>>          Am 10.07.2017 um 17:28 schrieb Jason Ekstrand:
>>>>          On Wed, Jul 5, 2017 at 6:04 PM, Dave Airlie
>>>>          <airlied@gmail.com <mailto:airlied@gmail.com>> wrote:
>>>>          [SNIP]
>>>>          So, reading some CTS tests again, and I think we have a
>>>>          problem here.  The Vulkan spec allows you to wait on a fence
>>>>          that is in the unsignaled state.
>>>          At least on the closed source driver that would be illegal as
>>>          far as I know.
>>>
>>>
>>>      Then they are doing workarounds in userspace.  There are
>>>      definitely CTS tests for this:
>>>
>>>      https://github.com/KhronosGroup/VK-GL-CTS/blob/master/external/vulkancts/modules/vulkan/synchronization/vktSynchronizationBasicFenceTests.cpp#L74
>>>      <https://github.com/KhronosGroup/VK-GL-CTS/blob/master/external/vulkancts/modules/vulkan/synchronization/vktSynchronizationBasicFenceTests.cpp#L74>
>>>       
>>>
>>>          You can't wait on a semaphore before the signal operation is
>>>          send down to the kerel.
>>>
>>>
>>>      We (Intel) deal with this today by tracking whether or not the
>>>      fence has been submitted and using a condition variable in
>>>      userspace to sort it all out.
>>      Which sounds exactly like what AMD is doing in it's drivers as well.
>>
>>
>> Which doesn't work cross-process so...
> Surely it can be made to work by providing suitable kernel APIs to
> userspace?

Well, that's exactly what Jason proposed to do, but I'm not very keen of 
that.

>>>      If we ever want to share fences across processes (which we do),
>>>      then this needs to be sorted in the kernel.
>>      That would clearly get a NAK from my side, even Microsoft forbids
>>      wait before signal because you can easily end up in deadlock situations.
>>
>> Please don't NAK things that are required by the API specification and
>> CTS tests.
> There is no requirement for every aspect of the Vulkan API specification
> to be mirrored 1:1 in the kernel <-> userspace API. We have to work out
> what makes sense at each level.

Exactly, if we have a synchronization problem between two processes that 
should be solved in userspace.

E.g. if process A hasn't submitted its work to the kernel it should 
flush its commands before sending a flip event to the compositor.

We can attach something to the fd making it possible for an X shared 
memory fence to be transported with it if that makes life easier.

This way the waiter implementation can still choose what to do and/or 
wait async for the client to have its flushes completed etc...

Regards,
Christian.
On Mon, Jul 10, 2017 at 02:09:42PM -0700, Jason Ekstrand wrote:
> On Mon, Jul 10, 2017 at 9:15 AM, Christian König <deathsimple@vodafone.de>
> wrote:
> 
> > Am 10.07.2017 um 17:52 schrieb Jason Ekstrand:
> >
> > On Mon, Jul 10, 2017 at 8:45 AM, Christian König <deathsimple@vodafone.de>
> > wrote:
> >
> >> Am 10.07.2017 um 17:28 schrieb Jason Ekstrand:
> >>
> >> On Wed, Jul 5, 2017 at 6:04 PM, Dave Airlie <airlied@gmail.com> wrote:
> >> [SNIP]
> >> So, reading some CTS tests again, and I think we have a problem here.
> >> The Vulkan spec allows you to wait on a fence that is in the unsignaled
> >> state.
> >>
> >>
> >> At least on the closed source driver that would be illegal as far as I
> >> know.
> >>
> >
> > Then they are doing workarounds in userspace.  There are definitely CTS
> > tests for this:
> >
> > https://github.com/KhronosGroup/VK-GL-CTS/blob/master/external/vulkancts/
> > modules/vulkan/synchronization/vktSynchronizationBasicFenceTests.cpp#L74
> >
> >
> >> You can't wait on a semaphore before the signal operation is send down to
> >> the kerel.
> >>
> >
> > We (Intel) deal with this today by tracking whether or not the fence has
> > been submitted and using a condition variable in userspace to sort it all
> > out.
> >
> >
> > Which sounds exactly like what AMD is doing in it's drivers as well.
> >
> 
> Which doesn't work cross-process so...
> 
> > If we ever want to share fences across processes (which we do), then this
> > needs to be sorted in the kernel.
> >
> >
> > That would clearly get a NAK from my side, even Microsoft forbids wait
> > before signal because you can easily end up in deadlock situations.
> >
> 
> Please don't NAK things that are required by the API specification and CTS
> tests.  That makes it very hard for people like me to get their jobs done.
> :-)
> 
> Now, as for whether or not it's a good idea.  First off, we do have
> timeouts an a status querying mechanism so an application can just set a
> timeout of 1s and do something if it times out.  Second, if the application
> is a compositor or something else that doesn't trust its client, it
> shouldn't be using the OPAQUE_FD mechanism of Vulkan semaphore/fence
> sharing anyway.  For those scenarios, they can require the untrusted client
> to use FENCE_FD (sync file) and they have all of the usual guarantees about
> when the work got submitted, etc.
> 
> Also, I'm more than happy to put this all behind a flag so it's not the
> default behavior.

Android had a similar requirement to have a fence fd before the fence
existed in hwc1, before they fixed that in hwc2. But it's probably still
useful for deeply pipelined renderers with little memory, aka tiled
renderers on phones.

The idea we've tossed around is to create a so-called future fence. In the
kernel if you try to deref a future fence, the usual thing that happens is
you'll block (interruptibly, which we can because fence lookup might
fail), _until_ a real fence shows up and can be returned. That implements
the uapi expectations without risking deadlocks in the kernel, albeit with
a bit much blocking. Still better than doing the same in userspace (since
in userspace you probably need to do that when importing the fence, not at
execbuf time).

2nd step would then be to give drivers with a robust hang recovery logic a
special interface to be able to instantiate hw waits on such a future
fence before the signalling part is queued up. As long as any waiters have
robust hang recovery we still don't have a problem. Everyone else (e.g.
drm display-only drivers, v4l nodes, camera ip, whatever else participates
in the shared buffers and fences stuff) would still block until the real
fence shows up.

Similar idea should work for semaphores too. Gustavo did look into the
future fence stuff IIRC, I think there was even an RFC some time ago.

Cheers, Daniel
On Tue, Jul 11, 2017 at 12:17 AM, Christian König <deathsimple@vodafone.de>
wrote:

> Am 11.07.2017 um 04:36 schrieb Michel Dänzer:
>
>> On 11/07/17 06:09 AM, Jason Ekstrand wrote:
>>
>>> On Mon, Jul 10, 2017 at 9:15 AM, Christian König
>>> <deathsimple@vodafone.de <mailto:deathsimple@vodafone.de>> wrote:
>>>
>>>      Am 10.07.2017 um 17:52 schrieb Jason Ekstrand:
>>>
>>>>      On Mon, Jul 10, 2017 at 8:45 AM, Christian König
>>>>      <deathsimple@vodafone.de <mailto:deathsimple@vodafone.de>> wrote:
>>>>
>>>>          Am 10.07.2017 um 17:28 schrieb Jason Ekstrand:
>>>>
>>>>>          On Wed, Jul 5, 2017 at 6:04 PM, Dave Airlie
>>>>>          <airlied@gmail.com <mailto:airlied@gmail.com>> wrote:
>>>>>          [SNIP]
>>>>>          So, reading some CTS tests again, and I think we have a
>>>>>          problem here.  The Vulkan spec allows you to wait on a fence
>>>>>          that is in the unsignaled state.
>>>>>
>>>>          At least on the closed source driver that would be illegal as
>>>>          far as I know.
>>>>
>>>>
>>>>      Then they are doing workarounds in userspace.  There are
>>>>      definitely CTS tests for this:
>>>>
>>>>      https://github.com/KhronosGroup/VK-GL-CTS/blob/master/
>>>> external/vulkancts/modules/vulkan/synchronization/vktSync
>>>> hronizationBasicFenceTests.cpp#L74
>>>>      <https://github.com/KhronosGroup/VK-GL-CTS/blob/master/
>>>> external/vulkancts/modules/vulkan/synchronization/vktSync
>>>> hronizationBasicFenceTests.cpp#L74>
>>>>
>>>>          You can't wait on a semaphore before the signal operation is
>>>>          send down to the kerel.
>>>>
>>>>
>>>>      We (Intel) deal with this today by tracking whether or not the
>>>>      fence has been submitted and using a condition variable in
>>>>      userspace to sort it all out.
>>>>
>>>      Which sounds exactly like what AMD is doing in it's drivers as well.
>>>
>>>
>>> Which doesn't work cross-process so...
>>>
>> Surely it can be made to work by providing suitable kernel APIs to
>> userspace?
>>
>
> Well, that's exactly what Jason proposed to do, but I'm not very keen of
> that.
>
>      If we ever want to share fences across processes (which we do),
>>>>      then this needs to be sorted in the kernel.
>>>>
>>>      That would clearly get a NAK from my side, even Microsoft forbids
>>>      wait before signal because you can easily end up in deadlock
>>> situations.
>>>
>>> Please don't NAK things that are required by the API specification and
>>> CTS tests.
>>>
>> There is no requirement for every aspect of the Vulkan API specification
>> to be mirrored 1:1 in the kernel <-> userspace API. We have to work out
>> what makes sense at each level.
>>
>
> Exactly, if we have a synchronization problem between two processes that
> should be solved in userspace.
>
> E.g. if process A hasn't submitted it's work to the kernel it should flush
> it's commands before sending a flip event to the compositor.
>

Ok, I think there is some confusion here on what is being proposed.  Here
are some things that are *not* being proposed:

 1. This does *not* allow a client to block another client's GPU work
indefinitely.  This is entirely for a CPU wait API to allow for a "wait for
submit" as well as a "wait for finish".
 2. This is *not* for system compositors that need to be robust against
malicious clients.

The expected use for the OPAQUE_FD is two very tightly integrated processes
which trust each other but need to be able to share synchronization
primitives.  One of the things they need to be able to do (as per the
Vulkan API) with those synchronization primitives is a "wait for submit and
finish" operation.  I'm happy for the kernel to have separate APIs for
"wait for submit" and "wait for finish" if that's more palatable but i
don't really see why there is such a strong reaction to the "wait for
submit and finish" behavior.

Could we do this "in userspace"?  Yes, with added kernel API.  we would
need some way of strapping a second FD onto a syncobj or combining two FDs
into one to send across the wire or something like that, then add a shared
memory segment, and then pile on a bunch of code to do cross-process
condition variables and state tracking.  I really don't see how that's a
better solution than adding a flag to the kernel API to just do what we
want.
On Tue, Jul 11, 2017 at 12:22 AM, Daniel Vetter <daniel@ffwll.ch> wrote:

> On Mon, Jul 10, 2017 at 02:09:42PM -0700, Jason Ekstrand wrote:
> > On Mon, Jul 10, 2017 at 9:15 AM, Christian König <
> deathsimple@vodafone.de>
> > wrote:
> >
> > > Am 10.07.2017 um 17:52 schrieb Jason Ekstrand:
> > >
> > > On Mon, Jul 10, 2017 at 8:45 AM, Christian König <
> deathsimple@vodafone.de>
> > > wrote:
> > >
> > >> Am 10.07.2017 um 17:28 schrieb Jason Ekstrand:
> > >>
> > >> On Wed, Jul 5, 2017 at 6:04 PM, Dave Airlie <airlied@gmail.com>
> wrote:
> > >> [SNIP]
> > >> So, reading some CTS tests again, and I think we have a problem here.
> > >> The Vulkan spec allows you to wait on a fence that is in the
> unsignaled
> > >> state.
> > >>
> > >>
> > >> At least on the closed source driver that would be illegal as far as I
> > >> know.
> > >>
> > >
> > > Then they are doing workarounds in userspace.  There are definitely CTS
> > > tests for this:
> > >
> > > https://github.com/KhronosGroup/VK-GL-CTS/blob/
> master/external/vulkancts/
> > > modules/vulkan/synchronization/vktSynchronizationBasicFenceTe
> sts.cpp#L74
> > >
> > >
> > >> You can't wait on a semaphore before the signal operation is send
> down to
> > >> the kerel.
> > >>
> > >
> > > We (Intel) deal with this today by tracking whether or not the fence
> has
> > > been submitted and using a condition variable in userspace to sort it
> all
> > > out.
> > >
> > >
> > > Which sounds exactly like what AMD is doing in it's drivers as well.
> > >
> >
> > Which doesn't work cross-process so...
> >
> > > If we ever want to share fences across processes (which we do), then
> this
> > > needs to be sorted in the kernel.
> > >
> > >
> > > That would clearly get a NAK from my side, even Microsoft forbids wait
> > > before signal because you can easily end up in deadlock situations.
> > >
> >
> > Please don't NAK things that are required by the API specification and
> CTS
> > tests.  That makes it very hard for people like me to get their jobs
> done.
> > :-)
> >
> > Now, as for whether or not it's a good idea.  First off, we do have
> > timeouts an a status querying mechanism so an application can just set a
> > timeout of 1s and do something if it times out.  Second, if the
> application
> > is a compositor or something else that doesn't trust its client, it
> > shouldn't be using the OPAQUE_FD mechanism of Vulkan semaphore/fence
> > sharing anyway.  For those scenarios, they can require the untrusted
> client
> > to use FENCE_FD (sync file) and they have all of the usual guarantees
> about
> > when the work got submitted, etc.
> >
> > Also, I'm more than happy to put this all behind a flag so it's not the
> > default behavior.
>
> Android had a similar requirement to have a fence fd before the fence
> existed in hwc1, before they fixed that in hwc2. But it's probably still
> useful for deeply pipelined renderes with littel memory, aka tiled
> renderers on phones.
>
> The idea we've tossed around is to create a so-called future fence. In the
> kernel if you try to deref a future fence, the usual thing that happens is
> you'll block (interruptibly, which we can because fence lookup might
> fail), _until_ a real fence shows up and can be returned. That implements
> the uapi expectations without risking deadlocks in the kernel, albeit with
> a bit much blocking. Still better than doing the same in userspace (since
> in userspace you probably need to do that when importing the fence, not at
> execbuf time).
>

Yes, I'm aware of the future fence idea.  However, that's not really all
that related.  We're not talking about blocking GPU work here.  We're
talking about the CPU wait API having support for "wait for submit and
signal" behavior instead of just "wait for signal".
Am 11.07.2017 um 17:43 schrieb Jason Ekstrand:
> On Tue, Jul 11, 2017 at 12:17 AM, Christian König 
> <deathsimple@vodafone.de <mailto:deathsimple@vodafone.de>> wrote:
>
>     [SNIP]
>
>                      If we ever want to share fences across processes
>                 (which we do),
>                      then this needs to be sorted in the kernel.
>
>                  That would clearly get a NAK from my side, even
>             Microsoft forbids
>                  wait before signal because you can easily end up in
>             deadlock situations.
>
>             Please don't NAK things that are required by the API
>             specification and
>             CTS tests.
>
>         There is no requirement for every aspect of the Vulkan API
>         specification
>         to be mirrored 1:1 in the kernel <-> userspace API. We have to
>         work out
>         what makes sense at each level.
>
>
>     Exactly, if we have a synchronization problem between two
>     processes that should be solved in userspace.
>
>     E.g. if process A hasn't submitted it's work to the kernel it
>     should flush it's commands before sending a flip event to the
>     compositor.
>
>
> Ok, I think there is some confusion here on what is being proposed.  
> Here are some things that are *not* being proposed:
>
>  1. This does *not* allow a client to block another client's GPU work 
> indefinitely.  This is entirely for a CPU wait API to allow for a 
> "wait for submit" as well as a "wait for finish".
Yeah, that is a rather good point.

>  2. This is *not* for system compositors that need to be robust 
> against malicious clients.
I can see the point, but I think the kernel interface should still be 
idiot-proof even in the unlikely case the universe suddenly stops 
producing idiots.

> The expected use for the OPAQUE_FD is two very tightly integrated 
> processes which trust each other but need to be able to share 
> synchronization primitives.
Well, that raises a really important question: What is the actual use 
case for this if not communication between client and compositor?

> Could we do this "in userspace"?  Yes, with added kernel API.  we 
> would need some way of strapping a second FD onto a syncobj or 
> combining two FDs into one to send across the wire or something like 
> that, then add a shared memory segment, and then pile on a bunch of 
> code to do cross-process condition variables and state tracking.  I 
> really don't see how that's a better solution than adding a flag to 
> the kernel API to just do what we want.
Way too complicated.

My thinking was rather to optionally allow a single page to be mmap()ed 
into the process address space from the fd and then put a futex, 
pthread_cond or X shared memory fence or anything like that into it.

Regards,
Christian.
On 12 July 2017 at 17:39, Christian König <deathsimple@vodafone.de> wrote:
> Am 11.07.2017 um 17:43 schrieb Jason Ekstrand:
>
> On Tue, Jul 11, 2017 at 12:17 AM, Christian König <deathsimple@vodafone.de>
> wrote:
>>
>> [SNIP]
>>>>>
>>>>>      If we ever want to share fences across processes (which we do),
>>>>>      then this needs to be sorted in the kernel.
>>>>
>>>>      That would clearly get a NAK from my side, even Microsoft forbids
>>>>      wait before signal because you can easily end up in deadlock
>>>> situations.
>>>>
>>>> Please don't NAK things that are required by the API specification and
>>>> CTS tests.
>>>
>>> There is no requirement for every aspect of the Vulkan API specification
>>> to be mirrored 1:1 in the kernel <-> userspace API. We have to work out
>>> what makes sense at each level.
>>
>>
>> Exactly, if we have a synchronization problem between two processes that
>> should be solved in userspace.
>>
>> E.g. if process A hasn't submitted its work to the kernel, it should flush
>> its commands before sending a flip event to the compositor.
>
>
> Ok, I think there is some confusion here on what is being proposed.  Here
> are some things that are *not* being proposed:
>
>  1. This does *not* allow a client to block another client's GPU work
> indefinitely.  This is entirely for a CPU wait API to allow for a "wait for
> submit" as well as a "wait for finish".
>
> Yeah, that is a rather good point.
>
>  2. This is *not* for system compositors that need to be robust against
> malicious clients.
>
> I can see the point, but I think the kernel interface should still be
> idiot-proof even in the unlikely case the universe suddenly stops
> producing idiots.
>
> The expected use for the OPAQUE_FD is two very tightly integrated processes
> which trust each other but need to be able to share synchronization
> primitives.
>
> Well, that raises a really important question: What is the actual use case
> for this if not communication between client and compositor?

VR clients and compositors.

> Could we do this "in userspace"?  Yes, with added kernel API.  we would need
> some way of strapping a second FD onto a syncobj or combining two FDs into
> one to send across the wire or something like that, then add a shared memory
> segment, and then pile on a bunch of code to do cross-process condition
> variables and state tracking.  I really don't see how that's a better
> solution than adding a flag to the kernel API to just do what we want.
>
> Way too complicated.
>
> My thinking was rather to optionally allow a single page to be mmap()ed into
> the process address space from the fd and then put a futex, pthread_cond or
> X shared memory fence or anything like that into it.
>

Is that easier than just waiting in the kernel, I'm not sure how
optimised we need this path to be.

Dave.
On Wed, Jul 12, 2017 at 1:39 AM, Dave Airlie <airlied@gmail.com> wrote:

> On 12 July 2017 at 17:39, Christian König <deathsimple@vodafone.de> wrote:
> > Am 11.07.2017 um 17:43 schrieb Jason Ekstrand:
> >
> > On Tue, Jul 11, 2017 at 12:17 AM, Christian König <
> deathsimple@vodafone.de>
> > wrote:
> >>
> >> [SNIP]
> >>>>>
> >>>>>      If we ever want to share fences across processes (which we do),
> >>>>>      then this needs to be sorted in the kernel.
> >>>>
> >>>>      That would clearly get a NAK from my side, even Microsoft forbids
> >>>>      wait before signal because you can easily end up in deadlock
> >>>> situations.
> >>>>
> >>>> Please don't NAK things that are required by the API specification and
> >>>> CTS tests.
> >>>
> >>> There is no requirement for every aspect of the Vulkan API
> specification
> >>> to be mirrored 1:1 in the kernel <-> userspace API. We have to work out
> >>> what makes sense at each level.
> >>
> >>
> >> Exactly, if we have a synchronization problem between two processes that
> >> should be solved in userspace.
> >>
> >> E.g. if process A hasn't submitted its work to the kernel, it should
> flush
> >> its commands before sending a flip event to the compositor.
> >
> >
> > Ok, I think there is some confusion here on what is being proposed.  Here
> > are some things that are *not* being proposed:
> >
> >  1. This does *not* allow a client to block another client's GPU work
> > indefinitely.  This is entirely for a CPU wait API to allow for a "wait
> for
> > submit" as well as a "wait for finish".
> >
> > Yeah, that is a rather good point.
> >
> >  2. This is *not* for system compositors that need to be robust against
> > malicious clients.
> >
> > I can see the point, but I think the kernel interface should still be
> idiot-proof
> > even in the unlikely case the universe suddenly stops producing
> > idiots.
>

Fair enough.  Maybe I've spent too much time in the Vulkan world where
being an idiot is, theoretically, disallowed.  And, by "disallowed", I mean
that you're free to be one with the understanding that your process may get
straight-up killed on *any* API violation.


> > The expected use for the OPAQUE_FD is two very tightly integrated
> processes
> > which trust each other but need to be able to share synchronization
> > primitives.
> >
> > Well, that raises a really important question: What is the actual use
> case
> > for this if not communication between client and compositor?
>
> VR clients and compositors.
>

Yeah, that.  Wouldn't they want the same security guarantees as your OS
compositor?  One would think, but none of the people making VR compositors
seem to care all that much about the case of malicious clients.  In
general, they run a fairly closed platform where they aren't really running
"arbitrary" apps on their compositor.  Also, they tend to write both the
client and server sides of the VR compositor protocol and the only thing
the app touches is their API.  I'm not going to try too hard to justify
their lack of concern about deadlock, but there it is.


> > Could we do this "in userspace"?  Yes, with added kernel API.  we would
> need
> > some way of strapping a second FD onto a syncobj or combining two FDs
> into
> > one to send across the wire or something like that, then add a shared
> memory
> > segment, and then pile on a bunch of code to do cross-process condition
> > variables and state tracking.  I really don't see how that's a better
> > solution than adding a flag to the kernel API to just do what we want.
> >
> > Way too complicated.
> >
> > My thinking was rather to optionally allow a single page to be mmap()ed
> into
> > the process address space from the fd and then put a futex, pthread_cond
> or
> > X shared memory fence or anything like that into it.
>

A single page + fence sounds a lot like a DRM BO.  One of my original plans
for implementing the feature was to just use a single-page BO and do some
userspace stuff in the mapped page.  There are two problems here:

1) It could be very easy for a malicious client to map the page and then
mess up whatever CPU data structure I use for the semaphore.  I could
probably make it robust but there is an attack vector there that's going to
be tricky.
2) I have no way, on import, to tell the difference between a 4K memory
object and a fence.

Then syncobj came along and promised to solve all my problems...


> Is that easier than just waiting in the kernel, I'm not sure how
> optimised we need this path to be.
>

I don't think so.  I think it's more-or-less the same code regardless of
how it's done.  The advantage of doing it in the kernel is that it's
standardized (we don't have to both go write that userspace code) and it
doesn't have the problems stated above.
Am 12.07.2017 um 17:53 schrieb Jason Ekstrand:
> [SNIP]
>
>     Is that easier than just waiting in the kernel, I'm not sure how
>     optimised we need this path to be.
>
>
> I don't think so.  I think it's more-or-less the same code regardless 
> of how it's done.  The advantage of doing it in the kernel is that 
> it's standardized (we don't have to both go write that userspace code) 
> and it doesn't have the problems stated above.

Ok, I'm convinced. The next big question, then, is: how do we do it?

I mean writing an IOCTL to wait for a fence to appear is simple, but do 
we also need to wait for a fence to change?

As long as waits don't consume fences, that might be rather tricky.

Christian.
On Wed, Jul 12, 2017 at 9:45 AM, Christian König <deathsimple@vodafone.de>
wrote:

> Am 12.07.2017 um 17:53 schrieb Jason Ekstrand:
>
> [SNIP]
>
>
>> Is that easier than just waiting in the kernel, I'm not sure how
>> optimised we need this path to be.
>>
>
> I don't think so.  I think it's more-or-less the same code regardless of
> how it's done.  The advantage of doing it in the kernel is that it's
> standardized (we don't have to both go write that userspace code) and it
> doesn't have the problems stated above.
>
>
> Ok, I'm convinced. The next big question, then, is: how do we do it?
>
> I mean writing an IOCTL to wait for a fence to appear is simple, but do we
> also need to wait for a fence to change?
>

I don't think so.  Taking a page from the Vulkan book, I think the "right"
thing to do would be to have a reset ioctl that simply deletes the
dma_fence and replaces it with NULL.  Then the only behavior you care about
is "wait for a fence to appear".  The usage pattern becomes:

 1) Submit work to signal the syncobj
 2) Wait on the syncobj (this may happen before the submit)
 3) Once everyone is done waiting, reset the syncobj

If someone does a wait on a stale syncobj that has completed and not yet
been reset, they return immediately.  Yes, there is a possible race between
2 and 3, especially if 2 is waiting on multiple syncobjs.  Ideally, we'd
make it so that a reset in the middle of someone else's wait wouldn't cause
them to wait until that syncobj gets triggered a second time.  However, I
don't think it's a deal-breaker as any client that sets the "wait for
submit" flag should know that it's liable to wait forever and set a timeout.