[2/2,RFC] drm: add syncobj timeline support

Submitted by Zhou, David(ChunMing) on Aug. 22, 2018, 8:49 a.m.

Details

Message ID 20180822084928.19353-1-david1.zhou@amd.com
State New
Headers show
Series "Series without cover letter" ( rev: 1 ) in AMD X.Org drivers

Not browsing as part of any series.

Commit Message

Zhou, David(ChunMing) Aug. 22, 2018, 8:49 a.m.
VK_KHR_timeline_semaphore:
This extension introduces a new type of semaphore that has an integer payload
identifying a point in a timeline. Such timeline semaphores support the
following operations:
  * Host query - A host operation that allows querying the payload of the
    timeline semaphore.
  * Host wait - A host operation that allows a blocking wait for a
    timeline semaphore to reach a specified value.
  * Device wait - A device operation that allows waiting for a
    timeline semaphore to reach a specified value.
  * Device signal - A device operation that allows advancing the
    timeline semaphore to a specified value.

Since it's a timeline, that means the front time point(PT) always is signaled before the late PT.
a. signal PT design:
Signal PT fence N depends on PT[N-1] fence and signal opertion fence, when PT[N] fence is signaled,
the timeline will increase to value of PT[N].
b. wait PT design:
Wait PT fence is signaled by reaching timeline point value, when timeline is increasing, will compare
wait PTs value with new timeline value, if PT value is lower than timeline value, then wait PT will be
signaled, otherwise keep in list. semaphore wait operation can wait on any point of timeline,
so need a RB tree to order them. And wait PT could ahead of signal PT, we need a sumission fence to
perform that.

TODO:
CPU query and wait on timeline semaphore.

Change-Id: I9f09aae225e268442c30451badac40406f24262c
Signed-off-by: Chunming Zhou <david1.zhou@amd.com>
Cc: Christian Konig <christian.koenig@amd.com>
Cc: Dave Airlie <airlied@redhat.com>
Cc: Daniel Rakos <Daniel.Rakos@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c |   7 +-
 drivers/gpu/drm/drm_syncobj.c          | 385 ++++++++++++++++++++++++++++++++-
 drivers/gpu/drm/v3d/v3d_gem.c          |   4 +-
 drivers/gpu/drm/vc4/vc4_gem.c          |   2 +-
 include/drm/drm_syncobj.h              |  45 +++-
 include/uapi/drm/drm.h                 |   3 +-
 6 files changed, 435 insertions(+), 11 deletions(-)

Patch hide | download patch | download mbox

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index d42d1c8f78f6..463cc8960723 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -1105,7 +1105,7 @@  static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p,
 {
 	int r;
 	struct dma_fence *fence;
-	r = drm_syncobj_find_fence(p->filp, handle, &fence);
+	r = drm_syncobj_find_fence(p->filp, handle, &fence, 0);
 	if (r)
 		return r;
 
@@ -1193,8 +1193,9 @@  static void amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p)
 {
 	int i;
 
-	for (i = 0; i < p->num_post_dep_syncobjs; ++i)
-		drm_syncobj_replace_fence(p->post_dep_syncobjs[i], p->fence);
+	for (i = 0; i < p->num_post_dep_syncobjs; ++i) {
+		drm_syncobj_signal_fence(p->post_dep_syncobjs[i], p->fence, 0);
+	}
 }
 
 static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
index 70af32d0def1..3709f36c901e 100644
--- a/drivers/gpu/drm/drm_syncobj.c
+++ b/drivers/gpu/drm/drm_syncobj.c
@@ -187,6 +187,191 @@  void drm_syncobj_replace_fence(struct drm_syncobj *syncobj,
 }
 EXPORT_SYMBOL(drm_syncobj_replace_fence);
 
+static void drm_syncobj_timeline_signal_submission_fences(struct drm_syncobj *syncobj)
+{
+	struct rb_node *node = NULL;
+	struct drm_syncobj_wait_pt *wait_pt = NULL;
+
+	spin_lock(&syncobj->lock);
+	for(node = rb_first(&syncobj->syncobj_timeline.wait_pt_tree);
+	    node != NULL; node = rb_next(node)) {
+	    wait_pt = rb_entry(node, struct drm_syncobj_wait_pt, node);
+	    if (wait_pt->value <= syncobj->syncobj_timeline.signal_point) {
+		    if (wait_pt->submission_fence)
+			dma_fence_signal(&wait_pt->submission_fence->base);
+	    } else {
+		    /* the loop is from left to right, the later entry value is
+		     * bigger, so don't need to check any more */
+		    break;
+	    }
+	}
+	spin_unlock(&syncobj->lock);
+}
+
+static void drm_syncobj_timeline_signal_wait_pts(struct drm_syncobj *syncobj)
+{
+	struct rb_node *node = NULL;
+	struct drm_syncobj_wait_pt *wait_pt = NULL;
+
+	spin_lock(&syncobj->lock);
+	for(node = rb_first(&syncobj->syncobj_timeline.wait_pt_tree);
+	    node != NULL; ) {
+		wait_pt = rb_entry(node, struct drm_syncobj_wait_pt, node);
+		node = rb_next(node);
+		if (wait_pt->value <= syncobj->syncobj_timeline.timeline) {
+			dma_fence_signal(&wait_pt->base.base);
+			rb_erase(&wait_pt->node,
+				 &syncobj->syncobj_timeline.wait_pt_tree);
+			RB_CLEAR_NODE(&wait_pt->node);
+			if (wait_pt->submission_fence)
+				dma_fence_put(&wait_pt->submission_fence->base);
+			/* kfree(wait_pt) is excuted by fence put */
+			dma_fence_put(&wait_pt->base.base);
+		} else {
+			/* the loop is from left to right, the later entry value is
+			 * bigger, so don't need to check any more */
+			break;
+		}
+	}
+	spin_unlock(&syncobj->lock);
+}
+
+
+static void pt_fence_cb(struct drm_syncobj_signal_pt *signal_pt)
+{
+	struct dma_fence *fence = NULL;
+	struct drm_syncobj *syncobj;
+
+	fence = signal_pt->signal_fence;
+	signal_pt->signal_fence = NULL;
+	dma_fence_put(fence);
+	fence = signal_pt->pre_pt_base;
+	signal_pt->pre_pt_base = NULL;
+	dma_fence_put(fence);
+
+	syncobj = signal_pt->syncobj;
+	spin_lock(&syncobj->lock);
+	list_del(&signal_pt->list);
+	syncobj->syncobj_timeline.timeline = signal_pt->value;
+	spin_unlock(&syncobj->lock);
+	/* kfree(signal_pt) will be  executed by below fence put */
+	dma_fence_put(&signal_pt->base.base);
+	drm_syncobj_timeline_signal_wait_pts(syncobj);
+}
+static void pt_signal_fence_func(struct dma_fence *fence,
+				 struct dma_fence_cb *cb)
+{
+	struct drm_syncobj_signal_pt *signal_pt =
+		container_of(cb, struct drm_syncobj_signal_pt, signal_cb);
+
+	if (signal_pt->pre_pt_base &&
+	    !dma_fence_is_signaled(signal_pt->pre_pt_base))
+		return;
+
+	pt_fence_cb(signal_pt);
+}
+static void pt_pre_fence_func(struct dma_fence *fence,
+				 struct dma_fence_cb *cb)
+{
+	struct drm_syncobj_signal_pt *signal_pt =
+		container_of(cb, struct drm_syncobj_signal_pt, pre_pt_cb);
+
+	if (signal_pt->signal_fence &&
+	    !dma_fence_is_signaled(signal_pt->pre_pt_base))
+		return;
+
+	pt_fence_cb(signal_pt);
+}
+
+static int drm_syncobj_timeline_signal_fence(struct drm_syncobj *syncobj,
+					     struct dma_fence *fence,
+					     u64 point)
+{
+	struct drm_syncobj_signal_pt *signal_pt =
+		kzalloc(sizeof(struct drm_syncobj_signal_pt), GFP_KERNEL);
+	struct drm_syncobj_signal_pt *tail_pt;
+	struct dma_fence *tail_pt_fence = NULL;
+	int ret = 0;
+
+	if (!signal_pt)
+		return -ENOMEM;
+	if (syncobj->syncobj_timeline.signal_point >= point) {
+		DRM_WARN("A later signal is ready!");
+		goto out;
+	}
+	if (fence)
+		dma_fence_get(fence);
+	spin_lock(&syncobj->lock);
+	spin_lock_init(&signal_pt->base.lock);
+	dma_fence_init(&signal_pt->base.base,
+		       &drm_syncobj_stub_fence_ops,
+		       &signal_pt->base.lock,
+		       syncobj->syncobj_timeline.timeline_context, point);
+	signal_pt->signal_fence =
+		rcu_dereference_protected(fence,
+					  lockdep_is_held(&fence->lock));
+	if (!list_empty(&syncobj->syncobj_timeline.signal_pt_list)) {
+		tail_pt = list_last_entry(&syncobj->syncobj_timeline.signal_pt_list,
+					  struct drm_syncobj_signal_pt, list);
+		tail_pt_fence = &tail_pt->base.base;
+		if (dma_fence_is_signaled(tail_pt_fence))
+			tail_pt_fence = NULL;
+	}
+	if (tail_pt_fence)
+		signal_pt->pre_pt_base =
+			dma_fence_get(rcu_dereference_protected(tail_pt_fence,
+								lockdep_is_held(&tail_pt_fence->lock)));
+
+	signal_pt->value = point;
+	syncobj->syncobj_timeline.signal_point = point;
+	signal_pt->syncobj = syncobj;
+	INIT_LIST_HEAD(&signal_pt->list);
+	list_add_tail(&signal_pt->list, &syncobj->syncobj_timeline.signal_pt_list);
+	spin_unlock(&syncobj->lock);
+	drm_syncobj_timeline_signal_submission_fences(syncobj);
+	/**
+	 * Every pt is depending on signal fence and previous pt fence, add
+	 * callbacks to them
+	 */
+	if (!dma_fence_is_signaled(signal_pt->signal_fence))
+		dma_fence_add_callback(signal_pt->signal_fence,
+				       &signal_pt->signal_cb,
+				       pt_signal_fence_func);
+	else
+		pt_signal_fence_func(signal_pt->signal_fence,
+				     &signal_pt->signal_cb);
+	if (signal_pt->pre_pt_base && !dma_fence_is_signaled(signal_pt->pre_pt_base))
+		dma_fence_add_callback(signal_pt->pre_pt_base,
+				       &signal_pt->pre_pt_cb,
+				       pt_pre_fence_func);
+	else
+		pt_pre_fence_func(signal_pt->pre_pt_base, &signal_pt->pre_pt_cb);
+
+
+	return 0;
+out:
+	kfree(signal_pt);
+	return ret;
+}
+
+/**
+ * drm_syncobj_signal_fence - place fence into a sync object.
+ * @syncobj: Sync object to replace fence in
+ * @fence: fence to install in sync file.
+ *
+ * This places the fence into normal or timeline sync object
+ */
+void drm_syncobj_signal_fence(struct drm_syncobj *syncobj,
+			      struct dma_fence *fence,
+			      u64 point)
+{
+	if (syncobj->type == DRM_SYNCOBJ_TYPE_NORMAL)
+		drm_syncobj_replace_fence(syncobj, fence);
+	else if (syncobj->type == DRM_SYNCOBJ_TYPE_TIMELINE)
+		drm_syncobj_timeline_signal_fence(syncobj, fence, point);
+}
+EXPORT_SYMBOL(drm_syncobj_signal_fence);
+
 static int drm_syncobj_assign_null_handle(struct drm_syncobj *syncobj)
 {
 	struct drm_syncobj_stub_fence *fence;
@@ -205,12 +390,150 @@  static int drm_syncobj_assign_null_handle(struct drm_syncobj *syncobj)
 
 	return 0;
 }
+static struct drm_syncobj_wait_pt *
+drm_syncobj_timeline_lookup_wait_pt(struct drm_syncobj *syncobj, u64 point)
+{
+    struct rb_node *node = syncobj->syncobj_timeline.wait_pt_tree.rb_node;
+    struct drm_syncobj_wait_pt *wait_pt = NULL;
+
+
+    spin_lock(&syncobj->lock);
+    while(node) {
+	    int result = point - wait_pt->value;
+
+	    wait_pt = rb_entry(node, struct drm_syncobj_wait_pt, node);
+	    if (result < 0)
+		    node = node->rb_left;
+	    else if (result > 0)
+		    node = node->rb_right;
+	    else
+		    break;
+    }
+    spin_unlock(&syncobj->lock);
+
+    return wait_pt;
+}
+
+static struct drm_syncobj_wait_pt *
+drm_syncobj_timeline_create_wait_pt(struct drm_syncobj *syncobj, u64 point)
+{
+	struct drm_syncobj_wait_pt *wait_pt;
+	struct rb_node **new = &(syncobj->syncobj_timeline.wait_pt_tree.rb_node), *parent = NULL;
+
+	wait_pt = kzalloc(sizeof(*wait_pt), GFP_KERNEL);
+	if (!wait_pt)
+		return NULL;
+	spin_lock_init(&wait_pt->base.lock);
+	dma_fence_init(&wait_pt->base.base,
+		       &drm_syncobj_stub_fence_ops,
+		       &wait_pt->base.lock,
+		       syncobj->syncobj_timeline.timeline_context, point);
+	wait_pt->submission_fence = NULL;
+	/* check if the pt is already sumbitted, if yes, then don't need to
+	 * create submission fence, otherwise create it */
+	if (point > syncobj->syncobj_timeline.signal_point) {
+		wait_pt->submission_fence =
+			kzalloc(sizeof(struct drm_syncobj_stub_fence),
+				GFP_KERNEL);
+		if (!wait_pt->submission_fence) {
+			dma_fence_put(&wait_pt->base.base);
+			return NULL;
+		}
+		spin_lock_init(&wait_pt->submission_fence->lock);
+		dma_fence_init(&wait_pt->submission_fence->base,
+			       &drm_syncobj_stub_fence_ops,
+			       &wait_pt->submission_fence->lock,
+			       syncobj->syncobj_timeline.submission_context,
+			       point);
+	}
+	wait_pt->value = point;
+
+	/* wait pt must be in an order, so that we can easily lookup and signal
+	 * it */
+	spin_lock(&syncobj->lock);
+	if (point <= syncobj->syncobj_timeline.timeline)
+		dma_fence_signal(&wait_pt->base.base);
+	if (wait_pt->submission_fence &&
+	    point <= syncobj->syncobj_timeline.signal_point)
+		dma_fence_signal(&wait_pt->submission_fence->base);
+	while(*new) {
+		struct drm_syncobj_wait_pt *this =
+			rb_entry(*new, struct drm_syncobj_wait_pt, node);
+		int result = wait_pt->value - this->value;
+
+		parent = *new;
+		if (result < 0)
+			new = &((*new)->rb_left);
+		else if (result > 0)
+			new = &((*new)->rb_right);
+		else
+			goto exist;
+	}
+
+	rb_link_node(&wait_pt->node, parent, new);
+	rb_insert_color(&wait_pt->node, &syncobj->syncobj_timeline.wait_pt_tree);
+	spin_unlock(&syncobj->lock);
+	return wait_pt;
+exist:
+	spin_unlock(&syncobj->lock);
+	if (wait_pt->submission_fence)
+		dma_fence_put(&wait_pt->submission_fence->base);
+	dma_fence_put(&wait_pt->base.base);
+	wait_pt = drm_syncobj_timeline_lookup_wait_pt(syncobj, point);
+	return wait_pt;
+}
+
+static struct dma_fence *
+drm_syncobj_timeline_point_get(struct drm_syncobj *syncobj, u64 point, u64 flag)
+{
+	struct drm_syncobj_wait_pt *wait_pt;
+
+	/* already signaled, simply return a signaled stub fence */
+	if (point <= syncobj->syncobj_timeline.timeline) {
+		struct drm_syncobj_stub_fence *fence;
+
+		fence = kzalloc(sizeof(*fence), GFP_KERNEL);
+		if (fence == NULL)
+			return NULL;
+
+		spin_lock_init(&fence->lock);
+		dma_fence_init(&fence->base, &drm_syncobj_stub_fence_ops,
+			       &fence->lock, 0, 0);
+		dma_fence_signal(&fence->base);
+		return &fence->base;
+	}
+
+	/* check if the wait pt exists */
+	wait_pt = drm_syncobj_timeline_lookup_wait_pt(syncobj, point);
+	if (!wait_pt) {
+		/* This is a new wait pt, so create it */
+		wait_pt = drm_syncobj_timeline_create_wait_pt(syncobj, point);
+		if (!wait_pt)
+			return NULL;
+	}
+	if (wait_pt) {
+		struct dma_fence *fence;
+		if (wait_pt->submission_fence) {
+			if (flag & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT)
+				dma_fence_wait(&wait_pt->submission_fence->base, true);
+			else if (!dma_fence_is_signaled(&wait_pt->submission_fence->base))
+				return NULL;
+		}
+		rcu_read_lock();
+		fence = dma_fence_get_rcu(&wait_pt->base.base);
+		rcu_read_unlock();
+		return fence;
+	}
+	return NULL;
+}
+
 
 /**
  * drm_syncobj_find_fence - lookup and reference the fence in a sync object
  * @file_private: drm file private pointer
  * @handle: sync object handle to lookup.
  * @fence: out parameter for the fence
+ * @point: timeline point
  *
  * This is just a convenience function that combines drm_syncobj_find() and
  * drm_syncobj_fence_get().
@@ -221,7 +544,8 @@  static int drm_syncobj_assign_null_handle(struct drm_syncobj *syncobj)
  */
 int drm_syncobj_find_fence(struct drm_file *file_private,
 			   u32 handle,
-			   struct dma_fence **fence)
+			   struct dma_fence **fence,
+			   u64 point)
 {
 	struct drm_syncobj *syncobj = drm_syncobj_find(file_private, handle);
 	int ret = 0;
@@ -229,7 +553,15 @@  int drm_syncobj_find_fence(struct drm_file *file_private,
 	if (!syncobj)
 		return -ENOENT;
 
-	*fence = drm_syncobj_fence_get(syncobj);
+	if (syncobj->type == DRM_SYNCOBJ_TYPE_NORMAL) {
+		*fence = drm_syncobj_fence_get(syncobj);
+	}else if (syncobj->type == DRM_SYNCOBJ_TYPE_TIMELINE) {
+		*fence = drm_syncobj_timeline_point_get(syncobj, point,
+							DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT);
+	} else {
+		DRM_ERROR("invalid syncobj type\n");
+		return -EINVAL;
+	}
 	if (!*fence) {
 		ret = -EINVAL;
 	}
@@ -238,6 +570,35 @@  int drm_syncobj_find_fence(struct drm_file *file_private,
 }
 EXPORT_SYMBOL(drm_syncobj_find_fence);
 
+static void drm_syncobj_timeline_fini(struct drm_syncobj *syncobj,
+				      struct drm_syncobj_timeline *syncobj_timeline)
+{
+	struct rb_node *node = NULL;
+	struct drm_syncobj_wait_pt *wait_pt = NULL;
+	struct drm_syncobj_signal_pt *signal_pt = NULL, *tmp;
+
+	spin_lock(&syncobj->lock);
+	for(node = rb_first(&syncobj_timeline->wait_pt_tree);
+	    node != NULL; ) {
+		wait_pt = rb_entry(node, struct drm_syncobj_wait_pt, node);
+		node = rb_next(node);
+		rb_erase(&wait_pt->node,
+			 &syncobj_timeline->wait_pt_tree);
+		RB_CLEAR_NODE(&wait_pt->node);
+		if (wait_pt->submission_fence)
+			dma_fence_put(&wait_pt->submission_fence->base);
+		/* kfree(wait_pt) is excuted by fence put */
+		dma_fence_put(&wait_pt->base.base);
+	}
+	list_for_each_entry_safe(signal_pt, tmp,
+				 &syncobj_timeline->signal_pt_list, list) {
+		list_del(&signal_pt->list);
+		dma_fence_put(signal_pt->signal_fence);
+		dma_fence_put(signal_pt->pre_pt_base);
+		dma_fence_put(&signal_pt->base.base);
+	}
+	spin_unlock(&syncobj->lock);
+}
 /**
  * drm_syncobj_free - free a sync object.
  * @kref: kref to free.
@@ -250,10 +611,22 @@  void drm_syncobj_free(struct kref *kref)
 						   struct drm_syncobj,
 						   refcount);
 	drm_syncobj_replace_fence(syncobj, NULL);
+	drm_syncobj_timeline_fini(syncobj, &syncobj->syncobj_timeline);
 	kfree(syncobj);
 }
 EXPORT_SYMBOL(drm_syncobj_free);
 
+static void drm_syncobj_timeline_init(struct drm_syncobj_timeline
+				      *syncobj_timeline)
+{
+	syncobj_timeline->timeline_context = dma_fence_context_alloc(1);
+	syncobj_timeline->submission_context = dma_fence_context_alloc(1);
+	syncobj_timeline->timeline = 0;
+	syncobj_timeline->signal_point = 0;
+
+	syncobj_timeline->wait_pt_tree = RB_ROOT;
+	INIT_LIST_HEAD(&syncobj_timeline->signal_pt_list);
+}
 /**
  * drm_syncobj_create - create a new syncobj
  * @out_syncobj: returned syncobj
@@ -279,6 +652,12 @@  int drm_syncobj_create(struct drm_syncobj **out_syncobj, uint32_t flags,
 	kref_init(&syncobj->refcount);
 	INIT_LIST_HEAD(&syncobj->cb_list);
 	spin_lock_init(&syncobj->lock);
+	if (flags & DRM_SYNCOBJ_CREATE_TYPE_TIMELINE) {
+		syncobj->type = DRM_SYNCOBJ_TYPE_TIMELINE;
+		drm_syncobj_timeline_init(&syncobj->syncobj_timeline);
+	} else {
+		syncobj->type = DRM_SYNCOBJ_TYPE_NORMAL;
+	}
 
 	if (flags & DRM_SYNCOBJ_CREATE_SIGNALED) {
 		ret = drm_syncobj_assign_null_handle(syncobj);
@@ -491,7 +870,7 @@  static int drm_syncobj_export_sync_file(struct drm_file *file_private,
 	if (fd < 0)
 		return fd;
 
-	ret = drm_syncobj_find_fence(file_private, handle, &fence);
+	ret = drm_syncobj_find_fence(file_private, handle, &fence, 0);
 	if (ret)
 		goto err_put_fd;
 
diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c
index 9029590267aa..f24c3eccb4d5 100644
--- a/drivers/gpu/drm/v3d/v3d_gem.c
+++ b/drivers/gpu/drm/v3d/v3d_gem.c
@@ -521,12 +521,12 @@  v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
 	kref_init(&exec->refcount);
 
 	ret = drm_syncobj_find_fence(file_priv, args->in_sync_bcl,
-				     &exec->bin.in_fence);
+				     &exec->bin.in_fence, 0);
 	if (ret == -EINVAL)
 		goto fail;
 
 	ret = drm_syncobj_find_fence(file_priv, args->in_sync_rcl,
-				     &exec->render.in_fence);
+				     &exec->render.in_fence, 0);
 	if (ret == -EINVAL)
 		goto fail;
 
diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c
index 7910b9acedd6..f7b4971342e8 100644
--- a/drivers/gpu/drm/vc4/vc4_gem.c
+++ b/drivers/gpu/drm/vc4/vc4_gem.c
@@ -1173,7 +1173,7 @@  vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
 
 	if (args->in_sync) {
 		ret = drm_syncobj_find_fence(file_priv, args->in_sync,
-					     &in_fence);
+					     &in_fence, 0);
 		if (ret)
 			goto fail;
 
diff --git a/include/drm/drm_syncobj.h b/include/drm/drm_syncobj.h
index b04c492ddbb5..b6e193c6cc9a 100644
--- a/include/drm/drm_syncobj.h
+++ b/include/drm/drm_syncobj.h
@@ -54,6 +54,41 @@  const struct dma_fence_ops drm_syncobj_stub_fence_ops = {
 	.release = NULL,
 };
 
+enum drm_syncobj_type {
+	DRM_SYNCOBJ_TYPE_NORMAL,
+	DRM_SYNCOBJ_TYPE_TIMELINE
+};
+
+struct drm_syncobj;
+struct drm_syncobj_wait_pt {
+	struct drm_syncobj_stub_fence base;
+	struct drm_syncobj_stub_fence *submission_fence;
+	u64    value;
+	struct rb_node   node;
+};
+struct drm_syncobj_signal_pt {
+	struct drm_syncobj_stub_fence base;
+	struct dma_fence *signal_fence;
+	struct dma_fence *pre_pt_base;
+	struct dma_fence_cb signal_cb;
+	struct dma_fence_cb pre_pt_cb;
+	struct drm_syncobj *syncobj;
+	u64    value;
+	struct list_head list;
+};
+struct drm_syncobj_timeline {
+	u64 timeline_context;
+	u64 submission_context;
+	/**
+	 * @timeline: syncobj timeline
+	 */
+	u64 timeline;
+	u64 signal_point;
+
+
+	struct rb_root wait_pt_tree;
+	struct list_head signal_pt_list;
+};
 /**
  * struct drm_syncobj - sync object.
  *
@@ -64,6 +99,11 @@  struct drm_syncobj {
 	 * @refcount: Reference count of this object.
 	 */
 	struct kref refcount;
+	/**
+	 * @type: indicate syncobj type
+	 */
+	enum drm_syncobj_type type;
+	struct drm_syncobj_timeline syncobj_timeline;
 	/**
 	 * @fence:
 	 * NULL or a pointer to the fence bound to this object.
@@ -162,9 +202,12 @@  void drm_syncobj_remove_callback(struct drm_syncobj *syncobj,
 				 struct drm_syncobj_cb *cb);
 void drm_syncobj_replace_fence(struct drm_syncobj *syncobj,
 			       struct dma_fence *fence);
+void drm_syncobj_signal_fence(struct drm_syncobj *syncobj,
+			      struct dma_fence *fence,
+			      u64 point);
 int drm_syncobj_find_fence(struct drm_file *file_private,
 			   u32 handle,
-			   struct dma_fence **fence);
+			   struct dma_fence **fence, u64 point);
 void drm_syncobj_free(struct kref *kref);
 int drm_syncobj_create(struct drm_syncobj **out_syncobj, uint32_t flags,
 		       struct dma_fence *fence);
diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
index 300f336633f2..71e6cd1c88f8 100644
--- a/include/uapi/drm/drm.h
+++ b/include/uapi/drm/drm.h
@@ -717,6 +717,8 @@  struct drm_prime_handle {
 struct drm_syncobj_create {
 	__u32 handle;
 #define DRM_SYNCOBJ_CREATE_SIGNALED (1 << 0)
+#define DRM_SYNCOBJ_CREATE_TYPE_NORMAL (1 << 1)
+#define DRM_SYNCOBJ_CREATE_TYPE_TIMELINE (1 << 2)
 	__u32 flags;
 };
 
@@ -734,7 +736,6 @@  struct drm_syncobj_handle {
 	__s32 fd;
 	__u32 pad;
 };
-
 #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL (1 << 0)
 #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT (1 << 1)
 struct drm_syncobj_wait {

Comments

Am 22.08.2018 um 10:49 schrieb Chunming Zhou:
> VK_KHR_timeline_semaphore:
> This extension introduces a new type of semaphore that has an integer payload
> identifying a point in a timeline. Such timeline semaphores support the
> following operations:
>    * Host query - A host operation that allows querying the payload of the
>      timeline semaphore.
>    * Host wait - A host operation that allows a blocking wait for a
>      timeline semaphore to reach a specified value.
>    * Device wait - A device operation that allows waiting for a
>      timeline semaphore to reach a specified value.
>    * Device signal - A device operation that allows advancing the
>      timeline semaphore to a specified value.
>
> Since it's a timeline, that means the front time point(PT) always is signaled before the late PT.
> a. signal PT design:
> Signal PT fence N depends on PT[N-1] fence and signal opertion fence, when PT[N] fence is signaled,
> the timeline will increase to value of PT[N].
> b. wait PT design:
> Wait PT fence is signaled by reaching timeline point value, when timeline is increasing, will compare
> wait PTs value with new timeline value, if PT value is lower than timeline value, then wait PT will be
> signaled, otherwise keep in list. semaphore wait operation can wait on any point of timeline,
> so need a RB tree to order them. And wait PT could ahead of signal PT, we need a sumission fence to
> perform that.
>
> TODO:
> CPU query and wait on timeline semaphore.

Please drop DRM_SYNCOBJ_CREATE_TYPE_NORMAL, that isn't used in the whole 
implementation.

Additional to that I would split up the change to 
drm_syncobj_find_fence() in a separate patch.

Christian.

>
> Change-Id: I9f09aae225e268442c30451badac40406f24262c
> Signed-off-by: Chunming Zhou <david1.zhou@amd.com>
> Cc: Christian Konig <christian.koenig@amd.com>
> Cc: Dave Airlie <airlied@redhat.com>
> Cc: Daniel Rakos <Daniel.Rakos@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c |   7 +-
>   drivers/gpu/drm/drm_syncobj.c          | 385 ++++++++++++++++++++++++++++++++-
>   drivers/gpu/drm/v3d/v3d_gem.c          |   4 +-
>   drivers/gpu/drm/vc4/vc4_gem.c          |   2 +-
>   include/drm/drm_syncobj.h              |  45 +++-
>   include/uapi/drm/drm.h                 |   3 +-
>   6 files changed, 435 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index d42d1c8f78f6..463cc8960723 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -1105,7 +1105,7 @@ static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p,
>   {
>   	int r;
>   	struct dma_fence *fence;
> -	r = drm_syncobj_find_fence(p->filp, handle, &fence);
> +	r = drm_syncobj_find_fence(p->filp, handle, &fence, 0);
>   	if (r)
>   		return r;
>   
> @@ -1193,8 +1193,9 @@ static void amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p)
>   {
>   	int i;
>   
> -	for (i = 0; i < p->num_post_dep_syncobjs; ++i)
> -		drm_syncobj_replace_fence(p->post_dep_syncobjs[i], p->fence);
> +	for (i = 0; i < p->num_post_dep_syncobjs; ++i) {
> +		drm_syncobj_signal_fence(p->post_dep_syncobjs[i], p->fence, 0);
> +	}
>   }
>   
>   static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
> diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
> index 70af32d0def1..3709f36c901e 100644
> --- a/drivers/gpu/drm/drm_syncobj.c
> +++ b/drivers/gpu/drm/drm_syncobj.c
> @@ -187,6 +187,191 @@ void drm_syncobj_replace_fence(struct drm_syncobj *syncobj,
>   }
>   EXPORT_SYMBOL(drm_syncobj_replace_fence);
>   
> +static void drm_syncobj_timeline_signal_submission_fences(struct drm_syncobj *syncobj)
> +{
> +	struct rb_node *node = NULL;
> +	struct drm_syncobj_wait_pt *wait_pt = NULL;
> +
> +	spin_lock(&syncobj->lock);
> +	for(node = rb_first(&syncobj->syncobj_timeline.wait_pt_tree);
> +	    node != NULL; node = rb_next(node)) {
> +	    wait_pt = rb_entry(node, struct drm_syncobj_wait_pt, node);
> +	    if (wait_pt->value <= syncobj->syncobj_timeline.signal_point) {
> +		    if (wait_pt->submission_fence)
> +			dma_fence_signal(&wait_pt->submission_fence->base);
> +	    } else {
> +		    /* the loop is from left to right, the later entry value is
> +		     * bigger, so don't need to check any more */
> +		    break;
> +	    }
> +	}
> +	spin_unlock(&syncobj->lock);
> +}
> +
> +static void drm_syncobj_timeline_signal_wait_pts(struct drm_syncobj *syncobj)
> +{
> +	struct rb_node *node = NULL;
> +	struct drm_syncobj_wait_pt *wait_pt = NULL;
> +
> +	spin_lock(&syncobj->lock);
> +	for(node = rb_first(&syncobj->syncobj_timeline.wait_pt_tree);
> +	    node != NULL; ) {
> +		wait_pt = rb_entry(node, struct drm_syncobj_wait_pt, node);
> +		node = rb_next(node);
> +		if (wait_pt->value <= syncobj->syncobj_timeline.timeline) {
> +			dma_fence_signal(&wait_pt->base.base);
> +			rb_erase(&wait_pt->node,
> +				 &syncobj->syncobj_timeline.wait_pt_tree);
> +			RB_CLEAR_NODE(&wait_pt->node);
> +			if (wait_pt->submission_fence)
> +				dma_fence_put(&wait_pt->submission_fence->base);
> +			/* kfree(wait_pt) is excuted by fence put */
> +			dma_fence_put(&wait_pt->base.base);
> +		} else {
> +			/* the loop is from left to right, the later entry value is
> +			 * bigger, so don't need to check any more */
> +			break;
> +		}
> +	}
> +	spin_unlock(&syncobj->lock);
> +}
> +
> +
> +static void pt_fence_cb(struct drm_syncobj_signal_pt *signal_pt)
> +{
> +	struct dma_fence *fence = NULL;
> +	struct drm_syncobj *syncobj;
> +
> +	fence = signal_pt->signal_fence;
> +	signal_pt->signal_fence = NULL;
> +	dma_fence_put(fence);
> +	fence = signal_pt->pre_pt_base;
> +	signal_pt->pre_pt_base = NULL;
> +	dma_fence_put(fence);
> +
> +	syncobj = signal_pt->syncobj;
> +	spin_lock(&syncobj->lock);
> +	list_del(&signal_pt->list);
> +	syncobj->syncobj_timeline.timeline = signal_pt->value;
> +	spin_unlock(&syncobj->lock);
> +	/* kfree(signal_pt) will be  executed by below fence put */
> +	dma_fence_put(&signal_pt->base.base);
> +	drm_syncobj_timeline_signal_wait_pts(syncobj);
> +}
> +static void pt_signal_fence_func(struct dma_fence *fence,
> +				 struct dma_fence_cb *cb)
> +{
> +	struct drm_syncobj_signal_pt *signal_pt =
> +		container_of(cb, struct drm_syncobj_signal_pt, signal_cb);
> +
> +	if (signal_pt->pre_pt_base &&
> +	    !dma_fence_is_signaled(signal_pt->pre_pt_base))
> +		return;
> +
> +	pt_fence_cb(signal_pt);
> +}
> +static void pt_pre_fence_func(struct dma_fence *fence,
> +				 struct dma_fence_cb *cb)
> +{
> +	struct drm_syncobj_signal_pt *signal_pt =
> +		container_of(cb, struct drm_syncobj_signal_pt, pre_pt_cb);
> +
> +	if (signal_pt->signal_fence &&
> +	    !dma_fence_is_signaled(signal_pt->pre_pt_base))
> +		return;
> +
> +	pt_fence_cb(signal_pt);
> +}
> +
> +static int drm_syncobj_timeline_signal_fence(struct drm_syncobj *syncobj,
> +					     struct dma_fence *fence,
> +					     u64 point)
> +{
> +	struct drm_syncobj_signal_pt *signal_pt =
> +		kzalloc(sizeof(struct drm_syncobj_signal_pt), GFP_KERNEL);
> +	struct drm_syncobj_signal_pt *tail_pt;
> +	struct dma_fence *tail_pt_fence = NULL;
> +	int ret = 0;
> +
> +	if (!signal_pt)
> +		return -ENOMEM;
> +	if (syncobj->syncobj_timeline.signal_point >= point) {
> +		DRM_WARN("A later signal is ready!");
> +		goto out;
> +	}
> +	if (fence)
> +		dma_fence_get(fence);
> +	spin_lock(&syncobj->lock);
> +	spin_lock_init(&signal_pt->base.lock);
> +	dma_fence_init(&signal_pt->base.base,
> +		       &drm_syncobj_stub_fence_ops,
> +		       &signal_pt->base.lock,
> +		       syncobj->syncobj_timeline.timeline_context, point);
> +	signal_pt->signal_fence =
> +		rcu_dereference_protected(fence,
> +					  lockdep_is_held(&fence->lock));
> +	if (!list_empty(&syncobj->syncobj_timeline.signal_pt_list)) {
> +		tail_pt = list_last_entry(&syncobj->syncobj_timeline.signal_pt_list,
> +					  struct drm_syncobj_signal_pt, list);
> +		tail_pt_fence = &tail_pt->base.base;
> +		if (dma_fence_is_signaled(tail_pt_fence))
> +			tail_pt_fence = NULL;
> +	}
> +	if (tail_pt_fence)
> +		signal_pt->pre_pt_base =
> +			dma_fence_get(rcu_dereference_protected(tail_pt_fence,
> +								lockdep_is_held(&tail_pt_fence->lock)));
> +
> +	signal_pt->value = point;
> +	syncobj->syncobj_timeline.signal_point = point;
> +	signal_pt->syncobj = syncobj;
> +	INIT_LIST_HEAD(&signal_pt->list);
> +	list_add_tail(&signal_pt->list, &syncobj->syncobj_timeline.signal_pt_list);
> +	spin_unlock(&syncobj->lock);
> +	drm_syncobj_timeline_signal_submission_fences(syncobj);
> +	/**
> +	 * Every pt is depending on signal fence and previous pt fence, add
> +	 * callbacks to them
> +	 */
> +	if (!dma_fence_is_signaled(signal_pt->signal_fence))
> +		dma_fence_add_callback(signal_pt->signal_fence,
> +				       &signal_pt->signal_cb,
> +				       pt_signal_fence_func);
> +	else
> +		pt_signal_fence_func(signal_pt->signal_fence,
> +				     &signal_pt->signal_cb);
> +	if (signal_pt->pre_pt_base && !dma_fence_is_signaled(signal_pt->pre_pt_base))
> +		dma_fence_add_callback(signal_pt->pre_pt_base,
> +				       &signal_pt->pre_pt_cb,
> +				       pt_pre_fence_func);
> +	else
> +		pt_pre_fence_func(signal_pt->pre_pt_base, &signal_pt->pre_pt_cb);
> +
> +
> +	return 0;
> +out:
> +	kfree(signal_pt);
> +	return ret;
> +}
> +
> +/**
> + * drm_syncobj_signal_fence - place fence into a sync object.
> + * @syncobj: Sync object to replace fence in
> + * @fence: fence to install in sync file.
> + *
> + * This places the fence into normal or timeline sync object
> + */
> +void drm_syncobj_signal_fence(struct drm_syncobj *syncobj,
> +			      struct dma_fence *fence,
> +			      u64 point)
> +{
> +	if (syncobj->type == DRM_SYNCOBJ_TYPE_NORMAL)
> +		drm_syncobj_replace_fence(syncobj, fence);
> +	else if (syncobj->type == DRM_SYNCOBJ_TYPE_TIMELINE)
> +		drm_syncobj_timeline_signal_fence(syncobj, fence, point);
> +}
> +EXPORT_SYMBOL(drm_syncobj_signal_fence);
> +
>   static int drm_syncobj_assign_null_handle(struct drm_syncobj *syncobj)
>   {
>   	struct drm_syncobj_stub_fence *fence;
> @@ -205,12 +390,150 @@ static int drm_syncobj_assign_null_handle(struct drm_syncobj *syncobj)
>   
>   	return 0;
>   }
> +static struct drm_syncobj_wait_pt *
> +drm_syncobj_timeline_lookup_wait_pt(struct drm_syncobj *syncobj, u64 point)
> +{
> +    struct rb_node *node = syncobj->syncobj_timeline.wait_pt_tree.rb_node;
> +    struct drm_syncobj_wait_pt *wait_pt = NULL;
> +
> +
> +    spin_lock(&syncobj->lock);
> +    while(node) {
> +	    int result = point - wait_pt->value;
> +
> +	    wait_pt = rb_entry(node, struct drm_syncobj_wait_pt, node);
> +	    if (result < 0)
> +		    node = node->rb_left;
> +	    else if (result > 0)
> +		    node = node->rb_right;
> +	    else
> +		    break;
> +    }
> +    spin_unlock(&syncobj->lock);
> +
> +    return wait_pt;
> +}
> +
> +static struct drm_syncobj_wait_pt *
> +drm_syncobj_timeline_create_wait_pt(struct drm_syncobj *syncobj, u64 point)
> +{
> +	struct drm_syncobj_wait_pt *wait_pt;
> +	struct rb_node **new = &(syncobj->syncobj_timeline.wait_pt_tree.rb_node), *parent = NULL;
> +
> +	wait_pt = kzalloc(sizeof(*wait_pt), GFP_KERNEL);
> +	if (!wait_pt)
> +		return NULL;
> +	spin_lock_init(&wait_pt->base.lock);
> +	dma_fence_init(&wait_pt->base.base,
> +		       &drm_syncobj_stub_fence_ops,
> +		       &wait_pt->base.lock,
> +		       syncobj->syncobj_timeline.timeline_context, point);
> +	wait_pt->submission_fence = NULL;
> +	/* check if the pt is already sumbitted, if yes, then don't need to
> +	 * create submission fence, otherwise create it */
> +	if (point > syncobj->syncobj_timeline.signal_point) {
> +		wait_pt->submission_fence =
> +			kzalloc(sizeof(struct drm_syncobj_stub_fence),
> +				GFP_KERNEL);
> +		if (!wait_pt->submission_fence) {
> +			dma_fence_put(&wait_pt->base.base);
> +			return NULL;
> +		}
> +		spin_lock_init(&wait_pt->submission_fence->lock);
> +		dma_fence_init(&wait_pt->submission_fence->base,
> +			       &drm_syncobj_stub_fence_ops,
> +			       &wait_pt->submission_fence->lock,
> +			       syncobj->syncobj_timeline.submission_context,
> +			       point);
> +	}
> +	wait_pt->value = point;
> +
> +	/* wait pt must be in an order, so that we can easily lookup and signal
> +	 * it */
> +	spin_lock(&syncobj->lock);
> +	if (point <= syncobj->syncobj_timeline.timeline)
> +		dma_fence_signal(&wait_pt->base.base);
> +	if (wait_pt->submission_fence &&
> +	    point <= syncobj->syncobj_timeline.signal_point)
> +		dma_fence_signal(&wait_pt->submission_fence->base);
> +	while(*new) {
> +		struct drm_syncobj_wait_pt *this =
> +			rb_entry(*new, struct drm_syncobj_wait_pt, node);
> +		int result = wait_pt->value - this->value;
> +
> +		parent = *new;
> +		if (result < 0)
> +			new = &((*new)->rb_left);
> +		else if (result > 0)
> +			new = &((*new)->rb_right);
> +		else
> +			goto exist;
> +	}
> +
> +	rb_link_node(&wait_pt->node, parent, new);
> +	rb_insert_color(&wait_pt->node, &syncobj->syncobj_timeline.wait_pt_tree);
> +	spin_unlock(&syncobj->lock);
> +	return wait_pt;
> +exist:
> +	spin_unlock(&syncobj->lock);
> +	if (wait_pt->submission_fence)
> +		dma_fence_put(&wait_pt->submission_fence->base);
> +	dma_fence_put(&wait_pt->base.base);
> +	wait_pt = drm_syncobj_timeline_lookup_wait_pt(syncobj, point);
> +	return wait_pt;
> +}
> +
> +static struct dma_fence *
> +drm_syncobj_timeline_point_get(struct drm_syncobj *syncobj, u64 point, u64 flag)
> +{
> +	struct drm_syncobj_wait_pt *wait_pt;
> +
> +	/* already signaled, simply return a signaled stub fence */
> +	if (point <= syncobj->syncobj_timeline.timeline) {
> +		struct drm_syncobj_stub_fence *fence;
> +
> +		fence = kzalloc(sizeof(*fence), GFP_KERNEL);
> +		if (fence == NULL)
> +			return NULL;
> +
> +		spin_lock_init(&fence->lock);
> +		dma_fence_init(&fence->base, &drm_syncobj_stub_fence_ops,
> +			       &fence->lock, 0, 0);
> +		dma_fence_signal(&fence->base);
> +		return &fence->base;
> +	}
> +
> +	/* check if the wait pt exists */
> +	wait_pt = drm_syncobj_timeline_lookup_wait_pt(syncobj, point);
> +	if (!wait_pt) {
> +		/* This is a new wait pt, so create it */
> +		wait_pt = drm_syncobj_timeline_create_wait_pt(syncobj, point);
> +		if (!wait_pt)
> +			return NULL;
> +	}
> +	if (wait_pt) {
> +		struct dma_fence *fence;
> +		if (wait_pt->submission_fence) {
> +			if (flag & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT)
> +				dma_fence_wait(&wait_pt->submission_fence->base, true);
> +			else if (!dma_fence_is_signaled(&wait_pt->submission_fence->base))
> +				return NULL;
> +		}
> +		rcu_read_lock();
> +		fence = dma_fence_get_rcu(&wait_pt->base.base);
> +		rcu_read_unlock();
> +		return fence;
> +	}
> +	return NULL;
> +}
> +
>   
>   /**
>    * drm_syncobj_find_fence - lookup and reference the fence in a sync object
>    * @file_private: drm file private pointer
>    * @handle: sync object handle to lookup.
>    * @fence: out parameter for the fence
> + * @point: timeline point
>    *
>    * This is just a convenience function that combines drm_syncobj_find() and
>    * drm_syncobj_fence_get().
> @@ -221,7 +544,8 @@ static int drm_syncobj_assign_null_handle(struct drm_syncobj *syncobj)
>    */
>   int drm_syncobj_find_fence(struct drm_file *file_private,
>   			   u32 handle,
> -			   struct dma_fence **fence)
> +			   struct dma_fence **fence,
> +			   u64 point)
>   {
>   	struct drm_syncobj *syncobj = drm_syncobj_find(file_private, handle);
>   	int ret = 0;
> @@ -229,7 +553,15 @@ int drm_syncobj_find_fence(struct drm_file *file_private,
>   	if (!syncobj)
>   		return -ENOENT;
>   
> -	*fence = drm_syncobj_fence_get(syncobj);
> +	if (syncobj->type == DRM_SYNCOBJ_TYPE_NORMAL) {
> +		*fence = drm_syncobj_fence_get(syncobj);
> +	}else if (syncobj->type == DRM_SYNCOBJ_TYPE_TIMELINE) {
> +		*fence = drm_syncobj_timeline_point_get(syncobj, point,
> +							DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT);
> +	} else {
> +		DRM_ERROR("invalid syncobj type\n");
> +		return -EINVAL;
> +	}
>   	if (!*fence) {
>   		ret = -EINVAL;
>   	}
> @@ -238,6 +570,35 @@ int drm_syncobj_find_fence(struct drm_file *file_private,
>   }
>   EXPORT_SYMBOL(drm_syncobj_find_fence);
>   
> +static void drm_syncobj_timeline_fini(struct drm_syncobj *syncobj,
> +				      struct drm_syncobj_timeline *syncobj_timeline)
> +{
> +	struct rb_node *node = NULL;
> +	struct drm_syncobj_wait_pt *wait_pt = NULL;
> +	struct drm_syncobj_signal_pt *signal_pt = NULL, *tmp;
> +
> +	spin_lock(&syncobj->lock);
> +	for(node = rb_first(&syncobj_timeline->wait_pt_tree);
> +	    node != NULL; ) {
> +		wait_pt = rb_entry(node, struct drm_syncobj_wait_pt, node);
> +		node = rb_next(node);
> +		rb_erase(&wait_pt->node,
> +			 &syncobj_timeline->wait_pt_tree);
> +		RB_CLEAR_NODE(&wait_pt->node);
> +		if (wait_pt->submission_fence)
> +			dma_fence_put(&wait_pt->submission_fence->base);
> +		/* kfree(wait_pt) is excuted by fence put */
> +		dma_fence_put(&wait_pt->base.base);
> +	}
> +	list_for_each_entry_safe(signal_pt, tmp,
> +				 &syncobj_timeline->signal_pt_list, list) {
> +		list_del(&signal_pt->list);
> +		dma_fence_put(signal_pt->signal_fence);
> +		dma_fence_put(signal_pt->pre_pt_base);
> +		dma_fence_put(&signal_pt->base.base);
> +	}
> +	spin_unlock(&syncobj->lock);
> +}
>   /**
>    * drm_syncobj_free - free a sync object.
>    * @kref: kref to free.
> @@ -250,10 +611,22 @@ void drm_syncobj_free(struct kref *kref)
>   						   struct drm_syncobj,
>   						   refcount);
>   	drm_syncobj_replace_fence(syncobj, NULL);
> +	drm_syncobj_timeline_fini(syncobj, &syncobj->syncobj_timeline);
>   	kfree(syncobj);
>   }
>   EXPORT_SYMBOL(drm_syncobj_free);
>   
> +static void drm_syncobj_timeline_init(struct drm_syncobj_timeline
> +				      *syncobj_timeline)
> +{
> +	syncobj_timeline->timeline_context = dma_fence_context_alloc(1);
> +	syncobj_timeline->submission_context = dma_fence_context_alloc(1);
> +	syncobj_timeline->timeline = 0;
> +	syncobj_timeline->signal_point = 0;
> +
> +	syncobj_timeline->wait_pt_tree = RB_ROOT;
> +	INIT_LIST_HEAD(&syncobj_timeline->signal_pt_list);
> +}
>   /**
>    * drm_syncobj_create - create a new syncobj
>    * @out_syncobj: returned syncobj
> @@ -279,6 +652,12 @@ int drm_syncobj_create(struct drm_syncobj **out_syncobj, uint32_t flags,
>   	kref_init(&syncobj->refcount);
>   	INIT_LIST_HEAD(&syncobj->cb_list);
>   	spin_lock_init(&syncobj->lock);
> +	if (flags & DRM_SYNCOBJ_CREATE_TYPE_TIMELINE) {
> +		syncobj->type = DRM_SYNCOBJ_TYPE_TIMELINE;
> +		drm_syncobj_timeline_init(&syncobj->syncobj_timeline);
> +	} else {
> +		syncobj->type = DRM_SYNCOBJ_TYPE_NORMAL;
> +	}
>   
>   	if (flags & DRM_SYNCOBJ_CREATE_SIGNALED) {
>   		ret = drm_syncobj_assign_null_handle(syncobj);
> @@ -491,7 +870,7 @@ static int drm_syncobj_export_sync_file(struct drm_file *file_private,
>   	if (fd < 0)
>   		return fd;
>   
> -	ret = drm_syncobj_find_fence(file_private, handle, &fence);
> +	ret = drm_syncobj_find_fence(file_private, handle, &fence, 0);
>   	if (ret)
>   		goto err_put_fd;
>   
> diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c
> index 9029590267aa..f24c3eccb4d5 100644
> --- a/drivers/gpu/drm/v3d/v3d_gem.c
> +++ b/drivers/gpu/drm/v3d/v3d_gem.c
> @@ -521,12 +521,12 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
>   	kref_init(&exec->refcount);
>   
>   	ret = drm_syncobj_find_fence(file_priv, args->in_sync_bcl,
> -				     &exec->bin.in_fence);
> +				     &exec->bin.in_fence, 0);
>   	if (ret == -EINVAL)
>   		goto fail;
>   
>   	ret = drm_syncobj_find_fence(file_priv, args->in_sync_rcl,
> -				     &exec->render.in_fence);
> +				     &exec->render.in_fence, 0);
>   	if (ret == -EINVAL)
>   		goto fail;
>   
> diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c
> index 7910b9acedd6..f7b4971342e8 100644
> --- a/drivers/gpu/drm/vc4/vc4_gem.c
> +++ b/drivers/gpu/drm/vc4/vc4_gem.c
> @@ -1173,7 +1173,7 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
>   
>   	if (args->in_sync) {
>   		ret = drm_syncobj_find_fence(file_priv, args->in_sync,
> -					     &in_fence);
> +					     &in_fence, 0);
>   		if (ret)
>   			goto fail;
>   
> diff --git a/include/drm/drm_syncobj.h b/include/drm/drm_syncobj.h
> index b04c492ddbb5..b6e193c6cc9a 100644
> --- a/include/drm/drm_syncobj.h
> +++ b/include/drm/drm_syncobj.h
> @@ -54,6 +54,41 @@ const struct dma_fence_ops drm_syncobj_stub_fence_ops = {
>   	.release = NULL,
>   };
>   
> +enum drm_syncobj_type {
> +	DRM_SYNCOBJ_TYPE_NORMAL,
> +	DRM_SYNCOBJ_TYPE_TIMELINE
> +};
> +
> +struct drm_syncobj;
> +struct drm_syncobj_wait_pt {
> +	struct drm_syncobj_stub_fence base;
> +	struct drm_syncobj_stub_fence *submission_fence;
> +	u64    value;
> +	struct rb_node   node;
> +};
> +struct drm_syncobj_signal_pt {
> +	struct drm_syncobj_stub_fence base;
> +	struct dma_fence *signal_fence;
> +	struct dma_fence *pre_pt_base;
> +	struct dma_fence_cb signal_cb;
> +	struct dma_fence_cb pre_pt_cb;
> +	struct drm_syncobj *syncobj;
> +	u64    value;
> +	struct list_head list;
> +};
> +struct drm_syncobj_timeline {
> +	u64 timeline_context;
> +	u64 submission_context;
> +	/**
> +	 * @timeline: syncobj timeline
> +	 */
> +	u64 timeline;
> +	u64 signal_point;
> +
> +
> +	struct rb_root wait_pt_tree;
> +	struct list_head signal_pt_list;
> +};
>   /**
>    * struct drm_syncobj - sync object.
>    *
> @@ -64,6 +99,11 @@ struct drm_syncobj {
>   	 * @refcount: Reference count of this object.
>   	 */
>   	struct kref refcount;
> +	/**
> +	 * @type: indicate syncobj type
> +	 */
> +	enum drm_syncobj_type type;
> +	struct drm_syncobj_timeline syncobj_timeline;
>   	/**
>   	 * @fence:
>   	 * NULL or a pointer to the fence bound to this object.
> @@ -162,9 +202,12 @@ void drm_syncobj_remove_callback(struct drm_syncobj *syncobj,
>   				 struct drm_syncobj_cb *cb);
>   void drm_syncobj_replace_fence(struct drm_syncobj *syncobj,
>   			       struct dma_fence *fence);
> +void drm_syncobj_signal_fence(struct drm_syncobj *syncobj,
> +			      struct dma_fence *fence,
> +			      u64 point);
>   int drm_syncobj_find_fence(struct drm_file *file_private,
>   			   u32 handle,
> -			   struct dma_fence **fence);
> +			   struct dma_fence **fence, u64 point);
>   void drm_syncobj_free(struct kref *kref);
>   int drm_syncobj_create(struct drm_syncobj **out_syncobj, uint32_t flags,
>   		       struct dma_fence *fence);
> diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
> index 300f336633f2..71e6cd1c88f8 100644
> --- a/include/uapi/drm/drm.h
> +++ b/include/uapi/drm/drm.h
> @@ -717,6 +717,8 @@ struct drm_prime_handle {
>   struct drm_syncobj_create {
>   	__u32 handle;
>   #define DRM_SYNCOBJ_CREATE_SIGNALED (1 << 0)
> +#define DRM_SYNCOBJ_CREATE_TYPE_NORMAL (1 << 1)
> +#define DRM_SYNCOBJ_CREATE_TYPE_TIMELINE (1 << 2)
>   	__u32 flags;
>   };
>   
> @@ -734,7 +736,6 @@ struct drm_syncobj_handle {
>   	__s32 fd;
>   	__u32 pad;
>   };
> -
>   #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL (1 << 0)
>   #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT (1 << 1)
>   struct drm_syncobj_wait {
Am 22.08.2018 um 10:57 schrieb Christian König:
> Am 22.08.2018 um 10:49 schrieb Chunming Zhou:
>> VK_KHR_timeline_semaphore:
>> This extension introduces a new type of semaphore that has an integer 
>> payload
>> identifying a point in a timeline. Such timeline semaphores support the
>> following operations:
>>    * Host query - A host operation that allows querying the payload 
>> of the
>>      timeline semaphore.
>>    * Host wait - A host operation that allows a blocking wait for a
>>      timeline semaphore to reach a specified value.
>>    * Device wait - A device operation that allows waiting for a
>>      timeline semaphore to reach a specified value.
>>    * Device signal - A device operation that allows advancing the
>>      timeline semaphore to a specified value.
>>
>> Since it's a timeline, that means the front time point(PT) always is 
>> signaled before the late PT.
>> a. signal PT design:
>> Signal PT fence N depends on PT[N-1] fence and signal opertion fence, 
>> when PT[N] fence is signaled,
>> the timeline will increase to value of PT[N].
>> b. wait PT design:
>> Wait PT fence is signaled by reaching timeline point value, when 
>> timeline is increasing, will compare
>> wait PTs value with new timeline value, if PT value is lower than 
>> timeline value, then wait PT will be
>> signaled, otherwise keep in list. semaphore wait operation can wait 
>> on any point of timeline,
>> so need a RB tree to order them. And wait PT could ahead of signal 
>> PT, we need a sumission fence to
>> perform that.
>>
>> TODO:
>> CPU query and wait on timeline semaphore.
>
> Please drop DRM_SYNCOBJ_CREATE_TYPE_NORMAL, that isn't used in the 
> whole implementation.
>
> Additional to that I would split up the change to 
> drm_syncobj_find_fence() in a separate patch.

And please drop the submission_fence implementation and instead use 
wait_event() for that.

Implementing this as a fence has the possiblity that somebody is abusing 
it for wait before signal.

Christian.

>
> Christian.
>
>>
>> Change-Id: I9f09aae225e268442c30451badac40406f24262c
>> Signed-off-by: Chunming Zhou <david1.zhou@amd.com>
>> Cc: Christian Konig <christian.koenig@amd.com>
>> Cc: Dave Airlie <airlied@redhat.com>
>> Cc: Daniel Rakos <Daniel.Rakos@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c |   7 +-
>>   drivers/gpu/drm/drm_syncobj.c          | 385 
>> ++++++++++++++++++++++++++++++++-
>>   drivers/gpu/drm/v3d/v3d_gem.c          |   4 +-
>>   drivers/gpu/drm/vc4/vc4_gem.c          |   2 +-
>>   include/drm/drm_syncobj.h              |  45 +++-
>>   include/uapi/drm/drm.h                 |   3 +-
>>   6 files changed, 435 insertions(+), 11 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> index d42d1c8f78f6..463cc8960723 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> @@ -1105,7 +1105,7 @@ static int 
>> amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p,
>>   {
>>       int r;
>>       struct dma_fence *fence;
>> -    r = drm_syncobj_find_fence(p->filp, handle, &fence);
>> +    r = drm_syncobj_find_fence(p->filp, handle, &fence, 0);
>>       if (r)
>>           return r;
>>   @@ -1193,8 +1193,9 @@ static void 
>> amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p)
>>   {
>>       int i;
>>   -    for (i = 0; i < p->num_post_dep_syncobjs; ++i)
>> -        drm_syncobj_replace_fence(p->post_dep_syncobjs[i], p->fence);
>> +    for (i = 0; i < p->num_post_dep_syncobjs; ++i) {
>> +        drm_syncobj_signal_fence(p->post_dep_syncobjs[i], p->fence, 0);
>> +    }
>>   }
>>     static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
>> diff --git a/drivers/gpu/drm/drm_syncobj.c 
>> b/drivers/gpu/drm/drm_syncobj.c
>> index 70af32d0def1..3709f36c901e 100644
>> --- a/drivers/gpu/drm/drm_syncobj.c
>> +++ b/drivers/gpu/drm/drm_syncobj.c
>> @@ -187,6 +187,191 @@ void drm_syncobj_replace_fence(struct 
>> drm_syncobj *syncobj,
>>   }
>>   EXPORT_SYMBOL(drm_syncobj_replace_fence);
>>   +static void drm_syncobj_timeline_signal_submission_fences(struct 
>> drm_syncobj *syncobj)
>> +{
>> +    struct rb_node *node = NULL;
>> +    struct drm_syncobj_wait_pt *wait_pt = NULL;
>> +
>> +    spin_lock(&syncobj->lock);
>> +    for(node = rb_first(&syncobj->syncobj_timeline.wait_pt_tree);
>> +        node != NULL; node = rb_next(node)) {
>> +        wait_pt = rb_entry(node, struct drm_syncobj_wait_pt, node);
>> +        if (wait_pt->value <= syncobj->syncobj_timeline.signal_point) {
>> +            if (wait_pt->submission_fence)
>> + dma_fence_signal(&wait_pt->submission_fence->base);
>> +        } else {
>> +            /* the loop is from left to right, the later entry value is
>> +             * bigger, so don't need to check any more */
>> +            break;
>> +        }
>> +    }
>> +    spin_unlock(&syncobj->lock);
>> +}
>> +
>> +static void drm_syncobj_timeline_signal_wait_pts(struct drm_syncobj 
>> *syncobj)
>> +{
>> +    struct rb_node *node = NULL;
>> +    struct drm_syncobj_wait_pt *wait_pt = NULL;
>> +
>> +    spin_lock(&syncobj->lock);
>> +    for(node = rb_first(&syncobj->syncobj_timeline.wait_pt_tree);
>> +        node != NULL; ) {
>> +        wait_pt = rb_entry(node, struct drm_syncobj_wait_pt, node);
>> +        node = rb_next(node);
>> +        if (wait_pt->value <= syncobj->syncobj_timeline.timeline) {
>> +            dma_fence_signal(&wait_pt->base.base);
>> +            rb_erase(&wait_pt->node,
>> + &syncobj->syncobj_timeline.wait_pt_tree);
>> +            RB_CLEAR_NODE(&wait_pt->node);
>> +            if (wait_pt->submission_fence)
>> + dma_fence_put(&wait_pt->submission_fence->base);
>> +            /* kfree(wait_pt) is excuted by fence put */
>> +            dma_fence_put(&wait_pt->base.base);
>> +        } else {
>> +            /* the loop is from left to right, the later entry value is
>> +             * bigger, so don't need to check any more */
>> +            break;
>> +        }
>> +    }
>> +    spin_unlock(&syncobj->lock);
>> +}
>> +
>> +
>> +static void pt_fence_cb(struct drm_syncobj_signal_pt *signal_pt)
>> +{
>> +    struct dma_fence *fence = NULL;
>> +    struct drm_syncobj *syncobj;
>> +
>> +    fence = signal_pt->signal_fence;
>> +    signal_pt->signal_fence = NULL;
>> +    dma_fence_put(fence);
>> +    fence = signal_pt->pre_pt_base;
>> +    signal_pt->pre_pt_base = NULL;
>> +    dma_fence_put(fence);
>> +
>> +    syncobj = signal_pt->syncobj;
>> +    spin_lock(&syncobj->lock);
>> +    list_del(&signal_pt->list);
>> +    syncobj->syncobj_timeline.timeline = signal_pt->value;
>> +    spin_unlock(&syncobj->lock);
>> +    /* kfree(signal_pt) will be  executed by below fence put */
>> +    dma_fence_put(&signal_pt->base.base);
>> +    drm_syncobj_timeline_signal_wait_pts(syncobj);
>> +}
>> +static void pt_signal_fence_func(struct dma_fence *fence,
>> +                 struct dma_fence_cb *cb)
>> +{
>> +    struct drm_syncobj_signal_pt *signal_pt =
>> +        container_of(cb, struct drm_syncobj_signal_pt, signal_cb);
>> +
>> +    if (signal_pt->pre_pt_base &&
>> +        !dma_fence_is_signaled(signal_pt->pre_pt_base))
>> +        return;
>> +
>> +    pt_fence_cb(signal_pt);
>> +}
>> +static void pt_pre_fence_func(struct dma_fence *fence,
>> +                 struct dma_fence_cb *cb)
>> +{
>> +    struct drm_syncobj_signal_pt *signal_pt =
>> +        container_of(cb, struct drm_syncobj_signal_pt, pre_pt_cb);
>> +
>> +    if (signal_pt->signal_fence &&
>> +        !dma_fence_is_signaled(signal_pt->pre_pt_base))
>> +        return;
>> +
>> +    pt_fence_cb(signal_pt);
>> +}
>> +
>> +static int drm_syncobj_timeline_signal_fence(struct drm_syncobj 
>> *syncobj,
>> +                         struct dma_fence *fence,
>> +                         u64 point)
>> +{
>> +    struct drm_syncobj_signal_pt *signal_pt =
>> +        kzalloc(sizeof(struct drm_syncobj_signal_pt), GFP_KERNEL);
>> +    struct drm_syncobj_signal_pt *tail_pt;
>> +    struct dma_fence *tail_pt_fence = NULL;
>> +    int ret = 0;
>> +
>> +    if (!signal_pt)
>> +        return -ENOMEM;
>> +    if (syncobj->syncobj_timeline.signal_point >= point) {
>> +        DRM_WARN("A later signal is ready!");
>> +        goto out;
>> +    }
>> +    if (fence)
>> +        dma_fence_get(fence);
>> +    spin_lock(&syncobj->lock);
>> +    spin_lock_init(&signal_pt->base.lock);
>> +    dma_fence_init(&signal_pt->base.base,
>> +               &drm_syncobj_stub_fence_ops,
>> +               &signal_pt->base.lock,
>> +               syncobj->syncobj_timeline.timeline_context, point);
>> +    signal_pt->signal_fence =
>> +        rcu_dereference_protected(fence,
>> +                      lockdep_is_held(&fence->lock));
>> +    if (!list_empty(&syncobj->syncobj_timeline.signal_pt_list)) {
>> +        tail_pt = 
>> list_last_entry(&syncobj->syncobj_timeline.signal_pt_list,
>> +                      struct drm_syncobj_signal_pt, list);
>> +        tail_pt_fence = &tail_pt->base.base;
>> +        if (dma_fence_is_signaled(tail_pt_fence))
>> +            tail_pt_fence = NULL;
>> +    }
>> +    if (tail_pt_fence)
>> +        signal_pt->pre_pt_base =
>> + dma_fence_get(rcu_dereference_protected(tail_pt_fence,
>> + lockdep_is_held(&tail_pt_fence->lock)));
>> +
>> +    signal_pt->value = point;
>> +    syncobj->syncobj_timeline.signal_point = point;
>> +    signal_pt->syncobj = syncobj;
>> +    INIT_LIST_HEAD(&signal_pt->list);
>> +    list_add_tail(&signal_pt->list, 
>> &syncobj->syncobj_timeline.signal_pt_list);
>> +    spin_unlock(&syncobj->lock);
>> +    drm_syncobj_timeline_signal_submission_fences(syncobj);
>> +    /**
>> +     * Every pt is depending on signal fence and previous pt fence, add
>> +     * callbacks to them
>> +     */
>> +    if (!dma_fence_is_signaled(signal_pt->signal_fence))
>> +        dma_fence_add_callback(signal_pt->signal_fence,
>> +                       &signal_pt->signal_cb,
>> +                       pt_signal_fence_func);
>> +    else
>> +        pt_signal_fence_func(signal_pt->signal_fence,
>> +                     &signal_pt->signal_cb);
>> +    if (signal_pt->pre_pt_base && 
>> !dma_fence_is_signaled(signal_pt->pre_pt_base))
>> +        dma_fence_add_callback(signal_pt->pre_pt_base,
>> +                       &signal_pt->pre_pt_cb,
>> +                       pt_pre_fence_func);
>> +    else
>> +        pt_pre_fence_func(signal_pt->pre_pt_base, 
>> &signal_pt->pre_pt_cb);
>> +
>> +
>> +    return 0;
>> +out:
>> +    kfree(signal_pt);
>> +    return ret;
>> +}
>> +
>> +/**
>> + * drm_syncobj_signal_fence - place fence into a sync object.
>> + * @syncobj: Sync object to replace fence in
>> + * @fence: fence to install in sync file.
>> + *
>> + * This places the fence into normal or timeline sync object
>> + */
>> +void drm_syncobj_signal_fence(struct drm_syncobj *syncobj,
>> +                  struct dma_fence *fence,
>> +                  u64 point)
>> +{
>> +    if (syncobj->type == DRM_SYNCOBJ_TYPE_NORMAL)
>> +        drm_syncobj_replace_fence(syncobj, fence);
>> +    else if (syncobj->type == DRM_SYNCOBJ_TYPE_TIMELINE)
>> +        drm_syncobj_timeline_signal_fence(syncobj, fence, point);
>> +}
>> +EXPORT_SYMBOL(drm_syncobj_signal_fence);
>> +
>>   static int drm_syncobj_assign_null_handle(struct drm_syncobj *syncobj)
>>   {
>>       struct drm_syncobj_stub_fence *fence;
>> @@ -205,12 +390,150 @@ static int 
>> drm_syncobj_assign_null_handle(struct drm_syncobj *syncobj)
>>         return 0;
>>   }
>> +static struct drm_syncobj_wait_pt *
>> +drm_syncobj_timeline_lookup_wait_pt(struct drm_syncobj *syncobj, u64 
>> point)
>> +{
>> +    struct rb_node *node = 
>> syncobj->syncobj_timeline.wait_pt_tree.rb_node;
>> +    struct drm_syncobj_wait_pt *wait_pt = NULL;
>> +
>> +
>> +    spin_lock(&syncobj->lock);
>> +    while(node) {
>> +        int result = point - wait_pt->value;
>> +
>> +        wait_pt = rb_entry(node, struct drm_syncobj_wait_pt, node);
>> +        if (result < 0)
>> +            node = node->rb_left;
>> +        else if (result > 0)
>> +            node = node->rb_right;
>> +        else
>> +            break;
>> +    }
>> +    spin_unlock(&syncobj->lock);
>> +
>> +    return wait_pt;
>> +}
>> +
>> +static struct drm_syncobj_wait_pt *
>> +drm_syncobj_timeline_create_wait_pt(struct drm_syncobj *syncobj, u64 
>> point)
>> +{
>> +    struct drm_syncobj_wait_pt *wait_pt;
>> +    struct rb_node **new = 
>> &(syncobj->syncobj_timeline.wait_pt_tree.rb_node), *parent = NULL;
>> +
>> +    wait_pt = kzalloc(sizeof(*wait_pt), GFP_KERNEL);
>> +    if (!wait_pt)
>> +        return NULL;
>> +    spin_lock_init(&wait_pt->base.lock);
>> +    dma_fence_init(&wait_pt->base.base,
>> +               &drm_syncobj_stub_fence_ops,
>> +               &wait_pt->base.lock,
>> +               syncobj->syncobj_timeline.timeline_context, point);
>> +    wait_pt->submission_fence = NULL;
>> +    /* check if the pt is already sumbitted, if yes, then don't need to
>> +     * create submission fence, otherwise create it */
>> +    if (point > syncobj->syncobj_timeline.signal_point) {
>> +        wait_pt->submission_fence =
>> +            kzalloc(sizeof(struct drm_syncobj_stub_fence),
>> +                GFP_KERNEL);
>> +        if (!wait_pt->submission_fence) {
>> +            dma_fence_put(&wait_pt->base.base);
>> +            return NULL;
>> +        }
>> + spin_lock_init(&wait_pt->submission_fence->lock);
>> + dma_fence_init(&wait_pt->submission_fence->base,
>> +                   &drm_syncobj_stub_fence_ops,
>> +                   &wait_pt->submission_fence->lock,
>> + syncobj->syncobj_timeline.submission_context,
>> +                   point);
>> +    }
>> +    wait_pt->value = point;
>> +
>> +    /* wait pt must be in an order, so that we can easily lookup and 
>> signal
>> +     * it */
>> +    spin_lock(&syncobj->lock);
>> +    if (point <= syncobj->syncobj_timeline.timeline)
>> +        dma_fence_signal(&wait_pt->base.base);
>> +    if (wait_pt->submission_fence &&
>> +        point <= syncobj->syncobj_timeline.signal_point)
>> + dma_fence_signal(&wait_pt->submission_fence->base);
>> +    while(*new) {
>> +        struct drm_syncobj_wait_pt *this =
>> +            rb_entry(*new, struct drm_syncobj_wait_pt, node);
>> +        int result = wait_pt->value - this->value;
>> +
>> +        parent = *new;
>> +        if (result < 0)
>> +            new = &((*new)->rb_left);
>> +        else if (result > 0)
>> +            new = &((*new)->rb_right);
>> +        else
>> +            goto exist;
>> +    }
>> +
>> +    rb_link_node(&wait_pt->node, parent, new);
>> +    rb_insert_color(&wait_pt->node, 
>> &syncobj->syncobj_timeline.wait_pt_tree);
>> +    spin_unlock(&syncobj->lock);
>> +    return wait_pt;
>> +exist:
>> +    spin_unlock(&syncobj->lock);
>> +    if (wait_pt->submission_fence)
>> + dma_fence_put(&wait_pt->submission_fence->base);
>> +    dma_fence_put(&wait_pt->base.base);
>> +    wait_pt = drm_syncobj_timeline_lookup_wait_pt(syncobj, point);
>> +    return wait_pt;
>> +}
>> +
>> +static struct dma_fence *
>> +drm_syncobj_timeline_point_get(struct drm_syncobj *syncobj, u64 
>> point, u64 flag)
>> +{
>> +    struct drm_syncobj_wait_pt *wait_pt;
>> +
>> +    /* already signaled, simply return a signaled stub fence */
>> +    if (point <= syncobj->syncobj_timeline.timeline) {
>> +        struct drm_syncobj_stub_fence *fence;
>> +
>> +        fence = kzalloc(sizeof(*fence), GFP_KERNEL);
>> +        if (fence == NULL)
>> +            return NULL;
>> +
>> +        spin_lock_init(&fence->lock);
>> +        dma_fence_init(&fence->base, &drm_syncobj_stub_fence_ops,
>> +                   &fence->lock, 0, 0);
>> +        dma_fence_signal(&fence->base);
>> +        return &fence->base;
>> +    }
>> +
>> +    /* check if the wait pt exists */
>> +    wait_pt = drm_syncobj_timeline_lookup_wait_pt(syncobj, point);
>> +    if (!wait_pt) {
>> +        /* This is a new wait pt, so create it */
>> +        wait_pt = drm_syncobj_timeline_create_wait_pt(syncobj, point);
>> +        if (!wait_pt)
>> +            return NULL;
>> +    }
>> +    if (wait_pt) {
>> +        struct dma_fence *fence;
>> +        if (wait_pt->submission_fence) {
>> +            if (flag & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT)
>> + dma_fence_wait(&wait_pt->submission_fence->base, true);
>> +            else if 
>> (!dma_fence_is_signaled(&wait_pt->submission_fence->base))
>> +                return NULL;
>> +        }
>> +        rcu_read_lock();
>> +        fence = dma_fence_get_rcu(&wait_pt->base.base);
>> +        rcu_read_unlock();
>> +        return fence;
>> +    }
>> +    return NULL;
>> +}
>> +
>>     /**
>>    * drm_syncobj_find_fence - lookup and reference the fence in a 
>> sync object
>>    * @file_private: drm file private pointer
>>    * @handle: sync object handle to lookup.
>>    * @fence: out parameter for the fence
>> + * @point: timeline point
>>    *
>>    * This is just a convenience function that combines 
>> drm_syncobj_find() and
>>    * drm_syncobj_fence_get().
>> @@ -221,7 +544,8 @@ static int drm_syncobj_assign_null_handle(struct 
>> drm_syncobj *syncobj)
>>    */
>>   int drm_syncobj_find_fence(struct drm_file *file_private,
>>                  u32 handle,
>> -               struct dma_fence **fence)
>> +               struct dma_fence **fence,
>> +               u64 point)
>>   {
>>       struct drm_syncobj *syncobj = drm_syncobj_find(file_private, 
>> handle);
>>       int ret = 0;
>> @@ -229,7 +553,15 @@ int drm_syncobj_find_fence(struct drm_file 
>> *file_private,
>>       if (!syncobj)
>>           return -ENOENT;
>>   -    *fence = drm_syncobj_fence_get(syncobj);
>> +    if (syncobj->type == DRM_SYNCOBJ_TYPE_NORMAL) {
>> +        *fence = drm_syncobj_fence_get(syncobj);
>> +    }else if (syncobj->type == DRM_SYNCOBJ_TYPE_TIMELINE) {
>> +        *fence = drm_syncobj_timeline_point_get(syncobj, point,
>> + DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT);
>> +    } else {
>> +        DRM_ERROR("invalid syncobj type\n");
>> +        return -EINVAL;
>> +    }
>>       if (!*fence) {
>>           ret = -EINVAL;
>>       }
>> @@ -238,6 +570,35 @@ int drm_syncobj_find_fence(struct drm_file 
>> *file_private,
>>   }
>>   EXPORT_SYMBOL(drm_syncobj_find_fence);
>>   +static void drm_syncobj_timeline_fini(struct drm_syncobj *syncobj,
>> +                      struct drm_syncobj_timeline *syncobj_timeline)
>> +{
>> +    struct rb_node *node = NULL;
>> +    struct drm_syncobj_wait_pt *wait_pt = NULL;
>> +    struct drm_syncobj_signal_pt *signal_pt = NULL, *tmp;
>> +
>> +    spin_lock(&syncobj->lock);
>> +    for(node = rb_first(&syncobj_timeline->wait_pt_tree);
>> +        node != NULL; ) {
>> +        wait_pt = rb_entry(node, struct drm_syncobj_wait_pt, node);
>> +        node = rb_next(node);
>> +        rb_erase(&wait_pt->node,
>> +             &syncobj_timeline->wait_pt_tree);
>> +        RB_CLEAR_NODE(&wait_pt->node);
>> +        if (wait_pt->submission_fence)
>> + dma_fence_put(&wait_pt->submission_fence->base);
>> +        /* kfree(wait_pt) is excuted by fence put */
>> +        dma_fence_put(&wait_pt->base.base);
>> +    }
>> +    list_for_each_entry_safe(signal_pt, tmp,
>> +                 &syncobj_timeline->signal_pt_list, list) {
>> +        list_del(&signal_pt->list);
>> +        dma_fence_put(signal_pt->signal_fence);
>> +        dma_fence_put(signal_pt->pre_pt_base);
>> +        dma_fence_put(&signal_pt->base.base);
>> +    }
>> +    spin_unlock(&syncobj->lock);
>> +}
>>   /**
>>    * drm_syncobj_free - free a sync object.
>>    * @kref: kref to free.
>> @@ -250,10 +611,22 @@ void drm_syncobj_free(struct kref *kref)
>>                              struct drm_syncobj,
>>                              refcount);
>>       drm_syncobj_replace_fence(syncobj, NULL);
>> +    drm_syncobj_timeline_fini(syncobj, &syncobj->syncobj_timeline);
>>       kfree(syncobj);
>>   }
>>   EXPORT_SYMBOL(drm_syncobj_free);
>>   +static void drm_syncobj_timeline_init(struct drm_syncobj_timeline
>> +                      *syncobj_timeline)
>> +{
>> +    syncobj_timeline->timeline_context = dma_fence_context_alloc(1);
>> +    syncobj_timeline->submission_context = dma_fence_context_alloc(1);
>> +    syncobj_timeline->timeline = 0;
>> +    syncobj_timeline->signal_point = 0;
>> +
>> +    syncobj_timeline->wait_pt_tree = RB_ROOT;
>> +    INIT_LIST_HEAD(&syncobj_timeline->signal_pt_list);
>> +}
>>   /**
>>    * drm_syncobj_create - create a new syncobj
>>    * @out_syncobj: returned syncobj
>> @@ -279,6 +652,12 @@ int drm_syncobj_create(struct drm_syncobj 
>> **out_syncobj, uint32_t flags,
>>       kref_init(&syncobj->refcount);
>>       INIT_LIST_HEAD(&syncobj->cb_list);
>>       spin_lock_init(&syncobj->lock);
>> +    if (flags & DRM_SYNCOBJ_CREATE_TYPE_TIMELINE) {
>> +        syncobj->type = DRM_SYNCOBJ_TYPE_TIMELINE;
>> + drm_syncobj_timeline_init(&syncobj->syncobj_timeline);
>> +    } else {
>> +        syncobj->type = DRM_SYNCOBJ_TYPE_NORMAL;
>> +    }
>>         if (flags & DRM_SYNCOBJ_CREATE_SIGNALED) {
>>           ret = drm_syncobj_assign_null_handle(syncobj);
>> @@ -491,7 +870,7 @@ static int drm_syncobj_export_sync_file(struct 
>> drm_file *file_private,
>>       if (fd < 0)
>>           return fd;
>>   -    ret = drm_syncobj_find_fence(file_private, handle, &fence);
>> +    ret = drm_syncobj_find_fence(file_private, handle, &fence, 0);
>>       if (ret)
>>           goto err_put_fd;
>>   diff --git a/drivers/gpu/drm/v3d/v3d_gem.c 
>> b/drivers/gpu/drm/v3d/v3d_gem.c
>> index 9029590267aa..f24c3eccb4d5 100644
>> --- a/drivers/gpu/drm/v3d/v3d_gem.c
>> +++ b/drivers/gpu/drm/v3d/v3d_gem.c
>> @@ -521,12 +521,12 @@ v3d_submit_cl_ioctl(struct drm_device *dev, 
>> void *data,
>>       kref_init(&exec->refcount);
>>         ret = drm_syncobj_find_fence(file_priv, args->in_sync_bcl,
>> -                     &exec->bin.in_fence);
>> +                     &exec->bin.in_fence, 0);
>>       if (ret == -EINVAL)
>>           goto fail;
>>         ret = drm_syncobj_find_fence(file_priv, args->in_sync_rcl,
>> -                     &exec->render.in_fence);
>> +                     &exec->render.in_fence, 0);
>>       if (ret == -EINVAL)
>>           goto fail;
>>   diff --git a/drivers/gpu/drm/vc4/vc4_gem.c 
>> b/drivers/gpu/drm/vc4/vc4_gem.c
>> index 7910b9acedd6..f7b4971342e8 100644
>> --- a/drivers/gpu/drm/vc4/vc4_gem.c
>> +++ b/drivers/gpu/drm/vc4/vc4_gem.c
>> @@ -1173,7 +1173,7 @@ vc4_submit_cl_ioctl(struct drm_device *dev, 
>> void *data,
>>         if (args->in_sync) {
>>           ret = drm_syncobj_find_fence(file_priv, args->in_sync,
>> -                         &in_fence);
>> +                         &in_fence, 0);
>>           if (ret)
>>               goto fail;
>>   diff --git a/include/drm/drm_syncobj.h b/include/drm/drm_syncobj.h
>> index b04c492ddbb5..b6e193c6cc9a 100644
>> --- a/include/drm/drm_syncobj.h
>> +++ b/include/drm/drm_syncobj.h
>> @@ -54,6 +54,41 @@ const struct dma_fence_ops 
>> drm_syncobj_stub_fence_ops = {
>>       .release = NULL,
>>   };
>>   +enum drm_syncobj_type {
>> +    DRM_SYNCOBJ_TYPE_NORMAL,
>> +    DRM_SYNCOBJ_TYPE_TIMELINE
>> +};
>> +
>> +struct drm_syncobj;
>> +struct drm_syncobj_wait_pt {
>> +    struct drm_syncobj_stub_fence base;
>> +    struct drm_syncobj_stub_fence *submission_fence;
>> +    u64    value;
>> +    struct rb_node   node;
>> +};
>> +struct drm_syncobj_signal_pt {
>> +    struct drm_syncobj_stub_fence base;
>> +    struct dma_fence *signal_fence;
>> +    struct dma_fence *pre_pt_base;
>> +    struct dma_fence_cb signal_cb;
>> +    struct dma_fence_cb pre_pt_cb;
>> +    struct drm_syncobj *syncobj;
>> +    u64    value;
>> +    struct list_head list;
>> +};
>> +struct drm_syncobj_timeline {
>> +    u64 timeline_context;
>> +    u64 submission_context;
>> +    /**
>> +     * @timeline: syncobj timeline
>> +     */
>> +    u64 timeline;
>> +    u64 signal_point;
>> +
>> +
>> +    struct rb_root wait_pt_tree;
>> +    struct list_head signal_pt_list;
>> +};
>>   /**
>>    * struct drm_syncobj - sync object.
>>    *
>> @@ -64,6 +99,11 @@ struct drm_syncobj {
>>        * @refcount: Reference count of this object.
>>        */
>>       struct kref refcount;
>> +    /**
>> +     * @type: indicate syncobj type
>> +     */
>> +    enum drm_syncobj_type type;
>> +    struct drm_syncobj_timeline syncobj_timeline;
>>       /**
>>        * @fence:
>>        * NULL or a pointer to the fence bound to this object.
>> @@ -162,9 +202,12 @@ void drm_syncobj_remove_callback(struct 
>> drm_syncobj *syncobj,
>>                    struct drm_syncobj_cb *cb);
>>   void drm_syncobj_replace_fence(struct drm_syncobj *syncobj,
>>                      struct dma_fence *fence);
>> +void drm_syncobj_signal_fence(struct drm_syncobj *syncobj,
>> +                  struct dma_fence *fence,
>> +                  u64 point);
>>   int drm_syncobj_find_fence(struct drm_file *file_private,
>>                  u32 handle,
>> -               struct dma_fence **fence);
>> +               struct dma_fence **fence, u64 point);
>>   void drm_syncobj_free(struct kref *kref);
>>   int drm_syncobj_create(struct drm_syncobj **out_syncobj, uint32_t 
>> flags,
>>                  struct dma_fence *fence);
>> diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
>> index 300f336633f2..71e6cd1c88f8 100644
>> --- a/include/uapi/drm/drm.h
>> +++ b/include/uapi/drm/drm.h
>> @@ -717,6 +717,8 @@ struct drm_prime_handle {
>>   struct drm_syncobj_create {
>>       __u32 handle;
>>   #define DRM_SYNCOBJ_CREATE_SIGNALED (1 << 0)
>> +#define DRM_SYNCOBJ_CREATE_TYPE_NORMAL (1 << 1)
>> +#define DRM_SYNCOBJ_CREATE_TYPE_TIMELINE (1 << 2)
>>       __u32 flags;
>>   };
>>   @@ -734,7 +736,6 @@ struct drm_syncobj_handle {
>>       __s32 fd;
>>       __u32 pad;
>>   };
>> -
>>   #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL (1 << 0)
>>   #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT (1 << 1)
>>   struct drm_syncobj_wait {
>
On Wed, Aug 22, 2018 at 04:49:28PM +0800, Chunming Zhou wrote:
> VK_KHR_timeline_semaphore:
> This extension introduces a new type of semaphore that has an integer payload
> identifying a point in a timeline. Such timeline semaphores support the
> following operations:
>   * Host query - A host operation that allows querying the payload of the
>     timeline semaphore.
>   * Host wait - A host operation that allows a blocking wait for a
>     timeline semaphore to reach a specified value.
>   * Device wait - A device operation that allows waiting for a
>     timeline semaphore to reach a specified value.
>   * Device signal - A device operation that allows advancing the
>     timeline semaphore to a specified value.
> 
> Since it's a timeline, that means the front time point(PT) always is signaled before the late PT.
> a. signal PT design:
> Signal PT fence N depends on PT[N-1] fence and signal opertion fence, when PT[N] fence is signaled,
> the timeline will increase to value of PT[N].
> b. wait PT design:
> Wait PT fence is signaled by reaching timeline point value, when timeline is increasing, will compare
> wait PTs value with new timeline value, if PT value is lower than timeline value, then wait PT will be
> signaled, otherwise keep in list. semaphore wait operation can wait on any point of timeline,
> so need a RB tree to order them. And wait PT could ahead of signal PT, we need a sumission fence to
> perform that.
> 
> TODO:
> CPU query and wait on timeline semaphore.

Another TODO: igt testcase for all the cornercases. We already have
other syncobj tests in there.

That would also help with understanding how this is supposed to be used,
since I'm a bit too dense to immediately get your algorithm by just
staring at the code.


> Change-Id: I9f09aae225e268442c30451badac40406f24262c
> Signed-off-by: Chunming Zhou <david1.zhou@amd.com>
> Cc: Christian Konig <christian.koenig@amd.com>
> Cc: Dave Airlie <airlied@redhat.com>
> Cc: Daniel Rakos <Daniel.Rakos@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c |   7 +-
>  drivers/gpu/drm/drm_syncobj.c          | 385 ++++++++++++++++++++++++++++++++-
>  drivers/gpu/drm/v3d/v3d_gem.c          |   4 +-
>  drivers/gpu/drm/vc4/vc4_gem.c          |   2 +-
>  include/drm/drm_syncobj.h              |  45 +++-
>  include/uapi/drm/drm.h                 |   3 +-
>  6 files changed, 435 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index d42d1c8f78f6..463cc8960723 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -1105,7 +1105,7 @@ static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p,
>  {
>  	int r;
>  	struct dma_fence *fence;
> -	r = drm_syncobj_find_fence(p->filp, handle, &fence);
> +	r = drm_syncobj_find_fence(p->filp, handle, &fence, 0);
>  	if (r)
>  		return r;
>  
> @@ -1193,8 +1193,9 @@ static void amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p)
>  {
>  	int i;
>  
> -	for (i = 0; i < p->num_post_dep_syncobjs; ++i)
> -		drm_syncobj_replace_fence(p->post_dep_syncobjs[i], p->fence);
> +	for (i = 0; i < p->num_post_dep_syncobjs; ++i) {
> +		drm_syncobj_signal_fence(p->post_dep_syncobjs[i], p->fence, 0);
> +	}
>  }
>  
>  static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
> diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
> index 70af32d0def1..3709f36c901e 100644
> --- a/drivers/gpu/drm/drm_syncobj.c
> +++ b/drivers/gpu/drm/drm_syncobj.c
> @@ -187,6 +187,191 @@ void drm_syncobj_replace_fence(struct drm_syncobj *syncobj,
>  }
>  EXPORT_SYMBOL(drm_syncobj_replace_fence);
>  
> +static void drm_syncobj_timeline_signal_submission_fences(struct drm_syncobj *syncobj)
> +{
> +	struct rb_node *node = NULL;
> +	struct drm_syncobj_wait_pt *wait_pt = NULL;
> +
> +	spin_lock(&syncobj->lock);
> +	for(node = rb_first(&syncobj->syncobj_timeline.wait_pt_tree);
> +	    node != NULL; node = rb_next(node)) {
> +	    wait_pt = rb_entry(node, struct drm_syncobj_wait_pt, node);
> +	    if (wait_pt->value <= syncobj->syncobj_timeline.signal_point) {
> +		    if (wait_pt->submission_fence)
> +			dma_fence_signal(&wait_pt->submission_fence->base);
> +	    } else {
> +		    /* the loop is from left to right, the later entry value is
> +		     * bigger, so don't need to check any more */
> +		    break;
> +	    }

This seems to reinvet syncobj->cb_list. Since this is the exact same
"future fence that doesn't even exist yet" problem I think those two code
path should be unified. In general I think it'd be much better if we treat
the old syncobj as a timeline with a limit of 1 slot only.

Or there needs to be a really good reason why all new code.

Specifically for this stuff here having unified future fence semantics
will allow drivers to do clever stuff with them.

> +	}
> +	spin_unlock(&syncobj->lock);
> +}
> +
> +static void drm_syncobj_timeline_signal_wait_pts(struct drm_syncobj *syncobj)
> +{
> +	struct rb_node *node = NULL;
> +	struct drm_syncobj_wait_pt *wait_pt = NULL;
> +
> +	spin_lock(&syncobj->lock);
> +	for(node = rb_first(&syncobj->syncobj_timeline.wait_pt_tree);
> +	    node != NULL; ) {
> +		wait_pt = rb_entry(node, struct drm_syncobj_wait_pt, node);
> +		node = rb_next(node);
> +		if (wait_pt->value <= syncobj->syncobj_timeline.timeline) {
> +			dma_fence_signal(&wait_pt->base.base);
> +			rb_erase(&wait_pt->node,
> +				 &syncobj->syncobj_timeline.wait_pt_tree);
> +			RB_CLEAR_NODE(&wait_pt->node);
> +			if (wait_pt->submission_fence)
> +				dma_fence_put(&wait_pt->submission_fence->base);
> +			/* kfree(wait_pt) is excuted by fence put */
> +			dma_fence_put(&wait_pt->base.base);
> +		} else {
> +			/* the loop is from left to right, the later entry value is
> +			 * bigger, so don't need to check any more */
> +			break;
> +		}
> +	}
> +	spin_unlock(&syncobj->lock);
> +}
> +
> +
> +static void pt_fence_cb(struct drm_syncobj_signal_pt *signal_pt)
> +{
> +	struct dma_fence *fence = NULL;
> +	struct drm_syncobj *syncobj;
> +
> +	fence = signal_pt->signal_fence;
> +	signal_pt->signal_fence = NULL;
> +	dma_fence_put(fence);
> +	fence = signal_pt->pre_pt_base;
> +	signal_pt->pre_pt_base = NULL;
> +	dma_fence_put(fence);
> +
> +	syncobj = signal_pt->syncobj;
> +	spin_lock(&syncobj->lock);
> +	list_del(&signal_pt->list);
> +	syncobj->syncobj_timeline.timeline = signal_pt->value;
> +	spin_unlock(&syncobj->lock);
> +	/* kfree(signal_pt) will be  executed by below fence put */
> +	dma_fence_put(&signal_pt->base.base);
> +	drm_syncobj_timeline_signal_wait_pts(syncobj);
> +}
> +static void pt_signal_fence_func(struct dma_fence *fence,
> +				 struct dma_fence_cb *cb)
> +{
> +	struct drm_syncobj_signal_pt *signal_pt =
> +		container_of(cb, struct drm_syncobj_signal_pt, signal_cb);
> +
> +	if (signal_pt->pre_pt_base &&
> +	    !dma_fence_is_signaled(signal_pt->pre_pt_base))
> +		return;
> +
> +	pt_fence_cb(signal_pt);
> +}
> +static void pt_pre_fence_func(struct dma_fence *fence,
> +				 struct dma_fence_cb *cb)
> +{
> +	struct drm_syncobj_signal_pt *signal_pt =
> +		container_of(cb, struct drm_syncobj_signal_pt, pre_pt_cb);
> +
> +	if (signal_pt->signal_fence &&
> +	    !dma_fence_is_signaled(signal_pt->pre_pt_base))
> +		return;
> +
> +	pt_fence_cb(signal_pt);
> +}
> +
> +static int drm_syncobj_timeline_signal_fence(struct drm_syncobj *syncobj,
> +					     struct dma_fence *fence,
> +					     u64 point)
> +{
> +	struct drm_syncobj_signal_pt *signal_pt =
> +		kzalloc(sizeof(struct drm_syncobj_signal_pt), GFP_KERNEL);
> +	struct drm_syncobj_signal_pt *tail_pt;
> +	struct dma_fence *tail_pt_fence = NULL;
> +	int ret = 0;
> +
> +	if (!signal_pt)
> +		return -ENOMEM;
> +	if (syncobj->syncobj_timeline.signal_point >= point) {
> +		DRM_WARN("A later signal is ready!");
> +		goto out;
> +	}
> +	if (fence)
> +		dma_fence_get(fence);
> +	spin_lock(&syncobj->lock);
> +	spin_lock_init(&signal_pt->base.lock);
> +	dma_fence_init(&signal_pt->base.base,
> +		       &drm_syncobj_stub_fence_ops,
> +		       &signal_pt->base.lock,
> +		       syncobj->syncobj_timeline.timeline_context, point);
> +	signal_pt->signal_fence =
> +		rcu_dereference_protected(fence,
> +					  lockdep_is_held(&fence->lock));
> +	if (!list_empty(&syncobj->syncobj_timeline.signal_pt_list)) {
> +		tail_pt = list_last_entry(&syncobj->syncobj_timeline.signal_pt_list,
> +					  struct drm_syncobj_signal_pt, list);
> +		tail_pt_fence = &tail_pt->base.base;
> +		if (dma_fence_is_signaled(tail_pt_fence))
> +			tail_pt_fence = NULL;
> +	}
> +	if (tail_pt_fence)
> +		signal_pt->pre_pt_base =
> +			dma_fence_get(rcu_dereference_protected(tail_pt_fence,
> +								lockdep_is_held(&tail_pt_fence->lock)));
> +
> +	signal_pt->value = point;
> +	syncobj->syncobj_timeline.signal_point = point;
> +	signal_pt->syncobj = syncobj;
> +	INIT_LIST_HEAD(&signal_pt->list);
> +	list_add_tail(&signal_pt->list, &syncobj->syncobj_timeline.signal_pt_list);
> +	spin_unlock(&syncobj->lock);
> +	drm_syncobj_timeline_signal_submission_fences(syncobj);
> +	/**
> +	 * Every pt is depending on signal fence and previous pt fence, add
> +	 * callbacks to them
> +	 */
> +	if (!dma_fence_is_signaled(signal_pt->signal_fence))
> +		dma_fence_add_callback(signal_pt->signal_fence,
> +				       &signal_pt->signal_cb,
> +				       pt_signal_fence_func);
> +	else
> +		pt_signal_fence_func(signal_pt->signal_fence,
> +				     &signal_pt->signal_cb);
> +	if (signal_pt->pre_pt_base && !dma_fence_is_signaled(signal_pt->pre_pt_base))
> +		dma_fence_add_callback(signal_pt->pre_pt_base,
> +				       &signal_pt->pre_pt_cb,
> +				       pt_pre_fence_func);
> +	else
> +		pt_pre_fence_func(signal_pt->pre_pt_base, &signal_pt->pre_pt_cb);
> +
> +
> +	return 0;
> +out:
> +	kfree(signal_pt);
> +	return ret;
> +}
> +
> +/**
> + * drm_syncobj_signal_fence - place fence into a sync object.
> + * @syncobj: Sync object to replace fence in
> + * @fence: fence to install in sync file.
> + *
> + * This places the fence into normal or timeline sync object
> + */
> +void drm_syncobj_signal_fence(struct drm_syncobj *syncobj,
> +			      struct dma_fence *fence,
> +			      u64 point)

This is a very confusion function name. Feels like
drm_syncobj_timeline_signal_fence is rather misnamed. I think we should
just extend replace_fence like you do with find_fence already.

> +{
> +	if (syncobj->type == DRM_SYNCOBJ_TYPE_NORMAL)
> +		drm_syncobj_replace_fence(syncobj, fence);
> +	else if (syncobj->type == DRM_SYNCOBJ_TYPE_TIMELINE)
> +		drm_syncobj_timeline_signal_fence(syncobj, fence, point);
> +}
> +EXPORT_SYMBOL(drm_syncobj_signal_fence);
> +
>  static int drm_syncobj_assign_null_handle(struct drm_syncobj *syncobj)
>  {
>  	struct drm_syncobj_stub_fence *fence;
> @@ -205,12 +390,150 @@ static int drm_syncobj_assign_null_handle(struct drm_syncobj *syncobj)
>  
>  	return 0;
>  }
> +static struct drm_syncobj_wait_pt *
> +drm_syncobj_timeline_lookup_wait_pt(struct drm_syncobj *syncobj, u64 point)
> +{
> +    struct rb_node *node = syncobj->syncobj_timeline.wait_pt_tree.rb_node;
> +    struct drm_syncobj_wait_pt *wait_pt = NULL;
> +
> +
> +    spin_lock(&syncobj->lock);
> +    while(node) {
> +	    int result = point - wait_pt->value;
> +
> +	    wait_pt = rb_entry(node, struct drm_syncobj_wait_pt, node);
> +	    if (result < 0)
> +		    node = node->rb_left;
> +	    else if (result > 0)
> +		    node = node->rb_right;
> +	    else
> +		    break;
> +    }
> +    spin_unlock(&syncobj->lock);
> +
> +    return wait_pt;
> +}
> +
> +static struct drm_syncobj_wait_pt *
> +drm_syncobj_timeline_create_wait_pt(struct drm_syncobj *syncobj, u64 point)
> +{
> +	struct drm_syncobj_wait_pt *wait_pt;
> +	struct rb_node **new = &(syncobj->syncobj_timeline.wait_pt_tree.rb_node), *parent = NULL;
> +
> +	wait_pt = kzalloc(sizeof(*wait_pt), GFP_KERNEL);
> +	if (!wait_pt)
> +		return NULL;
> +	spin_lock_init(&wait_pt->base.lock);
> +	dma_fence_init(&wait_pt->base.base,
> +		       &drm_syncobj_stub_fence_ops,
> +		       &wait_pt->base.lock,
> +		       syncobj->syncobj_timeline.timeline_context, point);
> +	wait_pt->submission_fence = NULL;
> +	/* check if the pt is already sumbitted, if yes, then don't need to
> +	 * create submission fence, otherwise create it */
> +	if (point > syncobj->syncobj_timeline.signal_point) {
> +		wait_pt->submission_fence =
> +			kzalloc(sizeof(struct drm_syncobj_stub_fence),
> +				GFP_KERNEL);
> +		if (!wait_pt->submission_fence) {
> +			dma_fence_put(&wait_pt->base.base);
> +			return NULL;
> +		}
> +		spin_lock_init(&wait_pt->submission_fence->lock);
> +		dma_fence_init(&wait_pt->submission_fence->base,
> +			       &drm_syncobj_stub_fence_ops,
> +			       &wait_pt->submission_fence->lock,
> +			       syncobj->syncobj_timeline.submission_context,
> +			       point);
> +	}
> +	wait_pt->value = point;
> +
> +	/* wait pt must be in an order, so that we can easily lookup and signal
> +	 * it */
> +	spin_lock(&syncobj->lock);
> +	if (point <= syncobj->syncobj_timeline.timeline)
> +		dma_fence_signal(&wait_pt->base.base);
> +	if (wait_pt->submission_fence &&
> +	    point <= syncobj->syncobj_timeline.signal_point)
> +		dma_fence_signal(&wait_pt->submission_fence->base);
> +	while(*new) {
> +		struct drm_syncobj_wait_pt *this =
> +			rb_entry(*new, struct drm_syncobj_wait_pt, node);
> +		int result = wait_pt->value - this->value;
> +
> +		parent = *new;
> +		if (result < 0)
> +			new = &((*new)->rb_left);
> +		else if (result > 0)
> +			new = &((*new)->rb_right);
> +		else
> +			goto exist;
> +	}
> +
> +	rb_link_node(&wait_pt->node, parent, new);
> +	rb_insert_color(&wait_pt->node, &syncobj->syncobj_timeline.wait_pt_tree);
> +	spin_unlock(&syncobj->lock);
> +	return wait_pt;
> +exist:
> +	spin_unlock(&syncobj->lock);
> +	if (wait_pt->submission_fence)
> +		dma_fence_put(&wait_pt->submission_fence->base);
> +	dma_fence_put(&wait_pt->base.base);
> +	wait_pt = drm_syncobj_timeline_lookup_wait_pt(syncobj, point);
> +	return wait_pt;
> +}
> +
> +static struct dma_fence *
> +drm_syncobj_timeline_point_get(struct drm_syncobj *syncobj, u64 point, u64 flag)
> +{
> +	struct drm_syncobj_wait_pt *wait_pt;
> +
> +	/* already signaled, simply return a signaled stub fence */
> +	if (point <= syncobj->syncobj_timeline.timeline) {
> +		struct drm_syncobj_stub_fence *fence;
> +
> +		fence = kzalloc(sizeof(*fence), GFP_KERNEL);
> +		if (fence == NULL)
> +			return NULL;
> +
> +		spin_lock_init(&fence->lock);
> +		dma_fence_init(&fence->base, &drm_syncobj_stub_fence_ops,
> +			       &fence->lock, 0, 0);
> +		dma_fence_signal(&fence->base);
> +		return &fence->base;
> +	}
> +
> +	/* check if the wait pt exists */
> +	wait_pt = drm_syncobj_timeline_lookup_wait_pt(syncobj, point);
> +	if (!wait_pt) {
> +		/* This is a new wait pt, so create it */
> +		wait_pt = drm_syncobj_timeline_create_wait_pt(syncobj, point);
> +		if (!wait_pt)
> +			return NULL;
> +	}
> +	if (wait_pt) {
> +		struct dma_fence *fence;
> +		if (wait_pt->submission_fence) {
> +			if (flag & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT)
> +				dma_fence_wait(&wait_pt->submission_fence->base, true);
> +			else if (!dma_fence_is_signaled(&wait_pt->submission_fence->base))
> +				return NULL;
> +		}
> +		rcu_read_lock();
> +		fence = dma_fence_get_rcu(&wait_pt->base.base);
> +		rcu_read_unlock();
> +		return fence;
> +	}
> +	return NULL;
> +}
> +
>  
>  /**
>   * drm_syncobj_find_fence - lookup and reference the fence in a sync object
>   * @file_private: drm file private pointer
>   * @handle: sync object handle to lookup.
>   * @fence: out parameter for the fence
> + * @point: timeline point
>   *
>   * This is just a convenience function that combines drm_syncobj_find() and
>   * drm_syncobj_fence_get().
> @@ -221,7 +544,8 @@ static int drm_syncobj_assign_null_handle(struct drm_syncobj *syncobj)
>   */
>  int drm_syncobj_find_fence(struct drm_file *file_private,
>  			   u32 handle,
> -			   struct dma_fence **fence)
> +			   struct dma_fence **fence,
> +			   u64 point)
>  {
>  	struct drm_syncobj *syncobj = drm_syncobj_find(file_private, handle);
>  	int ret = 0;
> @@ -229,7 +553,15 @@ int drm_syncobj_find_fence(struct drm_file *file_private,
>  	if (!syncobj)
>  		return -ENOENT;
>  
> -	*fence = drm_syncobj_fence_get(syncobj);
> +	if (syncobj->type == DRM_SYNCOBJ_TYPE_NORMAL) {
> +		*fence = drm_syncobj_fence_get(syncobj);

Needs at least a WARN_ON(point != 0) here, since I'd say that would be a
driver bug. Or userspace bug, in which case we need to return -EINVAL;

Hard to tell without the driver implementation.

> +	}else if (syncobj->type == DRM_SYNCOBJ_TYPE_TIMELINE) {
> +		*fence = drm_syncobj_timeline_point_get(syncobj, point,
> +							DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT);
> +	} else {
> +		DRM_ERROR("invalid syncobj type\n");
> +		return -EINVAL;
> +	}
>  	if (!*fence) {
>  		ret = -EINVAL;
>  	}
> @@ -238,6 +570,35 @@ int drm_syncobj_find_fence(struct drm_file *file_private,
>  }
>  EXPORT_SYMBOL(drm_syncobj_find_fence);
>  
> +static void drm_syncobj_timeline_fini(struct drm_syncobj *syncobj,
> +				      struct drm_syncobj_timeline *syncobj_timeline)
> +{
> +	struct rb_node *node = NULL;
> +	struct drm_syncobj_wait_pt *wait_pt = NULL;
> +	struct drm_syncobj_signal_pt *signal_pt = NULL, *tmp;
> +
> +	spin_lock(&syncobj->lock);
> +	for(node = rb_first(&syncobj_timeline->wait_pt_tree);
> +	    node != NULL; ) {
> +		wait_pt = rb_entry(node, struct drm_syncobj_wait_pt, node);
> +		node = rb_next(node);
> +		rb_erase(&wait_pt->node,
> +			 &syncobj_timeline->wait_pt_tree);
> +		RB_CLEAR_NODE(&wait_pt->node);
> +		if (wait_pt->submission_fence)
> +			dma_fence_put(&wait_pt->submission_fence->base);
> +		/* kfree(wait_pt) is excuted by fence put */
> +		dma_fence_put(&wait_pt->base.base);
> +	}
> +	list_for_each_entry_safe(signal_pt, tmp,
> +				 &syncobj_timeline->signal_pt_list, list) {
> +		list_del(&signal_pt->list);
> +		dma_fence_put(signal_pt->signal_fence);
> +		dma_fence_put(signal_pt->pre_pt_base);
> +		dma_fence_put(&signal_pt->base.base);
> +	}
> +	spin_unlock(&syncobj->lock);
> +}
>  /**
>   * drm_syncobj_free - free a sync object.
>   * @kref: kref to free.
> @@ -250,10 +611,22 @@ void drm_syncobj_free(struct kref *kref)
>  						   struct drm_syncobj,
>  						   refcount);
>  	drm_syncobj_replace_fence(syncobj, NULL);
> +	drm_syncobj_timeline_fini(syncobj, &syncobj->syncobj_timeline);
>  	kfree(syncobj);
>  }
>  EXPORT_SYMBOL(drm_syncobj_free);
>  
> +static void drm_syncobj_timeline_init(struct drm_syncobj_timeline
> +				      *syncobj_timeline)
> +{
> +	syncobj_timeline->timeline_context = dma_fence_context_alloc(1);
> +	syncobj_timeline->submission_context = dma_fence_context_alloc(1);
> +	syncobj_timeline->timeline = 0;
> +	syncobj_timeline->signal_point = 0;
> +
> +	syncobj_timeline->wait_pt_tree = RB_ROOT;
> +	INIT_LIST_HEAD(&syncobj_timeline->signal_pt_list);
> +}
>  /**
>   * drm_syncobj_create - create a new syncobj
>   * @out_syncobj: returned syncobj
> @@ -279,6 +652,12 @@ int drm_syncobj_create(struct drm_syncobj **out_syncobj, uint32_t flags,
>  	kref_init(&syncobj->refcount);
>  	INIT_LIST_HEAD(&syncobj->cb_list);
>  	spin_lock_init(&syncobj->lock);
> +	if (flags & DRM_SYNCOBJ_CREATE_TYPE_TIMELINE) {
> +		syncobj->type = DRM_SYNCOBJ_TYPE_TIMELINE;
> +		drm_syncobj_timeline_init(&syncobj->syncobj_timeline);
> +	} else {
> +		syncobj->type = DRM_SYNCOBJ_TYPE_NORMAL;
> +	}
>  
>  	if (flags & DRM_SYNCOBJ_CREATE_SIGNALED) {
>  		ret = drm_syncobj_assign_null_handle(syncobj);
> @@ -491,7 +870,7 @@ static int drm_syncobj_export_sync_file(struct drm_file *file_private,
>  	if (fd < 0)
>  		return fd;
>  
> -	ret = drm_syncobj_find_fence(file_private, handle, &fence);
> +	ret = drm_syncobj_find_fence(file_private, handle, &fence, 0);
>  	if (ret)
>  		goto err_put_fd;
>  
> diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c
> index 9029590267aa..f24c3eccb4d5 100644
> --- a/drivers/gpu/drm/v3d/v3d_gem.c
> +++ b/drivers/gpu/drm/v3d/v3d_gem.c
> @@ -521,12 +521,12 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
>  	kref_init(&exec->refcount);
>  
>  	ret = drm_syncobj_find_fence(file_priv, args->in_sync_bcl,
> -				     &exec->bin.in_fence);
> +				     &exec->bin.in_fence, 0);
>  	if (ret == -EINVAL)
>  		goto fail;
>  
>  	ret = drm_syncobj_find_fence(file_priv, args->in_sync_rcl,
> -				     &exec->render.in_fence);
> +				     &exec->render.in_fence, 0);
>  	if (ret == -EINVAL)
>  		goto fail;
>  
> diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c
> index 7910b9acedd6..f7b4971342e8 100644
> --- a/drivers/gpu/drm/vc4/vc4_gem.c
> +++ b/drivers/gpu/drm/vc4/vc4_gem.c
> @@ -1173,7 +1173,7 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
>  
>  	if (args->in_sync) {
>  		ret = drm_syncobj_find_fence(file_priv, args->in_sync,
> -					     &in_fence);
> +					     &in_fence, 0);
>  		if (ret)
>  			goto fail;
>  
> diff --git a/include/drm/drm_syncobj.h b/include/drm/drm_syncobj.h
> index b04c492ddbb5..b6e193c6cc9a 100644
> --- a/include/drm/drm_syncobj.h
> +++ b/include/drm/drm_syncobj.h
> @@ -54,6 +54,41 @@ const struct dma_fence_ops drm_syncobj_stub_fence_ops = {
>  	.release = NULL,
>  };
>  
> +enum drm_syncobj_type {
> +	DRM_SYNCOBJ_TYPE_NORMAL,
> +	DRM_SYNCOBJ_TYPE_TIMELINE
> +};
> +
> +struct drm_syncobj;
> +struct drm_syncobj_wait_pt {
> +	struct drm_syncobj_stub_fence base;
> +	struct drm_syncobj_stub_fence *submission_fence;
> +	u64    value;
> +	struct rb_node   node;
> +};
> +struct drm_syncobj_signal_pt {
> +	struct drm_syncobj_stub_fence base;
> +	struct dma_fence *signal_fence;
> +	struct dma_fence *pre_pt_base;
> +	struct dma_fence_cb signal_cb;
> +	struct dma_fence_cb pre_pt_cb;
> +	struct drm_syncobj *syncobj;
> +	u64    value;
> +	struct list_head list;
> +};

The above internal structs shouldn't be in the public header.

Some tiny comments about what each piece is for would also go a long way
towards me being able to understand things.

Thanks, Daniel

> +struct drm_syncobj_timeline {
> +	u64 timeline_context;
> +	u64 submission_context;
> +	/**
> +	 * @timeline: syncobj timeline
> +	 */
> +	u64 timeline;
> +	u64 signal_point;
> +
> +
> +	struct rb_root wait_pt_tree;
> +	struct list_head signal_pt_list;
> +};
>  /**
>   * struct drm_syncobj - sync object.
>   *
> @@ -64,6 +99,11 @@ struct drm_syncobj {
>  	 * @refcount: Reference count of this object.
>  	 */
>  	struct kref refcount;
> +	/**
> +	 * @type: indicate syncobj type
> +	 */
> +	enum drm_syncobj_type type;
> +	struct drm_syncobj_timeline syncobj_timeline;
>  	/**
>  	 * @fence:
>  	 * NULL or a pointer to the fence bound to this object.
> @@ -162,9 +202,12 @@ void drm_syncobj_remove_callback(struct drm_syncobj *syncobj,
>  				 struct drm_syncobj_cb *cb);
>  void drm_syncobj_replace_fence(struct drm_syncobj *syncobj,
>  			       struct dma_fence *fence);
> +void drm_syncobj_signal_fence(struct drm_syncobj *syncobj,
> +			      struct dma_fence *fence,
> +			      u64 point);
>  int drm_syncobj_find_fence(struct drm_file *file_private,
>  			   u32 handle,
> -			   struct dma_fence **fence);
> +			   struct dma_fence **fence, u64 point);
>  void drm_syncobj_free(struct kref *kref);
>  int drm_syncobj_create(struct drm_syncobj **out_syncobj, uint32_t flags,
>  		       struct dma_fence *fence);
> diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
> index 300f336633f2..71e6cd1c88f8 100644
> --- a/include/uapi/drm/drm.h
> +++ b/include/uapi/drm/drm.h
> @@ -717,6 +717,8 @@ struct drm_prime_handle {
>  struct drm_syncobj_create {
>  	__u32 handle;
>  #define DRM_SYNCOBJ_CREATE_SIGNALED (1 << 0)
> +#define DRM_SYNCOBJ_CREATE_TYPE_NORMAL (1 << 1)
> +#define DRM_SYNCOBJ_CREATE_TYPE_TIMELINE (1 << 2)
>  	__u32 flags;
>  };
>  
> @@ -734,7 +736,6 @@ struct drm_syncobj_handle {
>  	__s32 fd;
>  	__u32 pad;
>  };
> -
>  #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL (1 << 0)
>  #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT (1 << 1)
>  struct drm_syncobj_wait {
> -- 
> 2.14.1
> 
> _______________________________________________
> dri-devel mailing list
> dri-devel@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel
On 2018年08月22日 17:24, Daniel Vetter wrote:
> On Wed, Aug 22, 2018 at 04:49:28PM +0800, Chunming Zhou wrote:
>> VK_KHR_timeline_semaphore:
>> This extension introduces a new type of semaphore that has an integer payload
>> identifying a point in a timeline. Such timeline semaphores support the
>> following operations:
>>    * Host query - A host operation that allows querying the payload of the
>>      timeline semaphore.
>>    * Host wait - A host operation that allows a blocking wait for a
>>      timeline semaphore to reach a specified value.
>>    * Device wait - A device operation that allows waiting for a
>>      timeline semaphore to reach a specified value.
>>    * Device signal - A device operation that allows advancing the
>>      timeline semaphore to a specified value.
>>
>> Since it's a timeline, that means the front time point(PT) always is signaled before the late PT.
>> a. signal PT design:
>> Signal PT fence N depends on PT[N-1] fence and signal opertion fence, when PT[N] fence is signaled,
>> the timeline will increase to value of PT[N].
>> b. wait PT design:
>> Wait PT fence is signaled by reaching timeline point value, when timeline is increasing, will compare
>> wait PTs value with new timeline value, if PT value is lower than timeline value, then wait PT will be
>> signaled, otherwise keep in list. semaphore wait operation can wait on any point of timeline,
>> so need a RB tree to order them. And wait PT could ahead of signal PT, we need a sumission fence to
>> perform that.
>>
>> TODO:
>> CPU query and wait on timeline semaphore.
> Another TODO: igt testcase for all the cornercases. We already have
> other syncobj tests in there.
Yes, I'm also trying to find where test should be wrote, Could you give 
a directory?

Thanks,
David Zhou
>
> That would also help with understanding how this is supposed to be used,
> since I'm a bit too dense to immediately get your algorithm by just
> staring at the code.
>
>
>> Change-Id: I9f09aae225e268442c30451badac40406f24262c
>> Signed-off-by: Chunming Zhou <david1.zhou@amd.com>
>> Cc: Christian Konig <christian.koenig@amd.com>
>> Cc: Dave Airlie <airlied@redhat.com>
>> Cc: Daniel Rakos <Daniel.Rakos@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c |   7 +-
>>   drivers/gpu/drm/drm_syncobj.c          | 385 ++++++++++++++++++++++++++++++++-
>>   drivers/gpu/drm/v3d/v3d_gem.c          |   4 +-
>>   drivers/gpu/drm/vc4/vc4_gem.c          |   2 +-
>>   include/drm/drm_syncobj.h              |  45 +++-
>>   include/uapi/drm/drm.h                 |   3 +-
>>   6 files changed, 435 insertions(+), 11 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> index d42d1c8f78f6..463cc8960723 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> @@ -1105,7 +1105,7 @@ static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p,
>>   {
>>   	int r;
>>   	struct dma_fence *fence;
>> -	r = drm_syncobj_find_fence(p->filp, handle, &fence);
>> +	r = drm_syncobj_find_fence(p->filp, handle, &fence, 0);
>>   	if (r)
>>   		return r;
>>   
>> @@ -1193,8 +1193,9 @@ static void amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p)
>>   {
>>   	int i;
>>   
>> -	for (i = 0; i < p->num_post_dep_syncobjs; ++i)
>> -		drm_syncobj_replace_fence(p->post_dep_syncobjs[i], p->fence);
>> +	for (i = 0; i < p->num_post_dep_syncobjs; ++i) {
>> +		drm_syncobj_signal_fence(p->post_dep_syncobjs[i], p->fence, 0);
>> +	}
>>   }
>>   
>>   static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
>> diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
>> index 70af32d0def1..3709f36c901e 100644
>> --- a/drivers/gpu/drm/drm_syncobj.c
>> +++ b/drivers/gpu/drm/drm_syncobj.c
>> @@ -187,6 +187,191 @@ void drm_syncobj_replace_fence(struct drm_syncobj *syncobj,
>>   }
>>   EXPORT_SYMBOL(drm_syncobj_replace_fence);
>>   
>> +static void drm_syncobj_timeline_signal_submission_fences(struct drm_syncobj *syncobj)
>> +{
>> +	struct rb_node *node = NULL;
>> +	struct drm_syncobj_wait_pt *wait_pt = NULL;
>> +
>> +	spin_lock(&syncobj->lock);
>> +	for(node = rb_first(&syncobj->syncobj_timeline.wait_pt_tree);
>> +	    node != NULL; node = rb_next(node)) {
>> +	    wait_pt = rb_entry(node, struct drm_syncobj_wait_pt, node);
>> +	    if (wait_pt->value <= syncobj->syncobj_timeline.signal_point) {
>> +		    if (wait_pt->submission_fence)
>> +			dma_fence_signal(&wait_pt->submission_fence->base);
>> +	    } else {
>> +		    /* the loop is from left to right, the later entry value is
>> +		     * bigger, so don't need to check any more */
>> +		    break;
>> +	    }
> This seems to reinvet syncobj->cb_list. Since this is the exact same
> "future fence that doesn't even exist yet" problem I think those two code
> path should be unified. In general I think it'd be much better if we treat
> the old syncobj as a timeline with a limit of 1 slot only.
>
> Or there needs to be a really good reason why all new code.
>
> Specifically for this stuff here having unified future fence semantics
> will allow drivers to do clever stuff with them.
>
>> +	}
>> +	spin_unlock(&syncobj->lock);
>> +}
>> +
>> +static void drm_syncobj_timeline_signal_wait_pts(struct drm_syncobj *syncobj)
>> +{
>> +	struct rb_node *node = NULL;
>> +	struct drm_syncobj_wait_pt *wait_pt = NULL;
>> +
>> +	spin_lock(&syncobj->lock);
>> +	for(node = rb_first(&syncobj->syncobj_timeline.wait_pt_tree);
>> +	    node != NULL; ) {
>> +		wait_pt = rb_entry(node, struct drm_syncobj_wait_pt, node);
>> +		node = rb_next(node);
>> +		if (wait_pt->value <= syncobj->syncobj_timeline.timeline) {
>> +			dma_fence_signal(&wait_pt->base.base);
>> +			rb_erase(&wait_pt->node,
>> +				 &syncobj->syncobj_timeline.wait_pt_tree);
>> +			RB_CLEAR_NODE(&wait_pt->node);
>> +			if (wait_pt->submission_fence)
>> +				dma_fence_put(&wait_pt->submission_fence->base);
>> +			/* kfree(wait_pt) is excuted by fence put */
>> +			dma_fence_put(&wait_pt->base.base);
>> +		} else {
>> +			/* the loop is from left to right, the later entry value is
>> +			 * bigger, so don't need to check any more */
>> +			break;
>> +		}
>> +	}
>> +	spin_unlock(&syncobj->lock);
>> +}
>> +
>> +
>> +static void pt_fence_cb(struct drm_syncobj_signal_pt *signal_pt)
>> +{
>> +	struct dma_fence *fence = NULL;
>> +	struct drm_syncobj *syncobj;
>> +
>> +	fence = signal_pt->signal_fence;
>> +	signal_pt->signal_fence = NULL;
>> +	dma_fence_put(fence);
>> +	fence = signal_pt->pre_pt_base;
>> +	signal_pt->pre_pt_base = NULL;
>> +	dma_fence_put(fence);
>> +
>> +	syncobj = signal_pt->syncobj;
>> +	spin_lock(&syncobj->lock);
>> +	list_del(&signal_pt->list);
>> +	syncobj->syncobj_timeline.timeline = signal_pt->value;
>> +	spin_unlock(&syncobj->lock);
>> +	/* kfree(signal_pt) will be  executed by below fence put */
>> +	dma_fence_put(&signal_pt->base.base);
>> +	drm_syncobj_timeline_signal_wait_pts(syncobj);
>> +}
>> +static void pt_signal_fence_func(struct dma_fence *fence,
>> +				 struct dma_fence_cb *cb)
>> +{
>> +	struct drm_syncobj_signal_pt *signal_pt =
>> +		container_of(cb, struct drm_syncobj_signal_pt, signal_cb);
>> +
>> +	if (signal_pt->pre_pt_base &&
>> +	    !dma_fence_is_signaled(signal_pt->pre_pt_base))
>> +		return;
>> +
>> +	pt_fence_cb(signal_pt);
>> +}
>> +static void pt_pre_fence_func(struct dma_fence *fence,
>> +				 struct dma_fence_cb *cb)
>> +{
>> +	struct drm_syncobj_signal_pt *signal_pt =
>> +		container_of(cb, struct drm_syncobj_signal_pt, pre_pt_cb);
>> +
>> +	if (signal_pt->signal_fence &&
>> +	    !dma_fence_is_signaled(signal_pt->pre_pt_base))
>> +		return;
>> +
>> +	pt_fence_cb(signal_pt);
>> +}
>> +
>> +static int drm_syncobj_timeline_signal_fence(struct drm_syncobj *syncobj,
>> +					     struct dma_fence *fence,
>> +					     u64 point)
>> +{
>> +	struct drm_syncobj_signal_pt *signal_pt =
>> +		kzalloc(sizeof(struct drm_syncobj_signal_pt), GFP_KERNEL);
>> +	struct drm_syncobj_signal_pt *tail_pt;
>> +	struct dma_fence *tail_pt_fence = NULL;
>> +	int ret = 0;
>> +
>> +	if (!signal_pt)
>> +		return -ENOMEM;
>> +	if (syncobj->syncobj_timeline.signal_point >= point) {
>> +		DRM_WARN("A later signal is ready!");
>> +		goto out;
>> +	}
>> +	if (fence)
>> +		dma_fence_get(fence);
>> +	spin_lock(&syncobj->lock);
>> +	spin_lock_init(&signal_pt->base.lock);
>> +	dma_fence_init(&signal_pt->base.base,
>> +		       &drm_syncobj_stub_fence_ops,
>> +		       &signal_pt->base.lock,
>> +		       syncobj->syncobj_timeline.timeline_context, point);
>> +	signal_pt->signal_fence =
>> +		rcu_dereference_protected(fence,
>> +					  lockdep_is_held(&fence->lock));
>> +	if (!list_empty(&syncobj->syncobj_timeline.signal_pt_list)) {
>> +		tail_pt = list_last_entry(&syncobj->syncobj_timeline.signal_pt_list,
>> +					  struct drm_syncobj_signal_pt, list);
>> +		tail_pt_fence = &tail_pt->base.base;
>> +		if (dma_fence_is_signaled(tail_pt_fence))
>> +			tail_pt_fence = NULL;
>> +	}
>> +	if (tail_pt_fence)
>> +		signal_pt->pre_pt_base =
>> +			dma_fence_get(rcu_dereference_protected(tail_pt_fence,
>> +								lockdep_is_held(&tail_pt_fence->lock)));
>> +
>> +	signal_pt->value = point;
>> +	syncobj->syncobj_timeline.signal_point = point;
>> +	signal_pt->syncobj = syncobj;
>> +	INIT_LIST_HEAD(&signal_pt->list);
>> +	list_add_tail(&signal_pt->list, &syncobj->syncobj_timeline.signal_pt_list);
>> +	spin_unlock(&syncobj->lock);
>> +	drm_syncobj_timeline_signal_submission_fences(syncobj);
>> +	/**
>> +	 * Every pt is depending on signal fence and previous pt fence, add
>> +	 * callbacks to them
>> +	 */
>> +	if (!dma_fence_is_signaled(signal_pt->signal_fence))
>> +		dma_fence_add_callback(signal_pt->signal_fence,
>> +				       &signal_pt->signal_cb,
>> +				       pt_signal_fence_func);
>> +	else
>> +		pt_signal_fence_func(signal_pt->signal_fence,
>> +				     &signal_pt->signal_cb);
>> +	if (signal_pt->pre_pt_base && !dma_fence_is_signaled(signal_pt->pre_pt_base))
>> +		dma_fence_add_callback(signal_pt->pre_pt_base,
>> +				       &signal_pt->pre_pt_cb,
>> +				       pt_pre_fence_func);
>> +	else
>> +		pt_pre_fence_func(signal_pt->pre_pt_base, &signal_pt->pre_pt_cb);
>> +
>> +
>> +	return 0;
>> +out:
>> +	kfree(signal_pt);
>> +	return ret;
>> +}
>> +
>> +/**
>> + * drm_syncobj_signal_fence - place fence into a sync object.
>> + * @syncobj: Sync object to replace fence in
>> + * @fence: fence to install in sync file.
>> + *
>> + * This places the fence into normal or timeline sync object
>> + */
>> +void drm_syncobj_signal_fence(struct drm_syncobj *syncobj,
>> +			      struct dma_fence *fence,
>> +			      u64 point)
> This is a very confusion function name. Feels like
> drm_syncobj_timeline_signal_fence is rather misnamed. I think we should
> just extend replace_fence like you do with find_fence already.
>
>> +{
>> +	if (syncobj->type == DRM_SYNCOBJ_TYPE_NORMAL)
>> +		drm_syncobj_replace_fence(syncobj, fence);
>> +	else if (syncobj->type == DRM_SYNCOBJ_TYPE_TIMELINE)
>> +		drm_syncobj_timeline_signal_fence(syncobj, fence, point);
>> +}
>> +EXPORT_SYMBOL(drm_syncobj_signal_fence);
>> +
>>   static int drm_syncobj_assign_null_handle(struct drm_syncobj *syncobj)
>>   {
>>   	struct drm_syncobj_stub_fence *fence;
>> @@ -205,12 +390,150 @@ static int drm_syncobj_assign_null_handle(struct drm_syncobj *syncobj)
>>   
>>   	return 0;
>>   }
>> +static struct drm_syncobj_wait_pt *
>> +drm_syncobj_timeline_lookup_wait_pt(struct drm_syncobj *syncobj, u64 point)
>> +{
>> +    struct rb_node *node = syncobj->syncobj_timeline.wait_pt_tree.rb_node;
>> +    struct drm_syncobj_wait_pt *wait_pt = NULL;
>> +
>> +
>> +    spin_lock(&syncobj->lock);
>> +    while(node) {
>> +	    int result = point - wait_pt->value;
>> +
>> +	    wait_pt = rb_entry(node, struct drm_syncobj_wait_pt, node);
>> +	    if (result < 0)
>> +		    node = node->rb_left;
>> +	    else if (result > 0)
>> +		    node = node->rb_right;
>> +	    else
>> +		    break;
>> +    }
>> +    spin_unlock(&syncobj->lock);
>> +
>> +    return wait_pt;
>> +}
>> +
>> +static struct drm_syncobj_wait_pt *
>> +drm_syncobj_timeline_create_wait_pt(struct drm_syncobj *syncobj, u64 point)
>> +{
>> +	struct drm_syncobj_wait_pt *wait_pt;
>> +	struct rb_node **new = &(syncobj->syncobj_timeline.wait_pt_tree.rb_node), *parent = NULL;
>> +
>> +	wait_pt = kzalloc(sizeof(*wait_pt), GFP_KERNEL);
>> +	if (!wait_pt)
>> +		return NULL;
>> +	spin_lock_init(&wait_pt->base.lock);
>> +	dma_fence_init(&wait_pt->base.base,
>> +		       &drm_syncobj_stub_fence_ops,
>> +		       &wait_pt->base.lock,
>> +		       syncobj->syncobj_timeline.timeline_context, point);
>> +	wait_pt->submission_fence = NULL;
>> +	/* check if the pt is already sumbitted, if yes, then don't need to
>> +	 * create submission fence, otherwise create it */
>> +	if (point > syncobj->syncobj_timeline.signal_point) {
>> +		wait_pt->submission_fence =
>> +			kzalloc(sizeof(struct drm_syncobj_stub_fence),
>> +				GFP_KERNEL);
>> +		if (!wait_pt->submission_fence) {
>> +			dma_fence_put(&wait_pt->base.base);
>> +			return NULL;
>> +		}
>> +		spin_lock_init(&wait_pt->submission_fence->lock);
>> +		dma_fence_init(&wait_pt->submission_fence->base,
>> +			       &drm_syncobj_stub_fence_ops,
>> +			       &wait_pt->submission_fence->lock,
>> +			       syncobj->syncobj_timeline.submission_context,
>> +			       point);
>> +	}
>> +	wait_pt->value = point;
>> +
>> +	/* wait pt must be in an order, so that we can easily lookup and signal
>> +	 * it */
>> +	spin_lock(&syncobj->lock);
>> +	if (point <= syncobj->syncobj_timeline.timeline)
>> +		dma_fence_signal(&wait_pt->base.base);
>> +	if (wait_pt->submission_fence &&
>> +	    point <= syncobj->syncobj_timeline.signal_point)
>> +		dma_fence_signal(&wait_pt->submission_fence->base);
>> +	while(*new) {
>> +		struct drm_syncobj_wait_pt *this =
>> +			rb_entry(*new, struct drm_syncobj_wait_pt, node);
>> +		int result = wait_pt->value - this->value;
>> +
>> +		parent = *new;
>> +		if (result < 0)
>> +			new = &((*new)->rb_left);
>> +		else if (result > 0)
>> +			new = &((*new)->rb_right);
>> +		else
>> +			goto exist;
>> +	}
>> +
>> +	rb_link_node(&wait_pt->node, parent, new);
>> +	rb_insert_color(&wait_pt->node, &syncobj->syncobj_timeline.wait_pt_tree);
>> +	spin_unlock(&syncobj->lock);
>> +	return wait_pt;
>> +exist:
>> +	spin_unlock(&syncobj->lock);
>> +	if (wait_pt->submission_fence)
>> +		dma_fence_put(&wait_pt->submission_fence->base);
>> +	dma_fence_put(&wait_pt->base.base);
>> +	wait_pt = drm_syncobj_timeline_lookup_wait_pt(syncobj, point);
>> +	return wait_pt;
>> +}
>> +
>> +static struct dma_fence *
>> +drm_syncobj_timeline_point_get(struct drm_syncobj *syncobj, u64 point, u64 flag)
>> +{
>> +	struct drm_syncobj_wait_pt *wait_pt;
>> +
>> +	/* already signaled, simply return a signaled stub fence */
>> +	if (point <= syncobj->syncobj_timeline.timeline) {
>> +		struct drm_syncobj_stub_fence *fence;
>> +
>> +		fence = kzalloc(sizeof(*fence), GFP_KERNEL);
>> +		if (fence == NULL)
>> +			return NULL;
>> +
>> +		spin_lock_init(&fence->lock);
>> +		dma_fence_init(&fence->base, &drm_syncobj_stub_fence_ops,
>> +			       &fence->lock, 0, 0);
>> +		dma_fence_signal(&fence->base);
>> +		return &fence->base;
>> +	}
>> +
>> +	/* check if the wait pt exists */
>> +	wait_pt = drm_syncobj_timeline_lookup_wait_pt(syncobj, point);
>> +	if (!wait_pt) {
>> +		/* This is a new wait pt, so create it */
>> +		wait_pt = drm_syncobj_timeline_create_wait_pt(syncobj, point);
>> +		if (!wait_pt)
>> +			return NULL;
>> +	}
>> +	if (wait_pt) {
>> +		struct dma_fence *fence;
>> +		if (wait_pt->submission_fence) {
>> +			if (flag & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT)
>> +				dma_fence_wait(&wait_pt->submission_fence->base, true);
>> +			else if (!dma_fence_is_signaled(&wait_pt->submission_fence->base))
>> +				return NULL;
>> +		}
>> +		rcu_read_lock();
>> +		fence = dma_fence_get_rcu(&wait_pt->base.base);
>> +		rcu_read_unlock();
>> +		return fence;
>> +	}
>> +	return NULL;
>> +}
>> +
>>   
>>   /**
>>    * drm_syncobj_find_fence - lookup and reference the fence in a sync object
>>    * @file_private: drm file private pointer
>>    * @handle: sync object handle to lookup.
>>    * @fence: out parameter for the fence
>> + * @point: timeline point
>>    *
>>    * This is just a convenience function that combines drm_syncobj_find() and
>>    * drm_syncobj_fence_get().
>> @@ -221,7 +544,8 @@ static int drm_syncobj_assign_null_handle(struct drm_syncobj *syncobj)
>>    */
>>   int drm_syncobj_find_fence(struct drm_file *file_private,
>>   			   u32 handle,
>> -			   struct dma_fence **fence)
>> +			   struct dma_fence **fence,
>> +			   u64 point)
>>   {
>>   	struct drm_syncobj *syncobj = drm_syncobj_find(file_private, handle);
>>   	int ret = 0;
>> @@ -229,7 +553,15 @@ int drm_syncobj_find_fence(struct drm_file *file_private,
>>   	if (!syncobj)
>>   		return -ENOENT;
>>   
>> -	*fence = drm_syncobj_fence_get(syncobj);
>> +	if (syncobj->type == DRM_SYNCOBJ_TYPE_NORMAL) {
>> +		*fence = drm_syncobj_fence_get(syncobj);
> Needs at least a WARN_ON(point != 0) here, since I'd say that would be a
> driver bug. Or userspace bug, in which case we need to return -EINVAL;
>
> Hard to tell without the driver implementation.
>
>> +	}else if (syncobj->type == DRM_SYNCOBJ_TYPE_TIMELINE) {
>> +		*fence = drm_syncobj_timeline_point_get(syncobj, point,
>> +							DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT);
>> +	} else {
>> +		DRM_ERROR("invalid syncobj type\n");
>> +		return -EINVAL;
>> +	}
>>   	if (!*fence) {
>>   		ret = -EINVAL;
>>   	}
>> @@ -238,6 +570,35 @@ int drm_syncobj_find_fence(struct drm_file *file_private,
>>   }
>>   EXPORT_SYMBOL(drm_syncobj_find_fence);
>>   
>> +static void drm_syncobj_timeline_fini(struct drm_syncobj *syncobj,
>> +				      struct drm_syncobj_timeline *syncobj_timeline)
>> +{
>> +	struct rb_node *node = NULL;
>> +	struct drm_syncobj_wait_pt *wait_pt = NULL;
>> +	struct drm_syncobj_signal_pt *signal_pt = NULL, *tmp;
>> +
>> +	spin_lock(&syncobj->lock);
>> +	for(node = rb_first(&syncobj_timeline->wait_pt_tree);
>> +	    node != NULL; ) {
>> +		wait_pt = rb_entry(node, struct drm_syncobj_wait_pt, node);
>> +		node = rb_next(node);
>> +		rb_erase(&wait_pt->node,
>> +			 &syncobj_timeline->wait_pt_tree);
>> +		RB_CLEAR_NODE(&wait_pt->node);
>> +		if (wait_pt->submission_fence)
>> +			dma_fence_put(&wait_pt->submission_fence->base);
>> +		/* kfree(wait_pt) is excuted by fence put */
>> +		dma_fence_put(&wait_pt->base.base);
>> +	}
>> +	list_for_each_entry_safe(signal_pt, tmp,
>> +				 &syncobj_timeline->signal_pt_list, list) {
>> +		list_del(&signal_pt->list);
>> +		dma_fence_put(signal_pt->signal_fence);
>> +		dma_fence_put(signal_pt->pre_pt_base);
>> +		dma_fence_put(&signal_pt->base.base);
>> +	}
>> +	spin_unlock(&syncobj->lock);
>> +}
>>   /**
>>    * drm_syncobj_free - free a sync object.
>>    * @kref: kref to free.
>> @@ -250,10 +611,22 @@ void drm_syncobj_free(struct kref *kref)
>>   						   struct drm_syncobj,
>>   						   refcount);
>>   	drm_syncobj_replace_fence(syncobj, NULL);
>> +	drm_syncobj_timeline_fini(syncobj, &syncobj->syncobj_timeline);
>>   	kfree(syncobj);
>>   }
>>   EXPORT_SYMBOL(drm_syncobj_free);
>>   
>> +static void drm_syncobj_timeline_init(struct drm_syncobj_timeline
>> +				      *syncobj_timeline)
>> +{
>> +	syncobj_timeline->timeline_context = dma_fence_context_alloc(1);
>> +	syncobj_timeline->submission_context = dma_fence_context_alloc(1);
>> +	syncobj_timeline->timeline = 0;
>> +	syncobj_timeline->signal_point = 0;
>> +
>> +	syncobj_timeline->wait_pt_tree = RB_ROOT;
>> +	INIT_LIST_HEAD(&syncobj_timeline->signal_pt_list);
>> +}
>>   /**
>>    * drm_syncobj_create - create a new syncobj
>>    * @out_syncobj: returned syncobj
>> @@ -279,6 +652,12 @@ int drm_syncobj_create(struct drm_syncobj **out_syncobj, uint32_t flags,
>>   	kref_init(&syncobj->refcount);
>>   	INIT_LIST_HEAD(&syncobj->cb_list);
>>   	spin_lock_init(&syncobj->lock);
>> +	if (flags & DRM_SYNCOBJ_CREATE_TYPE_TIMELINE) {
>> +		syncobj->type = DRM_SYNCOBJ_TYPE_TIMELINE;
>> +		drm_syncobj_timeline_init(&syncobj->syncobj_timeline);
>> +	} else {
>> +		syncobj->type = DRM_SYNCOBJ_TYPE_NORMAL;
>> +	}
>>   
>>   	if (flags & DRM_SYNCOBJ_CREATE_SIGNALED) {
>>   		ret = drm_syncobj_assign_null_handle(syncobj);
>> @@ -491,7 +870,7 @@ static int drm_syncobj_export_sync_file(struct drm_file *file_private,
>>   	if (fd < 0)
>>   		return fd;
>>   
>> -	ret = drm_syncobj_find_fence(file_private, handle, &fence);
>> +	ret = drm_syncobj_find_fence(file_private, handle, &fence, 0);
>>   	if (ret)
>>   		goto err_put_fd;
>>   
>> diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c
>> index 9029590267aa..f24c3eccb4d5 100644
>> --- a/drivers/gpu/drm/v3d/v3d_gem.c
>> +++ b/drivers/gpu/drm/v3d/v3d_gem.c
>> @@ -521,12 +521,12 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
>>   	kref_init(&exec->refcount);
>>   
>>   	ret = drm_syncobj_find_fence(file_priv, args->in_sync_bcl,
>> -				     &exec->bin.in_fence);
>> +				     &exec->bin.in_fence, 0);
>>   	if (ret == -EINVAL)
>>   		goto fail;
>>   
>>   	ret = drm_syncobj_find_fence(file_priv, args->in_sync_rcl,
>> -				     &exec->render.in_fence);
>> +				     &exec->render.in_fence, 0);
>>   	if (ret == -EINVAL)
>>   		goto fail;
>>   
>> diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c
>> index 7910b9acedd6..f7b4971342e8 100644
>> --- a/drivers/gpu/drm/vc4/vc4_gem.c
>> +++ b/drivers/gpu/drm/vc4/vc4_gem.c
>> @@ -1173,7 +1173,7 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
>>   
>>   	if (args->in_sync) {
>>   		ret = drm_syncobj_find_fence(file_priv, args->in_sync,
>> -					     &in_fence);
>> +					     &in_fence, 0);
>>   		if (ret)
>>   			goto fail;
>>   
>> diff --git a/include/drm/drm_syncobj.h b/include/drm/drm_syncobj.h
>> index b04c492ddbb5..b6e193c6cc9a 100644
>> --- a/include/drm/drm_syncobj.h
>> +++ b/include/drm/drm_syncobj.h
>> @@ -54,6 +54,41 @@ const struct dma_fence_ops drm_syncobj_stub_fence_ops = {
>>   	.release = NULL,
>>   };
>>   
>> +enum drm_syncobj_type {
>> +	DRM_SYNCOBJ_TYPE_NORMAL,
>> +	DRM_SYNCOBJ_TYPE_TIMELINE
>> +};
>> +
>> +struct drm_syncobj;
>> +struct drm_syncobj_wait_pt {
>> +	struct drm_syncobj_stub_fence base;
>> +	struct drm_syncobj_stub_fence *submission_fence;
>> +	u64    value;
>> +	struct rb_node   node;
>> +};
>> +struct drm_syncobj_signal_pt {
>> +	struct drm_syncobj_stub_fence base;
>> +	struct dma_fence *signal_fence;
>> +	struct dma_fence *pre_pt_base;
>> +	struct dma_fence_cb signal_cb;
>> +	struct dma_fence_cb pre_pt_cb;
>> +	struct drm_syncobj *syncobj;
>> +	u64    value;
>> +	struct list_head list;
>> +};
> The above internal structs shouldn't be in the public header.
>
> Some tiny comments about what each piece is for would also go a long way
> towards me being able to understand things.
>
> Thanks, Daniel
>
>> +struct drm_syncobj_timeline {
>> +	u64 timeline_context;
>> +	u64 submission_context;
>> +	/**
>> +	 * @timeline: syncobj timeline
>> +	 */
>> +	u64 timeline;
>> +	u64 signal_point;
>> +
>> +
>> +	struct rb_root wait_pt_tree;
>> +	struct list_head signal_pt_list;
>> +};
>>   /**
>>    * struct drm_syncobj - sync object.
>>    *
>> @@ -64,6 +99,11 @@ struct drm_syncobj {
>>   	 * @refcount: Reference count of this object.
>>   	 */
>>   	struct kref refcount;
>> +	/**
>> +	 * @type: indicate syncobj type
>> +	 */
>> +	enum drm_syncobj_type type;
>> +	struct drm_syncobj_timeline syncobj_timeline;
>>   	/**
>>   	 * @fence:
>>   	 * NULL or a pointer to the fence bound to this object.
>> @@ -162,9 +202,12 @@ void drm_syncobj_remove_callback(struct drm_syncobj *syncobj,
>>   				 struct drm_syncobj_cb *cb);
>>   void drm_syncobj_replace_fence(struct drm_syncobj *syncobj,
>>   			       struct dma_fence *fence);
>> +void drm_syncobj_signal_fence(struct drm_syncobj *syncobj,
>> +			      struct dma_fence *fence,
>> +			      u64 point);
>>   int drm_syncobj_find_fence(struct drm_file *file_private,
>>   			   u32 handle,
>> -			   struct dma_fence **fence);
>> +			   struct dma_fence **fence, u64 point);
>>   void drm_syncobj_free(struct kref *kref);
>>   int drm_syncobj_create(struct drm_syncobj **out_syncobj, uint32_t flags,
>>   		       struct dma_fence *fence);
>> diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
>> index 300f336633f2..71e6cd1c88f8 100644
>> --- a/include/uapi/drm/drm.h
>> +++ b/include/uapi/drm/drm.h
>> @@ -717,6 +717,8 @@ struct drm_prime_handle {
>>   struct drm_syncobj_create {
>>   	__u32 handle;
>>   #define DRM_SYNCOBJ_CREATE_SIGNALED (1 << 0)
>> +#define DRM_SYNCOBJ_CREATE_TYPE_NORMAL (1 << 1)
>> +#define DRM_SYNCOBJ_CREATE_TYPE_TIMELINE (1 << 2)
>>   	__u32 flags;
>>   };
>>   
>> @@ -734,7 +736,6 @@ struct drm_syncobj_handle {
>>   	__s32 fd;
>>   	__u32 pad;
>>   };
>> -
>>   #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL (1 << 0)
>>   #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT (1 << 1)
>>   struct drm_syncobj_wait {
>> -- 
>> 2.14.1
>>
>> _______________________________________________
>> dri-devel mailing list
>> dri-devel@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/dri-devel
On Wed, Aug 22, 2018 at 05:28:17PM +0800, zhoucm1 wrote:
> 
> 
> On 2018年08月22日 17:24, Daniel Vetter wrote:
> > On Wed, Aug 22, 2018 at 04:49:28PM +0800, Chunming Zhou wrote:
> > > VK_KHR_timeline_semaphore:
> > > This extension introduces a new type of semaphore that has an integer payload
> > > identifying a point in a timeline. Such timeline semaphores support the
> > > following operations:
> > >    * Host query - A host operation that allows querying the payload of the
> > >      timeline semaphore.
> > >    * Host wait - A host operation that allows a blocking wait for a
> > >      timeline semaphore to reach a specified value.
> > >    * Device wait - A device operation that allows waiting for a
> > >      timeline semaphore to reach a specified value.
> > >    * Device signal - A device operation that allows advancing the
> > >      timeline semaphore to a specified value.
> > > 
> > > Since it's a timeline, that means the front time point(PT) always is signaled before the late PT.
> > > a. signal PT design:
> > > Signal PT fence N depends on PT[N-1] fence and signal opertion fence, when PT[N] fence is signaled,
> > > the timeline will increase to value of PT[N].
> > > b. wait PT design:
> > > Wait PT fence is signaled by reaching timeline point value, when timeline is increasing, will compare
> > > wait PTs value with new timeline value, if PT value is lower than timeline value, then wait PT will be
> > > signaled, otherwise keep in list. semaphore wait operation can wait on any point of timeline,
> > > so need a RB tree to order them. And wait PT could ahead of signal PT, we need a sumission fence to
> > > perform that.
> > > 
> > > TODO:
> > > CPU query and wait on timeline semaphore.
> > Another TODO: igt testcase for all the cornercases. We already have
> > other syncobj tests in there.
> Yes, I'm also trying to find where test should be wrote, Could you give a
> directory?

There's already tests/syncobj_basic.c and tests/syncobj_wait.c. Either
extend those, or probably better to start a new tests/syncobj_timeline.c
since I expect this will have a lot of corner-cases we need to check.
-Daniel

> 
> Thanks,
> David Zhou
> > 
> > That would also help with understanding how this is supposed to be used,
> > since I'm a bit too dense to immediately get your algorithm by just
> > staring at the code.
> > 
> > 
> > > Change-Id: I9f09aae225e268442c30451badac40406f24262c
> > > Signed-off-by: Chunming Zhou <david1.zhou@amd.com>
> > > Cc: Christian Konig <christian.koenig@amd.com>
> > > Cc: Dave Airlie <airlied@redhat.com>
> > > Cc: Daniel Rakos <Daniel.Rakos@amd.com>
> > > ---
> > >   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c |   7 +-
> > >   drivers/gpu/drm/drm_syncobj.c          | 385 ++++++++++++++++++++++++++++++++-
> > >   drivers/gpu/drm/v3d/v3d_gem.c          |   4 +-
> > >   drivers/gpu/drm/vc4/vc4_gem.c          |   2 +-
> > >   include/drm/drm_syncobj.h              |  45 +++-
> > >   include/uapi/drm/drm.h                 |   3 +-
> > >   6 files changed, 435 insertions(+), 11 deletions(-)
> > > 
> > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> > > index d42d1c8f78f6..463cc8960723 100644
> > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> > > @@ -1105,7 +1105,7 @@ static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p,
> > >   {
> > >   	int r;
> > >   	struct dma_fence *fence;
> > > -	r = drm_syncobj_find_fence(p->filp, handle, &fence);
> > > +	r = drm_syncobj_find_fence(p->filp, handle, &fence, 0);
> > >   	if (r)
> > >   		return r;
> > > @@ -1193,8 +1193,9 @@ static void amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p)
> > >   {
> > >   	int i;
> > > -	for (i = 0; i < p->num_post_dep_syncobjs; ++i)
> > > -		drm_syncobj_replace_fence(p->post_dep_syncobjs[i], p->fence);
> > > +	for (i = 0; i < p->num_post_dep_syncobjs; ++i) {
> > > +		drm_syncobj_signal_fence(p->post_dep_syncobjs[i], p->fence, 0);
> > > +	}
> > >   }
> > >   static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
> > > diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
> > > index 70af32d0def1..3709f36c901e 100644
> > > --- a/drivers/gpu/drm/drm_syncobj.c
> > > +++ b/drivers/gpu/drm/drm_syncobj.c
> > > @@ -187,6 +187,191 @@ void drm_syncobj_replace_fence(struct drm_syncobj *syncobj,
> > >   }
> > >   EXPORT_SYMBOL(drm_syncobj_replace_fence);
> > > +static void drm_syncobj_timeline_signal_submission_fences(struct drm_syncobj *syncobj)
> > > +{
> > > +	struct rb_node *node = NULL;
> > > +	struct drm_syncobj_wait_pt *wait_pt = NULL;
> > > +
> > > +	spin_lock(&syncobj->lock);
> > > +	for(node = rb_first(&syncobj->syncobj_timeline.wait_pt_tree);
> > > +	    node != NULL; node = rb_next(node)) {
> > > +	    wait_pt = rb_entry(node, struct drm_syncobj_wait_pt, node);
> > > +	    if (wait_pt->value <= syncobj->syncobj_timeline.signal_point) {
> > > +		    if (wait_pt->submission_fence)
> > > +			dma_fence_signal(&wait_pt->submission_fence->base);
> > > +	    } else {
> > > +		    /* the loop is from left to right, the later entry value is
> > > +		     * bigger, so don't need to check any more */
> > > +		    break;
> > > +	    }
> > This seems to reinvet syncobj->cb_list. Since this is the exact same
> > "future fence that doesn't even exist yet" problem I think those two code
> > path should be unified. In general I think it'd be much better if we treat
> > the old syncobj as a timeline with a limit of 1 slot only.
> > 
> > Or there needs to be a really good reason why all new code.
> > 
> > Specifically for this stuff here having unified future fence semantics
> > will allow drivers to do clever stuff with them.
> > 
> > > +	}
> > > +	spin_unlock(&syncobj->lock);
> > > +}
> > > +
> > > +static void drm_syncobj_timeline_signal_wait_pts(struct drm_syncobj *syncobj)
> > > +{
> > > +	struct rb_node *node = NULL;
> > > +	struct drm_syncobj_wait_pt *wait_pt = NULL;
> > > +
> > > +	spin_lock(&syncobj->lock);
> > > +	for(node = rb_first(&syncobj->syncobj_timeline.wait_pt_tree);
> > > +	    node != NULL; ) {
> > > +		wait_pt = rb_entry(node, struct drm_syncobj_wait_pt, node);
> > > +		node = rb_next(node);
> > > +		if (wait_pt->value <= syncobj->syncobj_timeline.timeline) {
> > > +			dma_fence_signal(&wait_pt->base.base);
> > > +			rb_erase(&wait_pt->node,
> > > +				 &syncobj->syncobj_timeline.wait_pt_tree);
> > > +			RB_CLEAR_NODE(&wait_pt->node);
> > > +			if (wait_pt->submission_fence)
> > > +				dma_fence_put(&wait_pt->submission_fence->base);
> > > +			/* kfree(wait_pt) is excuted by fence put */
> > > +			dma_fence_put(&wait_pt->base.base);
> > > +		} else {
> > > +			/* the loop is from left to right, the later entry value is
> > > +			 * bigger, so don't need to check any more */
> > > +			break;
> > > +		}
> > > +	}
> > > +	spin_unlock(&syncobj->lock);
> > > +}
> > > +
> > > +
> > > +static void pt_fence_cb(struct drm_syncobj_signal_pt *signal_pt)
> > > +{
> > > +	struct dma_fence *fence = NULL;
> > > +	struct drm_syncobj *syncobj;
> > > +
> > > +	fence = signal_pt->signal_fence;
> > > +	signal_pt->signal_fence = NULL;
> > > +	dma_fence_put(fence);
> > > +	fence = signal_pt->pre_pt_base;
> > > +	signal_pt->pre_pt_base = NULL;
> > > +	dma_fence_put(fence);
> > > +
> > > +	syncobj = signal_pt->syncobj;
> > > +	spin_lock(&syncobj->lock);
> > > +	list_del(&signal_pt->list);
> > > +	syncobj->syncobj_timeline.timeline = signal_pt->value;
> > > +	spin_unlock(&syncobj->lock);
> > > +	/* kfree(signal_pt) will be  executed by below fence put */
> > > +	dma_fence_put(&signal_pt->base.base);
> > > +	drm_syncobj_timeline_signal_wait_pts(syncobj);
> > > +}
> > > +static void pt_signal_fence_func(struct dma_fence *fence,
> > > +				 struct dma_fence_cb *cb)
> > > +{
> > > +	struct drm_syncobj_signal_pt *signal_pt =
> > > +		container_of(cb, struct drm_syncobj_signal_pt, signal_cb);
> > > +
> > > +	if (signal_pt->pre_pt_base &&
> > > +	    !dma_fence_is_signaled(signal_pt->pre_pt_base))
> > > +		return;
> > > +
> > > +	pt_fence_cb(signal_pt);
> > > +}
> > > +static void pt_pre_fence_func(struct dma_fence *fence,
> > > +				 struct dma_fence_cb *cb)
> > > +{
> > > +	struct drm_syncobj_signal_pt *signal_pt =
> > > +		container_of(cb, struct drm_syncobj_signal_pt, pre_pt_cb);
> > > +
> > > +	if (signal_pt->signal_fence &&
> > > +	    !dma_fence_is_signaled(signal_pt->pre_pt_base))
> > > +		return;
> > > +
> > > +	pt_fence_cb(signal_pt);
> > > +}
> > > +
> > > +static int drm_syncobj_timeline_signal_fence(struct drm_syncobj *syncobj,
> > > +					     struct dma_fence *fence,
> > > +					     u64 point)
> > > +{
> > > +	struct drm_syncobj_signal_pt *signal_pt =
> > > +		kzalloc(sizeof(struct drm_syncobj_signal_pt), GFP_KERNEL);
> > > +	struct drm_syncobj_signal_pt *tail_pt;
> > > +	struct dma_fence *tail_pt_fence = NULL;
> > > +	int ret = 0;
> > > +
> > > +	if (!signal_pt)
> > > +		return -ENOMEM;
> > > +	if (syncobj->syncobj_timeline.signal_point >= point) {
> > > +		DRM_WARN("A later signal is ready!");
> > > +		goto out;
> > > +	}
> > > +	if (fence)
> > > +		dma_fence_get(fence);
> > > +	spin_lock(&syncobj->lock);
> > > +	spin_lock_init(&signal_pt->base.lock);
> > > +	dma_fence_init(&signal_pt->base.base,
> > > +		       &drm_syncobj_stub_fence_ops,
> > > +		       &signal_pt->base.lock,
> > > +		       syncobj->syncobj_timeline.timeline_context, point);
> > > +	signal_pt->signal_fence =
> > > +		rcu_dereference_protected(fence,
> > > +					  lockdep_is_held(&fence->lock));
> > > +	if (!list_empty(&syncobj->syncobj_timeline.signal_pt_list)) {
> > > +		tail_pt = list_last_entry(&syncobj->syncobj_timeline.signal_pt_list,
> > > +					  struct drm_syncobj_signal_pt, list);
> > > +		tail_pt_fence = &tail_pt->base.base;
> > > +		if (dma_fence_is_signaled(tail_pt_fence))
> > > +			tail_pt_fence = NULL;
> > > +	}
> > > +	if (tail_pt_fence)
> > > +		signal_pt->pre_pt_base =
> > > +			dma_fence_get(rcu_dereference_protected(tail_pt_fence,
> > > +								lockdep_is_held(&tail_pt_fence->lock)));
> > > +
> > > +	signal_pt->value = point;
> > > +	syncobj->syncobj_timeline.signal_point = point;
> > > +	signal_pt->syncobj = syncobj;
> > > +	INIT_LIST_HEAD(&signal_pt->list);
> > > +	list_add_tail(&signal_pt->list, &syncobj->syncobj_timeline.signal_pt_list);
> > > +	spin_unlock(&syncobj->lock);
> > > +	drm_syncobj_timeline_signal_submission_fences(syncobj);
> > > +	/**
> > > +	 * Every pt is depending on signal fence and previous pt fence, add
> > > +	 * callbacks to them
> > > +	 */
> > > +	if (!dma_fence_is_signaled(signal_pt->signal_fence))
> > > +		dma_fence_add_callback(signal_pt->signal_fence,
> > > +				       &signal_pt->signal_cb,
> > > +				       pt_signal_fence_func);
> > > +	else
> > > +		pt_signal_fence_func(signal_pt->signal_fence,
> > > +				     &signal_pt->signal_cb);
> > > +	if (signal_pt->pre_pt_base && !dma_fence_is_signaled(signal_pt->pre_pt_base))
> > > +		dma_fence_add_callback(signal_pt->pre_pt_base,
> > > +				       &signal_pt->pre_pt_cb,
> > > +				       pt_pre_fence_func);
> > > +	else
> > > +		pt_pre_fence_func(signal_pt->pre_pt_base, &signal_pt->pre_pt_cb);
> > > +
> > > +
> > > +	return 0;
> > > +out:
> > > +	kfree(signal_pt);
> > > +	return ret;
> > > +}
> > > +
> > > +/**
> > > + * drm_syncobj_signal_fence - place fence into a sync object.
> > > + * @syncobj: Sync object to replace fence in
> > > + * @fence: fence to install in sync file.
> > > + *
> > > + * This places the fence into normal or timeline sync object
> > > + */
> > > +void drm_syncobj_signal_fence(struct drm_syncobj *syncobj,
> > > +			      struct dma_fence *fence,
> > > +			      u64 point)
> > This is a very confusion function name. Feels like
> > drm_syncobj_timeline_signal_fence is rather misnamed. I think we should
> > just extend replace_fence like you do with find_fence already.
> > 
> > > +{
> > > +	if (syncobj->type == DRM_SYNCOBJ_TYPE_NORMAL)
> > > +		drm_syncobj_replace_fence(syncobj, fence);
> > > +	else if (syncobj->type == DRM_SYNCOBJ_TYPE_TIMELINE)
> > > +		drm_syncobj_timeline_signal_fence(syncobj, fence, point);
> > > +}
> > > +EXPORT_SYMBOL(drm_syncobj_signal_fence);
> > > +
> > >   static int drm_syncobj_assign_null_handle(struct drm_syncobj *syncobj)
> > >   {
> > >   	struct drm_syncobj_stub_fence *fence;
> > > @@ -205,12 +390,150 @@ static int drm_syncobj_assign_null_handle(struct drm_syncobj *syncobj)
> > >   	return 0;
> > >   }
> > > +static struct drm_syncobj_wait_pt *
> > > +drm_syncobj_timeline_lookup_wait_pt(struct drm_syncobj *syncobj, u64 point)
> > > +{
> > > +    struct rb_node *node = syncobj->syncobj_timeline.wait_pt_tree.rb_node;
> > > +    struct drm_syncobj_wait_pt *wait_pt = NULL;
> > > +
> > > +
> > > +    spin_lock(&syncobj->lock);
> > > +    while(node) {
> > > +	    int result = point - wait_pt->value;
> > > +
> > > +	    wait_pt = rb_entry(node, struct drm_syncobj_wait_pt, node);
> > > +	    if (result < 0)
> > > +		    node = node->rb_left;
> > > +	    else if (result > 0)
> > > +		    node = node->rb_right;
> > > +	    else
> > > +		    break;
> > > +    }
> > > +    spin_unlock(&syncobj->lock);
> > > +
> > > +    return wait_pt;
> > > +}
> > > +
> > > +static struct drm_syncobj_wait_pt *
> > > +drm_syncobj_timeline_create_wait_pt(struct drm_syncobj *syncobj, u64 point)
> > > +{
> > > +	struct drm_syncobj_wait_pt *wait_pt;
> > > +	struct rb_node **new = &(syncobj->syncobj_timeline.wait_pt_tree.rb_node), *parent = NULL;
> > > +
> > > +	wait_pt = kzalloc(sizeof(*wait_pt), GFP_KERNEL);
> > > +	if (!wait_pt)
> > > +		return NULL;
> > > +	spin_lock_init(&wait_pt->base.lock);
> > > +	dma_fence_init(&wait_pt->base.base,
> > > +		       &drm_syncobj_stub_fence_ops,
> > > +		       &wait_pt->base.lock,
> > > +		       syncobj->syncobj_timeline.timeline_context, point);
> > > +	wait_pt->submission_fence = NULL;
> > > +	/* check if the pt is already sumbitted, if yes, then don't need to
> > > +	 * create submission fence, otherwise create it */
> > > +	if (point > syncobj->syncobj_timeline.signal_point) {
> > > +		wait_pt->submission_fence =
> > > +			kzalloc(sizeof(struct drm_syncobj_stub_fence),
> > > +				GFP_KERNEL);
> > > +		if (!wait_pt->submission_fence) {
> > > +			dma_fence_put(&wait_pt->base.base);
> > > +			return NULL;
> > > +		}
> > > +		spin_lock_init(&wait_pt->submission_fence->lock);
> > > +		dma_fence_init(&wait_pt->submission_fence->base,
> > > +			       &drm_syncobj_stub_fence_ops,
> > > +			       &wait_pt->submission_fence->lock,
> > > +			       syncobj->syncobj_timeline.submission_context,
> > > +			       point);
> > > +	}
> > > +	wait_pt->value = point;
> > > +
> > > +	/* wait pt must be in an order, so that we can easily lookup and signal
> > > +	 * it */
> > > +	spin_lock(&syncobj->lock);
> > > +	if (point <= syncobj->syncobj_timeline.timeline)
> > > +		dma_fence_signal(&wait_pt->base.base);
> > > +	if (wait_pt->submission_fence &&
> > > +	    point <= syncobj->syncobj_timeline.signal_point)
> > > +		dma_fence_signal(&wait_pt->submission_fence->base);
> > > +	while(*new) {
> > > +		struct drm_syncobj_wait_pt *this =
> > > +			rb_entry(*new, struct drm_syncobj_wait_pt, node);
> > > +		int result = wait_pt->value - this->value;
> > > +
> > > +		parent = *new;
> > > +		if (result < 0)
> > > +			new = &((*new)->rb_left);
> > > +		else if (result > 0)
> > > +			new = &((*new)->rb_right);
> > > +		else
> > > +			goto exist;
> > > +	}
> > > +
> > > +	rb_link_node(&wait_pt->node, parent, new);
> > > +	rb_insert_color(&wait_pt->node, &syncobj->syncobj_timeline.wait_pt_tree);
> > > +	spin_unlock(&syncobj->lock);
> > > +	return wait_pt;
> > > +exist:
> > > +	spin_unlock(&syncobj->lock);
> > > +	if (wait_pt->submission_fence)
> > > +		dma_fence_put(&wait_pt->submission_fence->base);
> > > +	dma_fence_put(&wait_pt->base.base);
> > > +	wait_pt = drm_syncobj_timeline_lookup_wait_pt(syncobj, point);
> > > +	return wait_pt;
> > > +}
> > > +
> > > +static struct dma_fence *
> > > +drm_syncobj_timeline_point_get(struct drm_syncobj *syncobj, u64 point, u64 flag)
> > > +{
> > > +	struct drm_syncobj_wait_pt *wait_pt;
> > > +
> > > +	/* already signaled, simply return a signaled stub fence */
> > > +	if (point <= syncobj->syncobj_timeline.timeline) {
> > > +		struct drm_syncobj_stub_fence *fence;
> > > +
> > > +		fence = kzalloc(sizeof(*fence), GFP_KERNEL);
> > > +		if (fence == NULL)
> > > +			return NULL;
> > > +
> > > +		spin_lock_init(&fence->lock);
> > > +		dma_fence_init(&fence->base, &drm_syncobj_stub_fence_ops,
> > > +			       &fence->lock, 0, 0);
> > > +		dma_fence_signal(&fence->base);
> > > +		return &fence->base;
> > > +	}
> > > +
> > > +	/* check if the wait pt exists */
> > > +	wait_pt = drm_syncobj_timeline_lookup_wait_pt(syncobj, point);
> > > +	if (!wait_pt) {
> > > +		/* This is a new wait pt, so create it */
> > > +		wait_pt = drm_syncobj_timeline_create_wait_pt(syncobj, point);
> > > +		if (!wait_pt)
> > > +			return NULL;
> > > +	}
> > > +	if (wait_pt) {
> > > +		struct dma_fence *fence;
> > > +		if (wait_pt->submission_fence) {
> > > +			if (flag & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT)
> > > +				dma_fence_wait(&wait_pt->submission_fence->base, true);
> > > +			else if (!dma_fence_is_signaled(&wait_pt->submission_fence->base))
> > > +				return NULL;
> > > +		}
> > > +		rcu_read_lock();
> > > +		fence = dma_fence_get_rcu(&wait_pt->base.base);
> > > +		rcu_read_unlock();
> > > +		return fence;
> > > +	}
> > > +	return NULL;
> > > +}
> > > +
> > >   /**
> > >    * drm_syncobj_find_fence - lookup and reference the fence in a sync object
> > >    * @file_private: drm file private pointer
> > >    * @handle: sync object handle to lookup.
> > >    * @fence: out parameter for the fence
> > > + * @point: timeline point
> > >    *
> > >    * This is just a convenience function that combines drm_syncobj_find() and
> > >    * drm_syncobj_fence_get().
> > > @@ -221,7 +544,8 @@ static int drm_syncobj_assign_null_handle(struct drm_syncobj *syncobj)
> > >    */
> > >   int drm_syncobj_find_fence(struct drm_file *file_private,
> > >   			   u32 handle,
> > > -			   struct dma_fence **fence)
> > > +			   struct dma_fence **fence,
> > > +			   u64 point)
> > >   {
> > >   	struct drm_syncobj *syncobj = drm_syncobj_find(file_private, handle);
> > >   	int ret = 0;
> > > @@ -229,7 +553,15 @@ int drm_syncobj_find_fence(struct drm_file *file_private,
> > >   	if (!syncobj)
> > >   		return -ENOENT;
> > > -	*fence = drm_syncobj_fence_get(syncobj);
> > > +	if (syncobj->type == DRM_SYNCOBJ_TYPE_NORMAL) {
> > > +		*fence = drm_syncobj_fence_get(syncobj);
> > Needs at least a WARN_ON(point != 0) here, since I'd say that would be a
> > driver bug. Or userspace bug, in which case we need to return -EINVAL;
> > 
> > Hard to tell without the driver implementation.
> > 
> > > +	}else if (syncobj->type == DRM_SYNCOBJ_TYPE_TIMELINE) {
> > > +		*fence = drm_syncobj_timeline_point_get(syncobj, point,
> > > +							DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT);
> > > +	} else {
> > > +		DRM_ERROR("invalid syncobj type\n");
> > > +		return -EINVAL;
> > > +	}
> > >   	if (!*fence) {
> > >   		ret = -EINVAL;
> > >   	}
> > > @@ -238,6 +570,35 @@ int drm_syncobj_find_fence(struct drm_file *file_private,
> > >   }
> > >   EXPORT_SYMBOL(drm_syncobj_find_fence);
> > > +static void drm_syncobj_timeline_fini(struct drm_syncobj *syncobj,
> > > +				      struct drm_syncobj_timeline *syncobj_timeline)
> > > +{
> > > +	struct rb_node *node = NULL;
> > > +	struct drm_syncobj_wait_pt *wait_pt = NULL;
> > > +	struct drm_syncobj_signal_pt *signal_pt = NULL, *tmp;
> > > +
> > > +	spin_lock(&syncobj->lock);
> > > +	for(node = rb_first(&syncobj_timeline->wait_pt_tree);
> > > +	    node != NULL; ) {
> > > +		wait_pt = rb_entry(node, struct drm_syncobj_wait_pt, node);
> > > +		node = rb_next(node);
> > > +		rb_erase(&wait_pt->node,
> > > +			 &syncobj_timeline->wait_pt_tree);
> > > +		RB_CLEAR_NODE(&wait_pt->node);
> > > +		if (wait_pt->submission_fence)
> > > +			dma_fence_put(&wait_pt->submission_fence->base);
> > > +		/* kfree(wait_pt) is excuted by fence put */
> > > +		dma_fence_put(&wait_pt->base.base);
> > > +	}
> > > +	list_for_each_entry_safe(signal_pt, tmp,
> > > +				 &syncobj_timeline->signal_pt_list, list) {
> > > +		list_del(&signal_pt->list);
> > > +		dma_fence_put(signal_pt->signal_fence);
> > > +		dma_fence_put(signal_pt->pre_pt_base);
> > > +		dma_fence_put(&signal_pt->base.base);
> > > +	}
> > > +	spin_unlock(&syncobj->lock);
> > > +}
> > >   /**
> > >    * drm_syncobj_free - free a sync object.
> > >    * @kref: kref to free.
> > > @@ -250,10 +611,22 @@ void drm_syncobj_free(struct kref *kref)
> > >   						   struct drm_syncobj,
> > >   						   refcount);
> > >   	drm_syncobj_replace_fence(syncobj, NULL);
> > > +	drm_syncobj_timeline_fini(syncobj, &syncobj->syncobj_timeline);
> > >   	kfree(syncobj);
> > >   }
> > >   EXPORT_SYMBOL(drm_syncobj_free);
> > > +static void drm_syncobj_timeline_init(struct drm_syncobj_timeline
> > > +				      *syncobj_timeline)
> > > +{
> > > +	syncobj_timeline->timeline_context = dma_fence_context_alloc(1);
> > > +	syncobj_timeline->submission_context = dma_fence_context_alloc(1);
> > > +	syncobj_timeline->timeline = 0;
> > > +	syncobj_timeline->signal_point = 0;
> > > +
> > > +	syncobj_timeline->wait_pt_tree = RB_ROOT;
> > > +	INIT_LIST_HEAD(&syncobj_timeline->signal_pt_list);
> > > +}
> > >   /**
> > >    * drm_syncobj_create - create a new syncobj
> > >    * @out_syncobj: returned syncobj
> > > @@ -279,6 +652,12 @@ int drm_syncobj_create(struct drm_syncobj **out_syncobj, uint32_t flags,
> > >   	kref_init(&syncobj->refcount);
> > >   	INIT_LIST_HEAD(&syncobj->cb_list);
> > >   	spin_lock_init(&syncobj->lock);
> > > +	if (flags & DRM_SYNCOBJ_CREATE_TYPE_TIMELINE) {
> > > +		syncobj->type = DRM_SYNCOBJ_TYPE_TIMELINE;
> > > +		drm_syncobj_timeline_init(&syncobj->syncobj_timeline);
> > > +	} else {
> > > +		syncobj->type = DRM_SYNCOBJ_TYPE_NORMAL;
> > > +	}
> > >   	if (flags & DRM_SYNCOBJ_CREATE_SIGNALED) {
> > >   		ret = drm_syncobj_assign_null_handle(syncobj);
> > > @@ -491,7 +870,7 @@ static int drm_syncobj_export_sync_file(struct drm_file *file_private,
> > >   	if (fd < 0)
> > >   		return fd;
> > > -	ret = drm_syncobj_find_fence(file_private, handle, &fence);
> > > +	ret = drm_syncobj_find_fence(file_private, handle, &fence, 0);
> > >   	if (ret)
> > >   		goto err_put_fd;
> > > diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c
> > > index 9029590267aa..f24c3eccb4d5 100644
> > > --- a/drivers/gpu/drm/v3d/v3d_gem.c
> > > +++ b/drivers/gpu/drm/v3d/v3d_gem.c
> > > @@ -521,12 +521,12 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
> > >   	kref_init(&exec->refcount);
> > >   	ret = drm_syncobj_find_fence(file_priv, args->in_sync_bcl,
> > > -				     &exec->bin.in_fence);
> > > +				     &exec->bin.in_fence, 0);
> > >   	if (ret == -EINVAL)
> > >   		goto fail;
> > >   	ret = drm_syncobj_find_fence(file_priv, args->in_sync_rcl,
> > > -				     &exec->render.in_fence);
> > > +				     &exec->render.in_fence, 0);
> > >   	if (ret == -EINVAL)
> > >   		goto fail;
> > > diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c
> > > index 7910b9acedd6..f7b4971342e8 100644
> > > --- a/drivers/gpu/drm/vc4/vc4_gem.c
> > > +++ b/drivers/gpu/drm/vc4/vc4_gem.c
> > > @@ -1173,7 +1173,7 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
> > >   	if (args->in_sync) {
> > >   		ret = drm_syncobj_find_fence(file_priv, args->in_sync,
> > > -					     &in_fence);
> > > +					     &in_fence, 0);
> > >   		if (ret)
> > >   			goto fail;
> > > diff --git a/include/drm/drm_syncobj.h b/include/drm/drm_syncobj.h
> > > index b04c492ddbb5..b6e193c6cc9a 100644
> > > --- a/include/drm/drm_syncobj.h
> > > +++ b/include/drm/drm_syncobj.h
> > > @@ -54,6 +54,41 @@ const struct dma_fence_ops drm_syncobj_stub_fence_ops = {
> > >   	.release = NULL,
> > >   };
> > > +enum drm_syncobj_type {
> > > +	DRM_SYNCOBJ_TYPE_NORMAL,
> > > +	DRM_SYNCOBJ_TYPE_TIMELINE
> > > +};
> > > +
> > > +struct drm_syncobj;
> > > +struct drm_syncobj_wait_pt {
> > > +	struct drm_syncobj_stub_fence base;
> > > +	struct drm_syncobj_stub_fence *submission_fence;
> > > +	u64    value;
> > > +	struct rb_node   node;
> > > +};
> > > +struct drm_syncobj_signal_pt {
> > > +	struct drm_syncobj_stub_fence base;
> > > +	struct dma_fence *signal_fence;
> > > +	struct dma_fence *pre_pt_base;
> > > +	struct dma_fence_cb signal_cb;
> > > +	struct dma_fence_cb pre_pt_cb;
> > > +	struct drm_syncobj *syncobj;
> > > +	u64    value;
> > > +	struct list_head list;
> > > +};
> > The above internal structs shouldn't be in the public header.
> > 
> > Some tiny comments about what each piece is for would also go a long way
> > towards me being able to understand things.
> > 
> > Thanks, Daniel
> > 
> > > +struct drm_syncobj_timeline {
> > > +	u64 timeline_context;
> > > +	u64 submission_context;
> > > +	/**
> > > +	 * @timeline: syncobj timeline
> > > +	 */
> > > +	u64 timeline;
> > > +	u64 signal_point;
> > > +
> > > +
> > > +	struct rb_root wait_pt_tree;
> > > +	struct list_head signal_pt_list;
> > > +};
> > >   /**
> > >    * struct drm_syncobj - sync object.
> > >    *
> > > @@ -64,6 +99,11 @@ struct drm_syncobj {
> > >   	 * @refcount: Reference count of this object.
> > >   	 */
> > >   	struct kref refcount;
> > > +	/**
> > > +	 * @type: indicate syncobj type
> > > +	 */
> > > +	enum drm_syncobj_type type;
> > > +	struct drm_syncobj_timeline syncobj_timeline;
> > >   	/**
> > >   	 * @fence:
> > >   	 * NULL or a pointer to the fence bound to this object.
> > > @@ -162,9 +202,12 @@ void drm_syncobj_remove_callback(struct drm_syncobj *syncobj,
> > >   				 struct drm_syncobj_cb *cb);
> > >   void drm_syncobj_replace_fence(struct drm_syncobj *syncobj,
> > >   			       struct dma_fence *fence);
> > > +void drm_syncobj_signal_fence(struct drm_syncobj *syncobj,
> > > +			      struct dma_fence *fence,
> > > +			      u64 point);
> > >   int drm_syncobj_find_fence(struct drm_file *file_private,
> > >   			   u32 handle,
> > > -			   struct dma_fence **fence);
> > > +			   struct dma_fence **fence, u64 point);
> > >   void drm_syncobj_free(struct kref *kref);
> > >   int drm_syncobj_create(struct drm_syncobj **out_syncobj, uint32_t flags,
> > >   		       struct dma_fence *fence);
> > > diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
> > > index 300f336633f2..71e6cd1c88f8 100644
> > > --- a/include/uapi/drm/drm.h
> > > +++ b/include/uapi/drm/drm.h
> > > @@ -717,6 +717,8 @@ struct drm_prime_handle {
> > >   struct drm_syncobj_create {
> > >   	__u32 handle;
> > >   #define DRM_SYNCOBJ_CREATE_SIGNALED (1 << 0)
> > > +#define DRM_SYNCOBJ_CREATE_TYPE_NORMAL (1 << 1)
> > > +#define DRM_SYNCOBJ_CREATE_TYPE_TIMELINE (1 << 2)
> > >   	__u32 flags;
> > >   };
> > > @@ -734,7 +736,6 @@ struct drm_syncobj_handle {
> > >   	__s32 fd;
> > >   	__u32 pad;
> > >   };
> > > -
> > >   #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL (1 << 0)
> > >   #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT (1 << 1)
> > >   struct drm_syncobj_wait {
> > > -- 
> > > 2.14.1
> > > 
> > > _______________________________________________
> > > dri-devel mailing list
> > > dri-devel@lists.freedesktop.org
> > > https://lists.freedesktop.org/mailman/listinfo/dri-devel
>
On 2018年08月22日 17:31, Daniel Vetter wrote:
> On Wed, Aug 22, 2018 at 05:28:17PM +0800, zhoucm1 wrote:
>>
>> On 2018年08月22日 17:24, Daniel Vetter wrote:
>>> On Wed, Aug 22, 2018 at 04:49:28PM +0800, Chunming Zhou wrote:
>>>> VK_KHR_timeline_semaphore:
>>>> This extension introduces a new type of semaphore that has an integer payload
>>>> identifying a point in a timeline. Such timeline semaphores support the
>>>> following operations:
>>>>     * Host query - A host operation that allows querying the payload of the
>>>>       timeline semaphore.
>>>>     * Host wait - A host operation that allows a blocking wait for a
>>>>       timeline semaphore to reach a specified value.
>>>>     * Device wait - A device operation that allows waiting for a
>>>>       timeline semaphore to reach a specified value.
>>>>     * Device signal - A device operation that allows advancing the
>>>>       timeline semaphore to a specified value.
>>>>
>>>> Since it's a timeline, that means the front time point(PT) always is signaled before the late PT.
>>>> a. signal PT design:
>>>> Signal PT fence N depends on PT[N-1] fence and signal opertion fence, when PT[N] fence is signaled,
>>>> the timeline will increase to value of PT[N].
>>>> b. wait PT design:
>>>> Wait PT fence is signaled by reaching timeline point value, when timeline is increasing, will compare
>>>> wait PTs value with new timeline value, if PT value is lower than timeline value, then wait PT will be
>>>> signaled, otherwise keep in list. semaphore wait operation can wait on any point of timeline,
>>>> so need a RB tree to order them. And wait PT could ahead of signal PT, we need a sumission fence to
>>>> perform that.
>>>>
>>>> TODO:
>>>> CPU query and wait on timeline semaphore.
>>> Another TODO: igt testcase for all the cornercases. We already have
>>> other syncobj tests in there.
>> Yes, I'm also trying to find where test should be wrote, Could you give a
>> directory?
> There's already tests/syncobj_basic.c and tests/syncobj_wait.c. Either
> extend those, or probably better to start a new tests/syncobj_timeline.c
> since I expect this will have a lot of corner-cases we need to check.
I failed to find them in both kernel and libdrm, Could you point which 
test you said?

Thanks,
David Zhou
> -Daniel
>
>> Thanks,
>> David Zhou
>>> That would also help with understanding how this is supposed to be used,
>>> since I'm a bit too dense to immediately get your algorithm by just
>>> staring at the code.
>>>
>>>
>>>> Change-Id: I9f09aae225e268442c30451badac40406f24262c
>>>> Signed-off-by: Chunming Zhou <david1.zhou@amd.com>
>>>> Cc: Christian Konig <christian.koenig@amd.com>
>>>> Cc: Dave Airlie <airlied@redhat.com>
>>>> Cc: Daniel Rakos <Daniel.Rakos@amd.com>
>>>> ---
>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c |   7 +-
>>>>    drivers/gpu/drm/drm_syncobj.c          | 385 ++++++++++++++++++++++++++++++++-
>>>>    drivers/gpu/drm/v3d/v3d_gem.c          |   4 +-
>>>>    drivers/gpu/drm/vc4/vc4_gem.c          |   2 +-
>>>>    include/drm/drm_syncobj.h              |  45 +++-
>>>>    include/uapi/drm/drm.h                 |   3 +-
>>>>    6 files changed, 435 insertions(+), 11 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>>> index d42d1c8f78f6..463cc8960723 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>>> @@ -1105,7 +1105,7 @@ static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p,
>>>>    {
>>>>    	int r;
>>>>    	struct dma_fence *fence;
>>>> -	r = drm_syncobj_find_fence(p->filp, handle, &fence);
>>>> +	r = drm_syncobj_find_fence(p->filp, handle, &fence, 0);
>>>>    	if (r)
>>>>    		return r;
>>>> @@ -1193,8 +1193,9 @@ static void amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p)
>>>>    {
>>>>    	int i;
>>>> -	for (i = 0; i < p->num_post_dep_syncobjs; ++i)
>>>> -		drm_syncobj_replace_fence(p->post_dep_syncobjs[i], p->fence);
>>>> +	for (i = 0; i < p->num_post_dep_syncobjs; ++i) {
>>>> +		drm_syncobj_signal_fence(p->post_dep_syncobjs[i], p->fence, 0);
>>>> +	}
>>>>    }
>>>>    static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
>>>> diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
>>>> index 70af32d0def1..3709f36c901e 100644
>>>> --- a/drivers/gpu/drm/drm_syncobj.c
>>>> +++ b/drivers/gpu/drm/drm_syncobj.c
>>>> @@ -187,6 +187,191 @@ void drm_syncobj_replace_fence(struct drm_syncobj *syncobj,
>>>>    }
>>>>    EXPORT_SYMBOL(drm_syncobj_replace_fence);
>>>> +static void drm_syncobj_timeline_signal_submission_fences(struct drm_syncobj *syncobj)
>>>> +{
>>>> +	struct rb_node *node = NULL;
>>>> +	struct drm_syncobj_wait_pt *wait_pt = NULL;
>>>> +
>>>> +	spin_lock(&syncobj->lock);
>>>> +	for(node = rb_first(&syncobj->syncobj_timeline.wait_pt_tree);
>>>> +	    node != NULL; node = rb_next(node)) {
>>>> +	    wait_pt = rb_entry(node, struct drm_syncobj_wait_pt, node);
>>>> +	    if (wait_pt->value <= syncobj->syncobj_timeline.signal_point) {
>>>> +		    if (wait_pt->submission_fence)
>>>> +			dma_fence_signal(&wait_pt->submission_fence->base);
>>>> +	    } else {
>>>> +		    /* the loop is from left to right, the later entry value is
>>>> +		     * bigger, so don't need to check any more */
>>>> +		    break;
>>>> +	    }
>>> This seems to reinvet syncobj->cb_list. Since this is the exact same
>>> "future fence that doesn't even exist yet" problem I think those two code
>>> path should be unified. In general I think it'd be much better if we treat
>>> the old syncobj as a timeline with a limit of 1 slot only.
>>>
>>> Or there needs to be a really good reason why all new code.
>>>
>>> Specifically for this stuff here having unified future fence semantics
>>> will allow drivers to do clever stuff with them.
>>>
>>>> +	}
>>>> +	spin_unlock(&syncobj->lock);
>>>> +}
>>>> +
>>>> +static void drm_syncobj_timeline_signal_wait_pts(struct drm_syncobj *syncobj)
>>>> +{
>>>> +	struct rb_node *node = NULL;
>>>> +	struct drm_syncobj_wait_pt *wait_pt = NULL;
>>>> +
>>>> +	spin_lock(&syncobj->lock);
>>>> +	for(node = rb_first(&syncobj->syncobj_timeline.wait_pt_tree);
>>>> +	    node != NULL; ) {
>>>> +		wait_pt = rb_entry(node, struct drm_syncobj_wait_pt, node);
>>>> +		node = rb_next(node);
>>>> +		if (wait_pt->value <= syncobj->syncobj_timeline.timeline) {
>>>> +			dma_fence_signal(&wait_pt->base.base);
>>>> +			rb_erase(&wait_pt->node,
>>>> +				 &syncobj->syncobj_timeline.wait_pt_tree);
>>>> +			RB_CLEAR_NODE(&wait_pt->node);
>>>> +			if (wait_pt->submission_fence)
>>>> +				dma_fence_put(&wait_pt->submission_fence->base);
>>>> +			/* kfree(wait_pt) is excuted by fence put */
>>>> +			dma_fence_put(&wait_pt->base.base);
>>>> +		} else {
>>>> +			/* the loop is from left to right, the later entry value is
>>>> +			 * bigger, so don't need to check any more */
>>>> +			break;
>>>> +		}
>>>> +	}
>>>> +	spin_unlock(&syncobj->lock);
>>>> +}
>>>> +
>>>> +
>>>> +static void pt_fence_cb(struct drm_syncobj_signal_pt *signal_pt)
>>>> +{
>>>> +	struct dma_fence *fence = NULL;
>>>> +	struct drm_syncobj *syncobj;
>>>> +
>>>> +	fence = signal_pt->signal_fence;
>>>> +	signal_pt->signal_fence = NULL;
>>>> +	dma_fence_put(fence);
>>>> +	fence = signal_pt->pre_pt_base;
>>>> +	signal_pt->pre_pt_base = NULL;
>>>> +	dma_fence_put(fence);
>>>> +
>>>> +	syncobj = signal_pt->syncobj;
>>>> +	spin_lock(&syncobj->lock);
>>>> +	list_del(&signal_pt->list);
>>>> +	syncobj->syncobj_timeline.timeline = signal_pt->value;
>>>> +	spin_unlock(&syncobj->lock);
>>>> +	/* kfree(signal_pt) will be  executed by below fence put */
>>>> +	dma_fence_put(&signal_pt->base.base);
>>>> +	drm_syncobj_timeline_signal_wait_pts(syncobj);
>>>> +}
>>>> +static void pt_signal_fence_func(struct dma_fence *fence,
>>>> +				 struct dma_fence_cb *cb)
>>>> +{
>>>> +	struct drm_syncobj_signal_pt *signal_pt =
>>>> +		container_of(cb, struct drm_syncobj_signal_pt, signal_cb);
>>>> +
>>>> +	if (signal_pt->pre_pt_base &&
>>>> +	    !dma_fence_is_signaled(signal_pt->pre_pt_base))
>>>> +		return;
>>>> +
>>>> +	pt_fence_cb(signal_pt);
>>>> +}
>>>> +static void pt_pre_fence_func(struct dma_fence *fence,
>>>> +				 struct dma_fence_cb *cb)
>>>> +{
>>>> +	struct drm_syncobj_signal_pt *signal_pt =
>>>> +		container_of(cb, struct drm_syncobj_signal_pt, pre_pt_cb);
>>>> +
>>>> +	if (signal_pt->signal_fence &&
>>>> +	    !dma_fence_is_signaled(signal_pt->pre_pt_base))
>>>> +		return;
>>>> +
>>>> +	pt_fence_cb(signal_pt);
>>>> +}
>>>> +
>>>> +static int drm_syncobj_timeline_signal_fence(struct drm_syncobj *syncobj,
>>>> +					     struct dma_fence *fence,
>>>> +					     u64 point)
>>>> +{
>>>> +	struct drm_syncobj_signal_pt *signal_pt =
>>>> +		kzalloc(sizeof(struct drm_syncobj_signal_pt), GFP_KERNEL);
>>>> +	struct drm_syncobj_signal_pt *tail_pt;
>>>> +	struct dma_fence *tail_pt_fence = NULL;
>>>> +	int ret = 0;
>>>> +
>>>> +	if (!signal_pt)
>>>> +		return -ENOMEM;
>>>> +	if (syncobj->syncobj_timeline.signal_point >= point) {
>>>> +		DRM_WARN("A later signal is ready!");
>>>> +		goto out;
>>>> +	}
>>>> +	if (fence)
>>>> +		dma_fence_get(fence);
>>>> +	spin_lock(&syncobj->lock);
>>>> +	spin_lock_init(&signal_pt->base.lock);
>>>> +	dma_fence_init(&signal_pt->base.base,
>>>> +		       &drm_syncobj_stub_fence_ops,
>>>> +		       &signal_pt->base.lock,
>>>> +		       syncobj->syncobj_timeline.timeline_context, point);
>>>> +	signal_pt->signal_fence =
>>>> +		rcu_dereference_protected(fence,
>>>> +					  lockdep_is_held(&fence->lock));
>>>> +	if (!list_empty(&syncobj->syncobj_timeline.signal_pt_list)) {
>>>> +		tail_pt = list_last_entry(&syncobj->syncobj_timeline.signal_pt_list,
>>>> +					  struct drm_syncobj_signal_pt, list);
>>>> +		tail_pt_fence = &tail_pt->base.base;
>>>> +		if (dma_fence_is_signaled(tail_pt_fence))
>>>> +			tail_pt_fence = NULL;
>>>> +	}
>>>> +	if (tail_pt_fence)
>>>> +		signal_pt->pre_pt_base =
>>>> +			dma_fence_get(rcu_dereference_protected(tail_pt_fence,
>>>> +								lockdep_is_held(&tail_pt_fence->lock)));
>>>> +
>>>> +	signal_pt->value = point;
>>>> +	syncobj->syncobj_timeline.signal_point = point;
>>>> +	signal_pt->syncobj = syncobj;
>>>> +	INIT_LIST_HEAD(&signal_pt->list);
>>>> +	list_add_tail(&signal_pt->list, &syncobj->syncobj_timeline.signal_pt_list);
>>>> +	spin_unlock(&syncobj->lock);
>>>> +	drm_syncobj_timeline_signal_submission_fences(syncobj);
>>>> +	/**
>>>> +	 * Every pt is depending on signal fence and previous pt fence, add
>>>> +	 * callbacks to them
>>>> +	 */
>>>> +	if (!dma_fence_is_signaled(signal_pt->signal_fence))
>>>> +		dma_fence_add_callback(signal_pt->signal_fence,
>>>> +				       &signal_pt->signal_cb,
>>>> +				       pt_signal_fence_func);
>>>> +	else
>>>> +		pt_signal_fence_func(signal_pt->signal_fence,
>>>> +				     &signal_pt->signal_cb);
>>>> +	if (signal_pt->pre_pt_base && !dma_fence_is_signaled(signal_pt->pre_pt_base))
>>>> +		dma_fence_add_callback(signal_pt->pre_pt_base,
>>>> +				       &signal_pt->pre_pt_cb,
>>>> +				       pt_pre_fence_func);
>>>> +	else
>>>> +		pt_pre_fence_func(signal_pt->pre_pt_base, &signal_pt->pre_pt_cb);
>>>> +
>>>> +
>>>> +	return 0;
>>>> +out:
>>>> +	kfree(signal_pt);
>>>> +	return ret;
>>>> +}
>>>> +
>>>> +/**
>>>> + * drm_syncobj_signal_fence - place fence into a sync object.
>>>> + * @syncobj: Sync object to replace fence in
>>>> + * @fence: fence to install in sync file.
>>>> + *
>>>> + * This places the fence into normal or timeline sync object
>>>> + */
>>>> +void drm_syncobj_signal_fence(struct drm_syncobj *syncobj,
>>>> +			      struct dma_fence *fence,
>>>> +			      u64 point)
>>> This is a very confusion function name. Feels like
>>> drm_syncobj_timeline_signal_fence is rather misnamed. I think we should
>>> just extend replace_fence like you do with find_fence already.
>>>
>>>> +{
>>>> +	if (syncobj->type == DRM_SYNCOBJ_TYPE_NORMAL)
>>>> +		drm_syncobj_replace_fence(syncobj, fence);
>>>> +	else if (syncobj->type == DRM_SYNCOBJ_TYPE_TIMELINE)
>>>> +		drm_syncobj_timeline_signal_fence(syncobj, fence, point);
>>>> +}
>>>> +EXPORT_SYMBOL(drm_syncobj_signal_fence);
>>>> +
>>>>    static int drm_syncobj_assign_null_handle(struct drm_syncobj *syncobj)
>>>>    {
>>>>    	struct drm_syncobj_stub_fence *fence;
>>>> @@ -205,12 +390,150 @@ static int drm_syncobj_assign_null_handle(struct drm_syncobj *syncobj)
>>>>    	return 0;
>>>>    }
>>>> +static struct drm_syncobj_wait_pt *
>>>> +drm_syncobj_timeline_lookup_wait_pt(struct drm_syncobj *syncobj, u64 point)
>>>> +{
>>>> +    struct rb_node *node = syncobj->syncobj_timeline.wait_pt_tree.rb_node;
>>>> +    struct drm_syncobj_wait_pt *wait_pt = NULL;
>>>> +
>>>> +
>>>> +    spin_lock(&syncobj->lock);
>>>> +    while(node) {
>>>> +	    int result = point - wait_pt->value;
>>>> +
>>>> +	    wait_pt = rb_entry(node, struct drm_syncobj_wait_pt, node);
>>>> +	    if (result < 0)
>>>> +		    node = node->rb_left;
>>>> +	    else if (result > 0)
>>>> +		    node = node->rb_right;
>>>> +	    else
>>>> +		    break;
>>>> +    }
>>>> +    spin_unlock(&syncobj->lock);
>>>> +
>>>> +    return wait_pt;
>>>> +}
>>>> +
>>>> +static struct drm_syncobj_wait_pt *
>>>> +drm_syncobj_timeline_create_wait_pt(struct drm_syncobj *syncobj, u64 point)
>>>> +{
>>>> +	struct drm_syncobj_wait_pt *wait_pt;
>>>> +	struct rb_node **new = &(syncobj->syncobj_timeline.wait_pt_tree.rb_node), *parent = NULL;
>>>> +
>>>> +	wait_pt = kzalloc(sizeof(*wait_pt), GFP_KERNEL);
>>>> +	if (!wait_pt)
>>>> +		return NULL;
>>>> +	spin_lock_init(&wait_pt->base.lock);
>>>> +	dma_fence_init(&wait_pt->base.base,
>>>> +		       &drm_syncobj_stub_fence_ops,
>>>> +		       &wait_pt->base.lock,
>>>> +		       syncobj->syncobj_timeline.timeline_context, point);
>>>> +	wait_pt->submission_fence = NULL;
>>>> +	/* check if the pt is already sumbitted, if yes, then don't need to
>>>> +	 * create submission fence, otherwise create it */
>>>> +	if (point > syncobj->syncobj_timeline.signal_point) {
>>>> +		wait_pt->submission_fence =
>>>> +			kzalloc(sizeof(struct drm_syncobj_stub_fence),
>>>> +				GFP_KERNEL);
>>>> +		if (!wait_pt->submission_fence) {
>>>> +			dma_fence_put(&wait_pt->base.base);
>>>> +			return NULL;
>>>> +		}
>>>> +		spin_lock_init(&wait_pt->submission_fence->lock);
>>>> +		dma_fence_init(&wait_pt->submission_fence->base,
>>>> +			       &drm_syncobj_stub_fence_ops,
>>>> +			       &wait_pt->submission_fence->lock,
>>>> +			       syncobj->syncobj_timeline.submission_context,
>>>> +			       point);
>>>> +	}
>>>> +	wait_pt->value = point;
>>>> +
>>>> +	/* wait pt must be in an order, so that we can easily lookup and signal
>>>> +	 * it */
>>>> +	spin_lock(&syncobj->lock);
>>>> +	if (point <= syncobj->syncobj_timeline.timeline)
>>>> +		dma_fence_signal(&wait_pt->base.base);
>>>> +	if (wait_pt->submission_fence &&
>>>> +	    point <= syncobj->syncobj_timeline.signal_point)
>>>> +		dma_fence_signal(&wait_pt->submission_fence->base);
>>>> +	while(*new) {
>>>> +		struct drm_syncobj_wait_pt *this =
>>>> +			rb_entry(*new, struct drm_syncobj_wait_pt, node);
>>>> +		int result = wait_pt->value - this->value;
>>>> +
>>>> +		parent = *new;
>>>> +		if (result < 0)
>>>> +			new = &((*new)->rb_left);
>>>> +		else if (result > 0)
>>>> +			new = &((*new)->rb_right);
>>>> +		else
>>>> +			goto exist;
>>>> +	}
>>>> +
>>>> +	rb_link_node(&wait_pt->node, parent, new);
>>>> +	rb_insert_color(&wait_pt->node, &syncobj->syncobj_timeline.wait_pt_tree);
>>>> +	spin_unlock(&syncobj->lock);
>>>> +	return wait_pt;
>>>> +exist:
>>>> +	spin_unlock(&syncobj->lock);
>>>> +	if (wait_pt->submission_fence)
>>>> +		dma_fence_put(&wait_pt->submission_fence->base);
>>>> +	dma_fence_put(&wait_pt->base.base);
>>>> +	wait_pt = drm_syncobj_timeline_lookup_wait_pt(syncobj, point);
>>>> +	return wait_pt;
>>>> +}
>>>> +
>>>> +static struct dma_fence *
>>>> +drm_syncobj_timeline_point_get(struct drm_syncobj *syncobj, u64 point, u64 flag)
>>>> +{
>>>> +	struct drm_syncobj_wait_pt *wait_pt;
>>>> +
>>>> +	/* already signaled, simply return a signaled stub fence */
>>>> +	if (point <= syncobj->syncobj_timeline.timeline) {
>>>> +		struct drm_syncobj_stub_fence *fence;
>>>> +
>>>> +		fence = kzalloc(sizeof(*fence), GFP_KERNEL);
>>>> +		if (fence == NULL)
>>>> +			return NULL;
>>>> +
>>>> +		spin_lock_init(&fence->lock);
>>>> +		dma_fence_init(&fence->base, &drm_syncobj_stub_fence_ops,
>>>> +			       &fence->lock, 0, 0);
>>>> +		dma_fence_signal(&fence->base);
>>>> +		return &fence->base;
>>>> +	}
>>>> +
>>>> +	/* check if the wait pt exists */
>>>> +	wait_pt = drm_syncobj_timeline_lookup_wait_pt(syncobj, point);
>>>> +	if (!wait_pt) {
>>>> +		/* This is a new wait pt, so create it */
>>>> +		wait_pt = drm_syncobj_timeline_create_wait_pt(syncobj, point);
>>>> +		if (!wait_pt)
>>>> +			return NULL;
>>>> +	}
>>>> +	if (wait_pt) {
>>>> +		struct dma_fence *fence;
>>>> +		if (wait_pt->submission_fence) {
>>>> +			if (flag & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT)
>>>> +				dma_fence_wait(&wait_pt->submission_fence->base, true);
>>>> +			else if (!dma_fence_is_signaled(&wait_pt->submission_fence->base))
>>>> +				return NULL;
>>>> +		}
>>>> +		rcu_read_lock();
>>>> +		fence = dma_fence_get_rcu(&wait_pt->base.base);
>>>> +		rcu_read_unlock();
>>>> +		return fence;
>>>> +	}
>>>> +	return NULL;
>>>> +}
>>>> +
>>>>    /**
>>>>     * drm_syncobj_find_fence - lookup and reference the fence in a sync object
>>>>     * @file_private: drm file private pointer
>>>>     * @handle: sync object handle to lookup.
>>>>     * @fence: out parameter for the fence
>>>> + * @point: timeline point
>>>>     *
>>>>     * This is just a convenience function that combines drm_syncobj_find() and
>>>>     * drm_syncobj_fence_get().
>>>> @@ -221,7 +544,8 @@ static int drm_syncobj_assign_null_handle(struct drm_syncobj *syncobj)
>>>>     */
>>>>    int drm_syncobj_find_fence(struct drm_file *file_private,
>>>>    			   u32 handle,
>>>> -			   struct dma_fence **fence)
>>>> +			   struct dma_fence **fence,
>>>> +			   u64 point)
>>>>    {
>>>>    	struct drm_syncobj *syncobj = drm_syncobj_find(file_private, handle);
>>>>    	int ret = 0;
>>>> @@ -229,7 +553,15 @@ int drm_syncobj_find_fence(struct drm_file *file_private,
>>>>    	if (!syncobj)
>>>>    		return -ENOENT;
>>>> -	*fence = drm_syncobj_fence_get(syncobj);
>>>> +	if (syncobj->type == DRM_SYNCOBJ_TYPE_NORMAL) {
>>>> +		*fence = drm_syncobj_fence_get(syncobj);
>>> Needs at least a WARN_ON(point != 0) here, since I'd say that would be a
>>> driver bug. Or userspace bug, in which case we need to return -EINVAL;
>>>
>>> Hard to tell without the driver implementation.
>>>
>>>> +	}else if (syncobj->type == DRM_SYNCOBJ_TYPE_TIMELINE) {
>>>> +		*fence = drm_syncobj_timeline_point_get(syncobj, point,
>>>> +							DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT);
>>>> +	} else {
>>>> +		DRM_ERROR("invalid syncobj type\n");
>>>> +		return -EINVAL;
>>>> +	}
>>>>    	if (!*fence) {
>>>>    		ret = -EINVAL;
>>>>    	}
>>>> @@ -238,6 +570,35 @@ int drm_syncobj_find_fence(struct drm_file *file_private,
>>>>    }
>>>>    EXPORT_SYMBOL(drm_syncobj_find_fence);
>>>> +static void drm_syncobj_timeline_fini(struct drm_syncobj *syncobj,
>>>> +				      struct drm_syncobj_timeline *syncobj_timeline)
>>>> +{
>>>> +	struct rb_node *node = NULL;
>>>> +	struct drm_syncobj_wait_pt *wait_pt = NULL;
>>>> +	struct drm_syncobj_signal_pt *signal_pt = NULL, *tmp;
>>>> +
>>>> +	spin_lock(&syncobj->lock);
>>>> +	for(node = rb_first(&syncobj_timeline->wait_pt_tree);
>>>> +	    node != NULL; ) {
>>>> +		wait_pt = rb_entry(node, struct drm_syncobj_wait_pt, node);
>>>> +		node = rb_next(node);
>>>> +		rb_erase(&wait_pt->node,
>>>> +			 &syncobj_timeline->wait_pt_tree);
>>>> +		RB_CLEAR_NODE(&wait_pt->node);
>>>> +		if (wait_pt->submission_fence)
>>>> +			dma_fence_put(&wait_pt->submission_fence->base);
>>>> +		/* kfree(wait_pt) is excuted by fence put */
>>>> +		dma_fence_put(&wait_pt->base.base);
>>>> +	}
>>>> +	list_for_each_entry_safe(signal_pt, tmp,
>>>> +				 &syncobj_timeline->signal_pt_list, list) {
>>>> +		list_del(&signal_pt->list);
>>>> +		dma_fence_put(signal_pt->signal_fence);
>>>> +		dma_fence_put(signal_pt->pre_pt_base);
>>>> +		dma_fence_put(&signal_pt->base.base);
>>>> +	}
>>>> +	spin_unlock(&syncobj->lock);
>>>> +}
>>>>    /**
>>>>     * drm_syncobj_free - free a sync object.
>>>>     * @kref: kref to free.
>>>> @@ -250,10 +611,22 @@ void drm_syncobj_free(struct kref *kref)
>>>>    						   struct drm_syncobj,
>>>>    						   refcount);
>>>>    	drm_syncobj_replace_fence(syncobj, NULL);
>>>> +	drm_syncobj_timeline_fini(syncobj, &syncobj->syncobj_timeline);
>>>>    	kfree(syncobj);
>>>>    }
>>>>    EXPORT_SYMBOL(drm_syncobj_free);
>>>> +static void drm_syncobj_timeline_init(struct drm_syncobj_timeline
>>>> +				      *syncobj_timeline)
>>>> +{
>>>> +	syncobj_timeline->timeline_context = dma_fence_context_alloc(1);
>>>> +	syncobj_timeline->submission_context = dma_fence_context_alloc(1);
>>>> +	syncobj_timeline->timeline = 0;
>>>> +	syncobj_timeline->signal_point = 0;
>>>> +
>>>> +	syncobj_timeline->wait_pt_tree = RB_ROOT;
>>>> +	INIT_LIST_HEAD(&syncobj_timeline->signal_pt_list);
>>>> +}
>>>>    /**
>>>>     * drm_syncobj_create - create a new syncobj
>>>>     * @out_syncobj: returned syncobj
>>>> @@ -279,6 +652,12 @@ int drm_syncobj_create(struct drm_syncobj **out_syncobj, uint32_t flags,
>>>>    	kref_init(&syncobj->refcount);
>>>>    	INIT_LIST_HEAD(&syncobj->cb_list);
>>>>    	spin_lock_init(&syncobj->lock);
>>>> +	if (flags & DRM_SYNCOBJ_CREATE_TYPE_TIMELINE) {
>>>> +		syncobj->type = DRM_SYNCOBJ_TYPE_TIMELINE;
>>>> +		drm_syncobj_timeline_init(&syncobj->syncobj_timeline);
>>>> +	} else {
>>>> +		syncobj->type = DRM_SYNCOBJ_TYPE_NORMAL;
>>>> +	}
>>>>    	if (flags & DRM_SYNCOBJ_CREATE_SIGNALED) {
>>>>    		ret = drm_syncobj_assign_null_handle(syncobj);
>>>> @@ -491,7 +870,7 @@ static int drm_syncobj_export_sync_file(struct drm_file *file_private,
>>>>    	if (fd < 0)
>>>>    		return fd;
>>>> -	ret = drm_syncobj_find_fence(file_private, handle, &fence);
>>>> +	ret = drm_syncobj_find_fence(file_private, handle, &fence, 0);
>>>>    	if (ret)
>>>>    		goto err_put_fd;
>>>> diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c
>>>> index 9029590267aa..f24c3eccb4d5 100644
>>>> --- a/drivers/gpu/drm/v3d/v3d_gem.c
>>>> +++ b/drivers/gpu/drm/v3d/v3d_gem.c
>>>> @@ -521,12 +521,12 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
>>>>    	kref_init(&exec->refcount);
>>>>    	ret = drm_syncobj_find_fence(file_priv, args->in_sync_bcl,
>>>> -				     &exec->bin.in_fence);
>>>> +				     &exec->bin.in_fence, 0);
>>>>    	if (ret == -EINVAL)
>>>>    		goto fail;
>>>>    	ret = drm_syncobj_find_fence(file_priv, args->in_sync_rcl,
>>>> -				     &exec->render.in_fence);
>>>> +				     &exec->render.in_fence, 0);
>>>>    	if (ret == -EINVAL)
>>>>    		goto fail;
>>>> diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c
>>>> index 7910b9acedd6..f7b4971342e8 100644
>>>> --- a/drivers/gpu/drm/vc4/vc4_gem.c
>>>> +++ b/drivers/gpu/drm/vc4/vc4_gem.c
>>>> @@ -1173,7 +1173,7 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
>>>>    	if (args->in_sync) {
>>>>    		ret = drm_syncobj_find_fence(file_priv, args->in_sync,
>>>> -					     &in_fence);
>>>> +					     &in_fence, 0);
>>>>    		if (ret)
>>>>    			goto fail;
>>>> diff --git a/include/drm/drm_syncobj.h b/include/drm/drm_syncobj.h
>>>> index b04c492ddbb5..b6e193c6cc9a 100644
>>>> --- a/include/drm/drm_syncobj.h
>>>> +++ b/include/drm/drm_syncobj.h
>>>> @@ -54,6 +54,41 @@ const struct dma_fence_ops drm_syncobj_stub_fence_ops = {
>>>>    	.release = NULL,
>>>>    };
>>>> +enum drm_syncobj_type {
>>>> +	DRM_SYNCOBJ_TYPE_NORMAL,
>>>> +	DRM_SYNCOBJ_TYPE_TIMELINE
>>>> +};
>>>> +
>>>> +struct drm_syncobj;
>>>> +struct drm_syncobj_wait_pt {
>>>> +	struct drm_syncobj_stub_fence base;
>>>> +	struct drm_syncobj_stub_fence *submission_fence;
>>>> +	u64    value;
>>>> +	struct rb_node   node;
>>>> +};
>>>> +struct drm_syncobj_signal_pt {
>>>> +	struct drm_syncobj_stub_fence base;
>>>> +	struct dma_fence *signal_fence;
>>>> +	struct dma_fence *pre_pt_base;
>>>> +	struct dma_fence_cb signal_cb;
>>>> +	struct dma_fence_cb pre_pt_cb;
>>>> +	struct drm_syncobj *syncobj;
>>>> +	u64    value;
>>>> +	struct list_head list;
>>>> +};
>>> The above internal structs shouldn't be in the public header.
>>>
>>> Some tiny comments about what each piece is for would also go a long way
>>> towards me being able to understand things.
>>>
>>> Thanks, Daniel
>>>
>>>> +struct drm_syncobj_timeline {
>>>> +	u64 timeline_context;
>>>> +	u64 submission_context;
>>>> +	/**
>>>> +	 * @timeline: syncobj timeline
>>>> +	 */
>>>> +	u64 timeline;
>>>> +	u64 signal_point;
>>>> +
>>>> +
>>>> +	struct rb_root wait_pt_tree;
>>>> +	struct list_head signal_pt_list;
>>>> +};
>>>>    /**
>>>>     * struct drm_syncobj - sync object.
>>>>     *
>>>> @@ -64,6 +99,11 @@ struct drm_syncobj {
>>>>    	 * @refcount: Reference count of this object.
>>>>    	 */
>>>>    	struct kref refcount;
>>>> +	/**
>>>> +	 * @type: indicate syncobj type
>>>> +	 */
>>>> +	enum drm_syncobj_type type;
>>>> +	struct drm_syncobj_timeline syncobj_timeline;
>>>>    	/**
>>>>    	 * @fence:
>>>>    	 * NULL or a pointer to the fence bound to this object.
>>>> @@ -162,9 +202,12 @@ void drm_syncobj_remove_callback(struct drm_syncobj *syncobj,
>>>>    				 struct drm_syncobj_cb *cb);
>>>>    void drm_syncobj_replace_fence(struct drm_syncobj *syncobj,
>>>>    			       struct dma_fence *fence);
>>>> +void drm_syncobj_signal_fence(struct drm_syncobj *syncobj,
>>>> +			      struct dma_fence *fence,
>>>> +			      u64 point);
>>>>    int drm_syncobj_find_fence(struct drm_file *file_private,
>>>>    			   u32 handle,
>>>> -			   struct dma_fence **fence);
>>>> +			   struct dma_fence **fence, u64 point);
>>>>    void drm_syncobj_free(struct kref *kref);
>>>>    int drm_syncobj_create(struct drm_syncobj **out_syncobj, uint32_t flags,
>>>>    		       struct dma_fence *fence);
>>>> diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
>>>> index 300f336633f2..71e6cd1c88f8 100644
>>>> --- a/include/uapi/drm/drm.h
>>>> +++ b/include/uapi/drm/drm.h
>>>> @@ -717,6 +717,8 @@ struct drm_prime_handle {
>>>>    struct drm_syncobj_create {
>>>>    	__u32 handle;
>>>>    #define DRM_SYNCOBJ_CREATE_SIGNALED (1 << 0)
>>>> +#define DRM_SYNCOBJ_CREATE_TYPE_NORMAL (1 << 1)
>>>> +#define DRM_SYNCOBJ_CREATE_TYPE_TIMELINE (1 << 2)
>>>>    	__u32 flags;
>>>>    };
>>>> @@ -734,7 +736,6 @@ struct drm_syncobj_handle {
>>>>    	__s32 fd;
>>>>    	__u32 pad;
>>>>    };
>>>> -
>>>>    #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL (1 << 0)
>>>>    #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT (1 << 1)
>>>>    struct drm_syncobj_wait {
>>>> -- 
>>>> 2.14.1
>>>>
>>>> _______________________________________________
>>>> dri-devel mailing list
>>>> dri-devel@lists.freedesktop.org
>>>> https://lists.freedesktop.org/mailman/listinfo/dri-devel
On Wed, Aug 22, 2018 at 11:59 AM, zhoucm1 <zhoucm1@amd.com> wrote:
>
>
> On 2018年08月22日 17:31, Daniel Vetter wrote:
>>
>> On Wed, Aug 22, 2018 at 05:28:17PM +0800, zhoucm1 wrote:
>>>
>>>
>>> On 2018年08月22日 17:24, Daniel Vetter wrote:
>>>>
>>>> On Wed, Aug 22, 2018 at 04:49:28PM +0800, Chunming Zhou wrote:
>>>>>
>>>>> VK_KHR_timeline_semaphore:
>>>>> This extension introduces a new type of semaphore that has an integer
>>>>> payload
>>>>> identifying a point in a timeline. Such timeline semaphores support the
>>>>> following operations:
>>>>>     * Host query - A host operation that allows querying the payload of
>>>>> the
>>>>>       timeline semaphore.
>>>>>     * Host wait - A host operation that allows a blocking wait for a
>>>>>       timeline semaphore to reach a specified value.
>>>>>     * Device wait - A device operation that allows waiting for a
>>>>>       timeline semaphore to reach a specified value.
>>>>>     * Device signal - A device operation that allows advancing the
>>>>>       timeline semaphore to a specified value.
>>>>>
>>>>> Since it's a timeline, that means the front time point(PT) always is
>>>>> signaled before the late PT.
>>>>> a. signal PT design:
>>>>> Signal PT fence N depends on PT[N-1] fence and signal opertion fence,
>>>>> when PT[N] fence is signaled,
>>>>> the timeline will increase to value of PT[N].
>>>>> b. wait PT design:
>>>>> Wait PT fence is signaled by reaching timeline point value, when
>>>>> timeline is increasing, will compare
>>>>> wait PTs value with new timeline value, if PT value is lower than
>>>>> timeline value, then wait PT will be
>>>>> signaled, otherwise keep in list. semaphore wait operation can wait on
>>>>> any point of timeline,
>>>>> so need a RB tree to order them. And wait PT could ahead of signal PT,
>>>>> we need a sumission fence to
>>>>> perform that.
>>>>>
>>>>> TODO:
>>>>> CPU query and wait on timeline semaphore.
>>>>
>>>> Another TODO: igt testcase for all the cornercases. We already have
>>>> other syncobj tests in there.
>>>
>>> Yes, I'm also trying to find where test should be wrote, Could you give a
>>> directory?
>>
>> There's already tests/syncobj_basic.c and tests/syncobj_wait.c. Either
>> extend those, or probably better to start a new tests/syncobj_timeline.c
>> since I expect this will have a lot of corner-cases we need to check.
>
> I failed to find them in both kernel and libdrm, Could you point which test
> you said?

igt testcases. It's a separate thing with lots of drm tests:

https://cgit.freedesktop.org/drm/igt-gpu-tools

I know that amdgpu has their tests in libdrm (which imo is a bit
unfortunate split, but there's also reasons and stuff).

Cheers, Daniel

> Thanks,
> David Zhou
>
>> -Daniel
>>
>>> Thanks,
>>> David Zhou
>>>>
>>>> That would also help with understanding how this is supposed to be used,
>>>> since I'm a bit too dense to immediately get your algorithm by just
>>>> staring at the code.
>>>>
>>>>
>>>>> Change-Id: I9f09aae225e268442c30451badac40406f24262c
>>>>> Signed-off-by: Chunming Zhou <david1.zhou@amd.com>
>>>>> Cc: Christian Konig <christian.koenig@amd.com>
>>>>> Cc: Dave Airlie <airlied@redhat.com>
>>>>> Cc: Daniel Rakos <Daniel.Rakos@amd.com>
>>>>> ---
>>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c |   7 +-
>>>>>    drivers/gpu/drm/drm_syncobj.c          | 385
>>>>> ++++++++++++++++++++++++++++++++-
>>>>>    drivers/gpu/drm/v3d/v3d_gem.c          |   4 +-
>>>>>    drivers/gpu/drm/vc4/vc4_gem.c          |   2 +-
>>>>>    include/drm/drm_syncobj.h              |  45 +++-
>>>>>    include/uapi/drm/drm.h                 |   3 +-
>>>>>    6 files changed, 435 insertions(+), 11 deletions(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>>>> index d42d1c8f78f6..463cc8960723 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>>>> @@ -1105,7 +1105,7 @@ static int
>>>>> amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p,
>>>>>    {
>>>>>         int r;
>>>>>         struct dma_fence *fence;
>>>>> -       r = drm_syncobj_find_fence(p->filp, handle, &fence);
>>>>> +       r = drm_syncobj_find_fence(p->filp, handle, &fence, 0);
>>>>>         if (r)
>>>>>                 return r;
>>>>> @@ -1193,8 +1193,9 @@ static void amdgpu_cs_post_dependencies(struct
>>>>> amdgpu_cs_parser *p)
>>>>>    {
>>>>>         int i;
>>>>> -       for (i = 0; i < p->num_post_dep_syncobjs; ++i)
>>>>> -               drm_syncobj_replace_fence(p->post_dep_syncobjs[i],
>>>>> p->fence);
>>>>> +       for (i = 0; i < p->num_post_dep_syncobjs; ++i) {
>>>>> +               drm_syncobj_signal_fence(p->post_dep_syncobjs[i],
>>>>> p->fence, 0);
>>>>> +       }
>>>>>    }
>>>>>    static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
>>>>> diff --git a/drivers/gpu/drm/drm_syncobj.c
>>>>> b/drivers/gpu/drm/drm_syncobj.c
>>>>> index 70af32d0def1..3709f36c901e 100644
>>>>> --- a/drivers/gpu/drm/drm_syncobj.c
>>>>> +++ b/drivers/gpu/drm/drm_syncobj.c
>>>>> @@ -187,6 +187,191 @@ void drm_syncobj_replace_fence(struct drm_syncobj
>>>>> *syncobj,
>>>>>    }
>>>>>    EXPORT_SYMBOL(drm_syncobj_replace_fence);
>>>>> +static void drm_syncobj_timeline_signal_submission_fences(struct
>>>>> drm_syncobj *syncobj)
>>>>> +{
>>>>> +       struct rb_node *node = NULL;
>>>>> +       struct drm_syncobj_wait_pt *wait_pt = NULL;
>>>>> +
>>>>> +       spin_lock(&syncobj->lock);
>>>>> +       for(node = rb_first(&syncobj->syncobj_timeline.wait_pt_tree);
>>>>> +           node != NULL; node = rb_next(node)) {
>>>>> +           wait_pt = rb_entry(node, struct drm_syncobj_wait_pt, node);
>>>>> +           if (wait_pt->value <=
>>>>> syncobj->syncobj_timeline.signal_point) {
>>>>> +                   if (wait_pt->submission_fence)
>>>>> +
>>>>> dma_fence_signal(&wait_pt->submission_fence->base);
>>>>> +           } else {
>>>>> +                   /* the loop is from left to right, the later entry
>>>>> value is
>>>>> +                    * bigger, so don't need to check any more */
>>>>> +                   break;
>>>>> +           }
>>>>
>>>> This seems to reinvet syncobj->cb_list. Since this is the exact same
>>>> "future fence that doesn't even exist yet" problem I think those two
>>>> code
>>>> path should be unified. In general I think it'd be much better if we
>>>> treat
>>>> the old syncobj as a timeline with a limit of 1 slot only.
>>>>
>>>> Or there needs to be a really good reason why all new code.
>>>>
>>>> Specifically for this stuff here having unified future fence semantics
>>>> will allow drivers to do clever stuff with them.
>>>>
>>>>> +       }
>>>>> +       spin_unlock(&syncobj->lock);
>>>>> +}
>>>>> +
>>>>> +static void drm_syncobj_timeline_signal_wait_pts(struct drm_syncobj
>>>>> *syncobj)
>>>>> +{
>>>>> +       struct rb_node *node = NULL;
>>>>> +       struct drm_syncobj_wait_pt *wait_pt = NULL;
>>>>> +
>>>>> +       spin_lock(&syncobj->lock);
>>>>> +       for(node = rb_first(&syncobj->syncobj_timeline.wait_pt_tree);
>>>>> +           node != NULL; ) {
>>>>> +               wait_pt = rb_entry(node, struct drm_syncobj_wait_pt,
>>>>> node);
>>>>> +               node = rb_next(node);
>>>>> +               if (wait_pt->value <=
>>>>> syncobj->syncobj_timeline.timeline) {
>>>>> +                       dma_fence_signal(&wait_pt->base.base);
>>>>> +                       rb_erase(&wait_pt->node,
>>>>> +
>>>>> &syncobj->syncobj_timeline.wait_pt_tree);
>>>>> +                       RB_CLEAR_NODE(&wait_pt->node);
>>>>> +                       if (wait_pt->submission_fence)
>>>>> +
>>>>> dma_fence_put(&wait_pt->submission_fence->base);
>>>>> +                       /* kfree(wait_pt) is excuted by fence put */
>>>>> +                       dma_fence_put(&wait_pt->base.base);
>>>>> +               } else {
>>>>> +                       /* the loop is from left to right, the later
>>>>> entry value is
>>>>> +                        * bigger, so don't need to check any more */
>>>>> +                       break;
>>>>> +               }
>>>>> +       }
>>>>> +       spin_unlock(&syncobj->lock);
>>>>> +}
>>>>> +
>>>>> +
>>>>> +static void pt_fence_cb(struct drm_syncobj_signal_pt *signal_pt)
>>>>> +{
>>>>> +       struct dma_fence *fence = NULL;
>>>>> +       struct drm_syncobj *syncobj;
>>>>> +
>>>>> +       fence = signal_pt->signal_fence;
>>>>> +       signal_pt->signal_fence = NULL;
>>>>> +       dma_fence_put(fence);
>>>>> +       fence = signal_pt->pre_pt_base;
>>>>> +       signal_pt->pre_pt_base = NULL;
>>>>> +       dma_fence_put(fence);
>>>>> +
>>>>> +       syncobj = signal_pt->syncobj;
>>>>> +       spin_lock(&syncobj->lock);
>>>>> +       list_del(&signal_pt->list);
>>>>> +       syncobj->syncobj_timeline.timeline = signal_pt->value;
>>>>> +       spin_unlock(&syncobj->lock);
>>>>> +       /* kfree(signal_pt) will be  executed by below fence put */
>>>>> +       dma_fence_put(&signal_pt->base.base);
>>>>> +       drm_syncobj_timeline_signal_wait_pts(syncobj);
>>>>> +}
>>>>> +static void pt_signal_fence_func(struct dma_fence *fence,
>>>>> +                                struct dma_fence_cb *cb)
>>>>> +{
>>>>> +       struct drm_syncobj_signal_pt *signal_pt =
>>>>> +               container_of(cb, struct drm_syncobj_signal_pt,
>>>>> signal_cb);
>>>>> +
>>>>> +       if (signal_pt->pre_pt_base &&
>>>>> +           !dma_fence_is_signaled(signal_pt->pre_pt_base))
>>>>> +               return;
>>>>> +
>>>>> +       pt_fence_cb(signal_pt);
>>>>> +}
>>>>> +static void pt_pre_fence_func(struct dma_fence *fence,
>>>>> +                                struct dma_fence_cb *cb)
>>>>> +{
>>>>> +       struct drm_syncobj_signal_pt *signal_pt =
>>>>> +               container_of(cb, struct drm_syncobj_signal_pt,
>>>>> pre_pt_cb);
>>>>> +
>>>>> +       if (signal_pt->signal_fence &&
>>>>> +           !dma_fence_is_signaled(signal_pt->pre_pt_base))
>>>>> +               return;
>>>>> +
>>>>> +       pt_fence_cb(signal_pt);
>>>>> +}
>>>>> +
>>>>> +static int drm_syncobj_timeline_signal_fence(struct drm_syncobj
>>>>> *syncobj,
>>>>> +                                            struct dma_fence *fence,
>>>>> +                                            u64 point)
>>>>> +{
>>>>> +       struct drm_syncobj_signal_pt *signal_pt =
>>>>> +               kzalloc(sizeof(struct drm_syncobj_signal_pt),
>>>>> GFP_KERNEL);
>>>>> +       struct drm_syncobj_signal_pt *tail_pt;
>>>>> +       struct dma_fence *tail_pt_fence = NULL;
>>>>> +       int ret = 0;
>>>>> +
>>>>> +       if (!signal_pt)
>>>>> +               return -ENOMEM;
>>>>> +       if (syncobj->syncobj_timeline.signal_point >= point) {
>>>>> +               DRM_WARN("A later signal is ready!");
>>>>> +               goto out;
>>>>> +       }
>>>>> +       if (fence)
>>>>> +               dma_fence_get(fence);
>>>>> +       spin_lock(&syncobj->lock);
>>>>> +       spin_lock_init(&signal_pt->base.lock);
>>>>> +       dma_fence_init(&signal_pt->base.base,
>>>>> +                      &drm_syncobj_stub_fence_ops,
>>>>> +                      &signal_pt->base.lock,
>>>>> +                      syncobj->syncobj_timeline.timeline_context,
>>>>> point);
>>>>> +       signal_pt->signal_fence =
>>>>> +               rcu_dereference_protected(fence,
>>>>> +
>>>>> lockdep_is_held(&fence->lock));
>>>>> +       if (!list_empty(&syncobj->syncobj_timeline.signal_pt_list)) {
>>>>> +               tail_pt =
>>>>> list_last_entry(&syncobj->syncobj_timeline.signal_pt_list,
>>>>> +                                         struct drm_syncobj_signal_pt,
>>>>> list);
>>>>> +               tail_pt_fence = &tail_pt->base.base;
>>>>> +               if (dma_fence_is_signaled(tail_pt_fence))
>>>>> +                       tail_pt_fence = NULL;
>>>>> +       }
>>>>> +       if (tail_pt_fence)
>>>>> +               signal_pt->pre_pt_base =
>>>>> +
>>>>> dma_fence_get(rcu_dereference_protected(tail_pt_fence,
>>>>> +
>>>>> lockdep_is_held(&tail_pt_fence->lock)));
>>>>> +
>>>>> +       signal_pt->value = point;
>>>>> +       syncobj->syncobj_timeline.signal_point = point;
>>>>> +       signal_pt->syncobj = syncobj;
>>>>> +       INIT_LIST_HEAD(&signal_pt->list);
>>>>> +       list_add_tail(&signal_pt->list,
>>>>> &syncobj->syncobj_timeline.signal_pt_list);
>>>>> +       spin_unlock(&syncobj->lock);
>>>>> +       drm_syncobj_timeline_signal_submission_fences(syncobj);
>>>>> +       /**
>>>>> +        * Every pt is depending on signal fence and previous pt fence,
>>>>> add
>>>>> +        * callbacks to them
>>>>> +        */
>>>>> +       if (!dma_fence_is_signaled(signal_pt->signal_fence))
>>>>> +               dma_fence_add_callback(signal_pt->signal_fence,
>>>>> +                                      &signal_pt->signal_cb,
>>>>> +                                      pt_signal_fence_func);
>>>>> +       else
>>>>> +               pt_signal_fence_func(signal_pt->signal_fence,
>>>>> +                                    &signal_pt->signal_cb);
>>>>> +       if (signal_pt->pre_pt_base &&
>>>>> !dma_fence_is_signaled(signal_pt->pre_pt_base))
>>>>> +               dma_fence_add_callback(signal_pt->pre_pt_base,
>>>>> +                                      &signal_pt->pre_pt_cb,
>>>>> +                                      pt_pre_fence_func);
>>>>> +       else
>>>>> +               pt_pre_fence_func(signal_pt->pre_pt_base,
>>>>> &signal_pt->pre_pt_cb);
>>>>> +
>>>>> +
>>>>> +       return 0;
>>>>> +out:
>>>>> +       kfree(signal_pt);
>>>>> +       return ret;
>>>>> +}
>>>>> +
>>>>> +/**
>>>>> + * drm_syncobj_signal_fence - place fence into a sync object.
>>>>> + * @syncobj: Sync object to replace fence in
>>>>> + * @fence: fence to install in sync file.
>>>>> + *
>>>>> + * This places the fence into normal or timeline sync object
>>>>> + */
>>>>> +void drm_syncobj_signal_fence(struct drm_syncobj *syncobj,
>>>>> +                             struct dma_fence *fence,
>>>>> +                             u64 point)
>>>>
>>>> This is a very confusion function name. Feels like
>>>> drm_syncobj_timeline_signal_fence is rather misnamed. I think we should
>>>> just extend replace_fence like you do with find_fence already.
>>>>
>>>>> +{
>>>>> +       if (syncobj->type == DRM_SYNCOBJ_TYPE_NORMAL)
>>>>> +               drm_syncobj_replace_fence(syncobj, fence);
>>>>> +       else if (syncobj->type == DRM_SYNCOBJ_TYPE_TIMELINE)
>>>>> +               drm_syncobj_timeline_signal_fence(syncobj, fence,
>>>>> point);
>>>>> +}
>>>>> +EXPORT_SYMBOL(drm_syncobj_signal_fence);
>>>>> +
>>>>>    static int drm_syncobj_assign_null_handle(struct drm_syncobj
>>>>> *syncobj)
>>>>>    {
>>>>>         struct drm_syncobj_stub_fence *fence;
>>>>> @@ -205,12 +390,150 @@ static int drm_syncobj_assign_null_handle(struct
>>>>> drm_syncobj *syncobj)
>>>>>         return 0;
>>>>>    }
>>>>> +static struct drm_syncobj_wait_pt *
>>>>> +drm_syncobj_timeline_lookup_wait_pt(struct drm_syncobj *syncobj, u64
>>>>> point)
>>>>> +{
>>>>> +    struct rb_node *node =
>>>>> syncobj->syncobj_timeline.wait_pt_tree.rb_node;
>>>>> +    struct drm_syncobj_wait_pt *wait_pt = NULL;
>>>>> +
>>>>> +
>>>>> +    spin_lock(&syncobj->lock);
>>>>> +    while(node) {
>>>>> +           int result = point - wait_pt->value;
>>>>> +
>>>>> +           wait_pt = rb_entry(node, struct drm_syncobj_wait_pt, node);
>>>>> +           if (result < 0)
>>>>> +                   node = node->rb_left;
>>>>> +           else if (result > 0)
>>>>> +                   node = node->rb_right;
>>>>> +           else
>>>>> +                   break;
>>>>> +    }
>>>>> +    spin_unlock(&syncobj->lock);
>>>>> +
>>>>> +    return wait_pt;
>>>>> +}
>>>>> +
>>>>> +static struct drm_syncobj_wait_pt *
>>>>> +drm_syncobj_timeline_create_wait_pt(struct drm_syncobj *syncobj, u64
>>>>> point)
>>>>> +{
>>>>> +       struct drm_syncobj_wait_pt *wait_pt;
>>>>> +       struct rb_node **new =
>>>>> &(syncobj->syncobj_timeline.wait_pt_tree.rb_node), *parent = NULL;
>>>>> +
>>>>> +       wait_pt = kzalloc(sizeof(*wait_pt), GFP_KERNEL);
>>>>> +       if (!wait_pt)
>>>>> +               return NULL;
>>>>> +       spin_lock_init(&wait_pt->base.lock);
>>>>> +       dma_fence_init(&wait_pt->base.base,
>>>>> +                      &drm_syncobj_stub_fence_ops,
>>>>> +                      &wait_pt->base.lock,
>>>>> +                      syncobj->syncobj_timeline.timeline_context,
>>>>> point);
>>>>> +       wait_pt->submission_fence = NULL;
>>>>> +       /* check if the pt is already sumbitted, if yes, then don't
>>>>> need to
>>>>> +        * create submission fence, otherwise create it */
>>>>> +       if (point > syncobj->syncobj_timeline.signal_point) {
>>>>> +               wait_pt->submission_fence =
>>>>> +                       kzalloc(sizeof(struct drm_syncobj_stub_fence),
>>>>> +                               GFP_KERNEL);
>>>>> +               if (!wait_pt->submission_fence) {
>>>>> +                       dma_fence_put(&wait_pt->base.base);
>>>>> +                       return NULL;
>>>>> +               }
>>>>> +               spin_lock_init(&wait_pt->submission_fence->lock);
>>>>> +               dma_fence_init(&wait_pt->submission_fence->base,
>>>>> +                              &drm_syncobj_stub_fence_ops,
>>>>> +                              &wait_pt->submission_fence->lock,
>>>>> +
>>>>> syncobj->syncobj_timeline.submission_context,
>>>>> +                              point);
>>>>> +       }
>>>>> +       wait_pt->value = point;
>>>>> +
>>>>> +       /* wait pt must be in an order, so that we can easily lookup
>>>>> and signal
>>>>> +        * it */
>>>>> +       spin_lock(&syncobj->lock);
>>>>> +       if (point <= syncobj->syncobj_timeline.timeline)
>>>>> +               dma_fence_signal(&wait_pt->base.base);
>>>>> +       if (wait_pt->submission_fence &&
>>>>> +           point <= syncobj->syncobj_timeline.signal_point)
>>>>> +               dma_fence_signal(&wait_pt->submission_fence->base);
>>>>> +       while(*new) {
>>>>> +               struct drm_syncobj_wait_pt *this =
>>>>> +                       rb_entry(*new, struct drm_syncobj_wait_pt,
>>>>> node);
>>>>> +               int result = wait_pt->value - this->value;
>>>>> +
>>>>> +               parent = *new;
>>>>> +               if (result < 0)
>>>>> +                       new = &((*new)->rb_left);
>>>>> +               else if (result > 0)
>>>>> +                       new = &((*new)->rb_right);
>>>>> +               else
>>>>> +                       goto exist;
>>>>> +       }
>>>>> +
>>>>> +       rb_link_node(&wait_pt->node, parent, new);
>>>>> +       rb_insert_color(&wait_pt->node,
>>>>> &syncobj->syncobj_timeline.wait_pt_tree);
>>>>> +       spin_unlock(&syncobj->lock);
>>>>> +       return wait_pt;
>>>>> +exist:
>>>>> +       spin_unlock(&syncobj->lock);
>>>>> +       if (wait_pt->submission_fence)
>>>>> +               dma_fence_put(&wait_pt->submission_fence->base);
>>>>> +       dma_fence_put(&wait_pt->base.base);
>>>>> +       wait_pt = drm_syncobj_timeline_lookup_wait_pt(syncobj, point);
>>>>> +       return wait_pt;
>>>>> +}
>>>>> +
>>>>> +static struct dma_fence *
>>>>> +drm_syncobj_timeline_point_get(struct drm_syncobj *syncobj, u64 point,
>>>>> u64 flag)
>>>>> +{
>>>>> +       struct drm_syncobj_wait_pt *wait_pt;
>>>>> +
>>>>> +       /* already signaled, simply return a signaled stub fence */
>>>>> +       if (point <= syncobj->syncobj_timeline.timeline) {
>>>>> +               struct drm_syncobj_stub_fence *fence;
>>>>> +
>>>>> +               fence = kzalloc(sizeof(*fence), GFP_KERNEL);
>>>>> +               if (fence == NULL)
>>>>> +                       return NULL;
>>>>> +
>>>>> +               spin_lock_init(&fence->lock);
>>>>> +               dma_fence_init(&fence->base,
>>>>> &drm_syncobj_stub_fence_ops,
>>>>> +                              &fence->lock, 0, 0);
>>>>> +               dma_fence_signal(&fence->base);
>>>>> +               return &fence->base;
>>>>> +       }
>>>>> +
>>>>> +       /* check if the wait pt exists */
>>>>> +       wait_pt = drm_syncobj_timeline_lookup_wait_pt(syncobj, point);
>>>>> +       if (!wait_pt) {
>>>>> +               /* This is a new wait pt, so create it */
>>>>> +               wait_pt = drm_syncobj_timeline_create_wait_pt(syncobj,
>>>>> point);
>>>>> +               if (!wait_pt)
>>>>> +                       return NULL;
>>>>> +       }
>>>>> +       if (wait_pt) {
>>>>> +               struct dma_fence *fence;
>>>>> +               if (wait_pt->submission_fence) {
>>>>> +                       if (flag &
>>>>> DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT)
>>>>> +
>>>>> dma_fence_wait(&wait_pt->submission_fence->base, true);
>>>>> +                       else if
>>>>> (!dma_fence_is_signaled(&wait_pt->submission_fence->base))
>>>>> +                               return NULL;
>>>>> +               }
>>>>> +               rcu_read_lock();
>>>>> +               fence = dma_fence_get_rcu(&wait_pt->base.base);
>>>>> +               rcu_read_unlock();
>>>>> +               return fence;
>>>>> +       }
>>>>> +       return NULL;
>>>>> +}
>>>>> +
>>>>>    /**
>>>>>     * drm_syncobj_find_fence - lookup and reference the fence in a sync
>>>>> object
>>>>>     * @file_private: drm file private pointer
>>>>>     * @handle: sync object handle to lookup.
>>>>>     * @fence: out parameter for the fence
>>>>> + * @point: timeline point
>>>>>     *
>>>>>     * This is just a convenience function that combines
>>>>> drm_syncobj_find() and
>>>>>     * drm_syncobj_fence_get().
>>>>> @@ -221,7 +544,8 @@ static int drm_syncobj_assign_null_handle(struct
>>>>> drm_syncobj *syncobj)
>>>>>     */
>>>>>    int drm_syncobj_find_fence(struct drm_file *file_private,
>>>>>                            u32 handle,
>>>>> -                          struct dma_fence **fence)
>>>>> +                          struct dma_fence **fence,
>>>>> +                          u64 point)
>>>>>    {
>>>>>         struct drm_syncobj *syncobj = drm_syncobj_find(file_private,
>>>>> handle);
>>>>>         int ret = 0;
>>>>> @@ -229,7 +553,15 @@ int drm_syncobj_find_fence(struct drm_file
>>>>> *file_private,
>>>>>         if (!syncobj)
>>>>>                 return -ENOENT;
>>>>> -       *fence = drm_syncobj_fence_get(syncobj);
>>>>> +       if (syncobj->type == DRM_SYNCOBJ_TYPE_NORMAL) {
>>>>> +               *fence = drm_syncobj_fence_get(syncobj);
>>>>
>>>> Needs at least a WARN_ON(point != 0) here, since I'd say that would be a
>>>> driver bug. Or userspace bug, in which case we need to return -EINVAL;
>>>>
>>>> Hard to tell without the driver implementation.
>>>>
>>>>> +       }else if (syncobj->type == DRM_SYNCOBJ_TYPE_TIMELINE) {
>>>>> +               *fence = drm_syncobj_timeline_point_get(syncobj, point,
>>>>> +
>>>>> DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT);
>>>>> +       } else {
>>>>> +               DRM_ERROR("invalid syncobj type\n");
>>>>> +               return -EINVAL;
>>>>> +       }
>>>>>         if (!*fence) {
>>>>>                 ret = -EINVAL;
>>>>>         }
>>>>> @@ -238,6 +570,35 @@ int drm_syncobj_find_fence(struct drm_file
>>>>> *file_private,
>>>>>    }
>>>>>    EXPORT_SYMBOL(drm_syncobj_find_fence);
>>>>> +static void drm_syncobj_timeline_fini(struct drm_syncobj *syncobj,
>>>>> +                                     struct drm_syncobj_timeline
>>>>> *syncobj_timeline)
>>>>> +{
>>>>> +       struct rb_node *node = NULL;
>>>>> +       struct drm_syncobj_wait_pt *wait_pt = NULL;
>>>>> +       struct drm_syncobj_signal_pt *signal_pt = NULL, *tmp;
>>>>> +
>>>>> +       spin_lock(&syncobj->lock);
>>>>> +       for(node = rb_first(&syncobj_timeline->wait_pt_tree);
>>>>> +           node != NULL; ) {
>>>>> +               wait_pt = rb_entry(node, struct drm_syncobj_wait_pt,
>>>>> node);
>>>>> +               node = rb_next(node);
>>>>> +               rb_erase(&wait_pt->node,
>>>>> +                        &syncobj_timeline->wait_pt_tree);
>>>>> +               RB_CLEAR_NODE(&wait_pt->node);
>>>>> +               if (wait_pt->submission_fence)
>>>>> +
>>>>> dma_fence_put(&wait_pt->submission_fence->base);
>>>>> +               /* kfree(wait_pt) is excuted by fence put */
>>>>> +               dma_fence_put(&wait_pt->base.base);
>>>>> +       }
>>>>> +       list_for_each_entry_safe(signal_pt, tmp,
>>>>> +                                &syncobj_timeline->signal_pt_list,
>>>>> list) {
>>>>> +               list_del(&signal_pt->list);
>>>>> +               dma_fence_put(signal_pt->signal_fence);
>>>>> +               dma_fence_put(signal_pt->pre_pt_base);
>>>>> +               dma_fence_put(&signal_pt->base.base);
>>>>> +       }
>>>>> +       spin_unlock(&syncobj->lock);
>>>>> +}
>>>>>    /**
>>>>>     * drm_syncobj_free - free a sync object.
>>>>>     * @kref: kref to free.
>>>>> @@ -250,10 +611,22 @@ void drm_syncobj_free(struct kref *kref)
>>>>>                                                    struct drm_syncobj,
>>>>>                                                    refcount);
>>>>>         drm_syncobj_replace_fence(syncobj, NULL);
>>>>> +       drm_syncobj_timeline_fini(syncobj, &syncobj->syncobj_timeline);
>>>>>         kfree(syncobj);
>>>>>    }
>>>>>    EXPORT_SYMBOL(drm_syncobj_free);
>>>>> +static void drm_syncobj_timeline_init(struct drm_syncobj_timeline
>>>>> +                                     *syncobj_timeline)
>>>>> +{
>>>>> +       syncobj_timeline->timeline_context =
>>>>> dma_fence_context_alloc(1);
>>>>> +       syncobj_timeline->submission_context =
>>>>> dma_fence_context_alloc(1);
>>>>> +       syncobj_timeline->timeline = 0;
>>>>> +       syncobj_timeline->signal_point = 0;
>>>>> +
>>>>> +       syncobj_timeline->wait_pt_tree = RB_ROOT;
>>>>> +       INIT_LIST_HEAD(&syncobj_timeline->signal_pt_list);
>>>>> +}
>>>>>    /**
>>>>>     * drm_syncobj_create - create a new syncobj
>>>>>     * @out_syncobj: returned syncobj
>>>>> @@ -279,6 +652,12 @@ int drm_syncobj_create(struct drm_syncobj
>>>>> **out_syncobj, uint32_t flags,
>>>>>         kref_init(&syncobj->refcount);
>>>>>         INIT_LIST_HEAD(&syncobj->cb_list);
>>>>>         spin_lock_init(&syncobj->lock);
>>>>> +       if (flags & DRM_SYNCOBJ_CREATE_TYPE_TIMELINE) {
>>>>> +               syncobj->type = DRM_SYNCOBJ_TYPE_TIMELINE;
>>>>> +               drm_syncobj_timeline_init(&syncobj->syncobj_timeline);
>>>>> +       } else {
>>>>> +               syncobj->type = DRM_SYNCOBJ_TYPE_NORMAL;
>>>>> +       }
>>>>>         if (flags & DRM_SYNCOBJ_CREATE_SIGNALED) {
>>>>>                 ret = drm_syncobj_assign_null_handle(syncobj);
>>>>> @@ -491,7 +870,7 @@ static int drm_syncobj_export_sync_file(struct
>>>>> drm_file *file_private,
>>>>>         if (fd < 0)
>>>>>                 return fd;
>>>>> -       ret = drm_syncobj_find_fence(file_private, handle, &fence);
>>>>> +       ret = drm_syncobj_find_fence(file_private, handle, &fence, 0);
>>>>>         if (ret)
>>>>>                 goto err_put_fd;
>>>>> diff --git a/drivers/gpu/drm/v3d/v3d_gem.c
>>>>> b/drivers/gpu/drm/v3d/v3d_gem.c
>>>>> index 9029590267aa..f24c3eccb4d5 100644
>>>>> --- a/drivers/gpu/drm/v3d/v3d_gem.c
>>>>> +++ b/drivers/gpu/drm/v3d/v3d_gem.c
>>>>> @@ -521,12 +521,12 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void
>>>>> *data,
>>>>>         kref_init(&exec->refcount);
>>>>>         ret = drm_syncobj_find_fence(file_priv, args->in_sync_bcl,
>>>>> -                                    &exec->bin.in_fence);
>>>>> +                                    &exec->bin.in_fence, 0);
>>>>>         if (ret == -EINVAL)
>>>>>                 goto fail;
>>>>>         ret = drm_syncobj_find_fence(file_priv, args->in_sync_rcl,
>>>>> -                                    &exec->render.in_fence);
>>>>> +                                    &exec->render.in_fence, 0);
>>>>>         if (ret == -EINVAL)
>>>>>                 goto fail;
>>>>> diff --git a/drivers/gpu/drm/vc4/vc4_gem.c
>>>>> b/drivers/gpu/drm/vc4/vc4_gem.c
>>>>> index 7910b9acedd6..f7b4971342e8 100644
>>>>> --- a/drivers/gpu/drm/vc4/vc4_gem.c
>>>>> +++ b/drivers/gpu/drm/vc4/vc4_gem.c
>>>>> @@ -1173,7 +1173,7 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void
>>>>> *data,
>>>>>         if (args->in_sync) {
>>>>>                 ret = drm_syncobj_find_fence(file_priv, args->in_sync,
>>>>> -                                            &in_fence);
>>>>> +                                            &in_fence, 0);
>>>>>                 if (ret)
>>>>>                         goto fail;
>>>>> diff --git a/include/drm/drm_syncobj.h b/include/drm/drm_syncobj.h
>>>>> index b04c492ddbb5..b6e193c6cc9a 100644
>>>>> --- a/include/drm/drm_syncobj.h
>>>>> +++ b/include/drm/drm_syncobj.h
>>>>> @@ -54,6 +54,41 @@ const struct dma_fence_ops
>>>>> drm_syncobj_stub_fence_ops = {
>>>>>         .release = NULL,
>>>>>    };
>>>>> +enum drm_syncobj_type {
>>>>> +       DRM_SYNCOBJ_TYPE_NORMAL,
>>>>> +       DRM_SYNCOBJ_TYPE_TIMELINE
>>>>> +};
>>>>> +
>>>>> +struct drm_syncobj;
>>>>> +struct drm_syncobj_wait_pt {
>>>>> +       struct drm_syncobj_stub_fence base;
>>>>> +       struct drm_syncobj_stub_fence *submission_fence;
>>>>> +       u64    value;
>>>>> +       struct rb_node   node;
>>>>> +};
>>>>> +struct drm_syncobj_signal_pt {
>>>>> +       struct drm_syncobj_stub_fence base;
>>>>> +       struct dma_fence *signal_fence;
>>>>> +       struct dma_fence *pre_pt_base;
>>>>> +       struct dma_fence_cb signal_cb;
>>>>> +       struct dma_fence_cb pre_pt_cb;
>>>>> +       struct drm_syncobj *syncobj;
>>>>> +       u64    value;
>>>>> +       struct list_head list;
>>>>> +};
>>>>
>>>> The above internal structs shouldn't be in the public header.
>>>>
>>>> Some tiny comments about what each piece is for would also go a long way
>>>> towards me being able to understand things.
>>>>
>>>> Thanks, Daniel
>>>>
>>>>> +struct drm_syncobj_timeline {
>>>>> +       u64 timeline_context;
>>>>> +       u64 submission_context;
>>>>> +       /**
>>>>> +        * @timeline: syncobj timeline
>>>>> +        */
>>>>> +       u64 timeline;
>>>>> +       u64 signal_point;
>>>>> +
>>>>> +
>>>>> +       struct rb_root wait_pt_tree;
>>>>> +       struct list_head signal_pt_list;
>>>>> +};
>>>>>    /**
>>>>>     * struct drm_syncobj - sync object.
>>>>>     *
>>>>> @@ -64,6 +99,11 @@ struct drm_syncobj {
>>>>>          * @refcount: Reference count of this object.
>>>>>          */
>>>>>         struct kref refcount;
>>>>> +       /**
>>>>> +        * @type: indicate syncobj type
>>>>> +        */
>>>>> +       enum drm_syncobj_type type;
>>>>> +       struct drm_syncobj_timeline syncobj_timeline;
>>>>>         /**
>>>>>          * @fence:
>>>>>          * NULL or a pointer to the fence bound to this object.
>>>>> @@ -162,9 +202,12 @@ void drm_syncobj_remove_callback(struct
>>>>> drm_syncobj *syncobj,
>>>>>                                  struct drm_syncobj_cb *cb);
>>>>>    void drm_syncobj_replace_fence(struct drm_syncobj *syncobj,
>>>>>                                struct dma_fence *fence);
>>>>> +void drm_syncobj_signal_fence(struct drm_syncobj *syncobj,
>>>>> +                             struct dma_fence *fence,
>>>>> +                             u64 point);
>>>>>    int drm_syncobj_find_fence(struct drm_file *file_private,
>>>>>                            u32 handle,
>>>>> -                          struct dma_fence **fence);
>>>>> +                          struct dma_fence **fence, u64 point);
>>>>>    void drm_syncobj_free(struct kref *kref);
>>>>>    int drm_syncobj_create(struct drm_syncobj **out_syncobj, uint32_t
>>>>> flags,
>>>>>                        struct dma_fence *fence);
>>>>> diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
>>>>> index 300f336633f2..71e6cd1c88f8 100644
>>>>> --- a/include/uapi/drm/drm.h
>>>>> +++ b/include/uapi/drm/drm.h
>>>>> @@ -717,6 +717,8 @@ struct drm_prime_handle {
>>>>>    struct drm_syncobj_create {
>>>>>         __u32 handle;
>>>>>    #define DRM_SYNCOBJ_CREATE_SIGNALED (1 << 0)
>>>>> +#define DRM_SYNCOBJ_CREATE_TYPE_NORMAL (1 << 1)
>>>>> +#define DRM_SYNCOBJ_CREATE_TYPE_TIMELINE (1 << 2)
>>>>>         __u32 flags;
>>>>>    };
>>>>> @@ -734,7 +736,6 @@ struct drm_syncobj_handle {
>>>>>         __s32 fd;
>>>>>         __u32 pad;
>>>>>    };
>>>>> -
>>>>>    #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL (1 << 0)
>>>>>    #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT (1 << 1)
>>>>>    struct drm_syncobj_wait {
>>>>> --
>>>>> 2.14.1
>>>>>
>>>>> _______________________________________________
>>>>> dri-devel mailing list
>>>>> dri-devel@lists.freedesktop.org
>>>>> https://lists.freedesktop.org/mailman/listinfo/dri-devel
>
>