[v2] dma-fence: Store the timestamp in the same union as the cb_list

Submitted by Chris Wilson on Aug. 17, 2019, 3:30 p.m.

Details

Message ID 20190817153022.5749-1-chris@chris-wilson.co.uk
State New
Headers show
Series "Series without cover letter" ( rev: 3 ) in Intel GFX

Not browsing as part of any series.

Commit Message

Chris Wilson Aug. 17, 2019, 3:30 p.m.
The timestamp and the cb_list are mutually exclusive: the cb_list can
only be added to prior to being signaled (and once signaled we drain it),
while the timestamp is only valid upon being signaled. Both the
timestamp and the cb_list are only valid while the fence is alive, and
as soon as no references are held they can be replaced by the rcu_head.

By reusing the union for the timestamp, we squeeze the base dma_fence
struct to 64 bytes on x86-64.

v2: Sort the union chronologically

Suggested-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Christian König <christian.koenig@amd.com>
---
 drivers/dma-buf/dma-fence.c                 | 16 +++++++-------
 drivers/gpu/drm/i915/gt/intel_breadcrumbs.c | 13 ++++++------
 drivers/gpu/drm/vmwgfx/vmwgfx_fence.c       |  3 +++
 include/linux/dma-fence.h                   | 23 ++++++++++++++++-----
 4 files changed, 37 insertions(+), 18 deletions(-)

Patch hide | download patch | download mbox

diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c
index 8a6d0250285d..2c136aee3e79 100644
--- a/drivers/dma-buf/dma-fence.c
+++ b/drivers/dma-buf/dma-fence.c
@@ -129,6 +129,7 @@  EXPORT_SYMBOL(dma_fence_context_alloc);
 int dma_fence_signal_locked(struct dma_fence *fence)
 {
 	struct dma_fence_cb *cur, *tmp;
+	struct list_head cb_list;
 
 	lockdep_assert_held(fence->lock);
 
@@ -136,16 +137,16 @@  int dma_fence_signal_locked(struct dma_fence *fence)
 				      &fence->flags)))
 		return -EINVAL;
 
+	/* Stash the cb_list before replacing it with the timestamp */
+	list_replace(&fence->cb_list, &cb_list);
+
 	fence->timestamp = ktime_get();
 	set_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags);
 	trace_dma_fence_signaled(fence);
 
-	if (!list_empty(&fence->cb_list)) {
-		list_for_each_entry_safe(cur, tmp, &fence->cb_list, node) {
-			INIT_LIST_HEAD(&cur->node);
-			cur->func(fence, cur);
-		}
-		INIT_LIST_HEAD(&fence->cb_list);
+	list_for_each_entry_safe(cur, tmp, &cb_list, node) {
+		INIT_LIST_HEAD(&cur->node);
+		cur->func(fence, cur);
 	}
 
 	return 0;
@@ -231,7 +232,8 @@  void dma_fence_release(struct kref *kref)
 
 	trace_dma_fence_destroy(fence);
 
-	if (WARN(!list_empty(&fence->cb_list),
+	if (WARN(!list_empty(&fence->cb_list) &&
+		 !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags),
 		 "Fence %s:%s:%llx:%llx released with pending signals!\n",
 		 fence->ops->get_driver_name(fence),
 		 fence->ops->get_timeline_name(fence),
diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
index 2bc9c460e78d..09c68dda2098 100644
--- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
@@ -114,18 +114,18 @@  __dma_fence_signal__timestamp(struct dma_fence *fence, ktime_t timestamp)
 }
 
 static void
-__dma_fence_signal__notify(struct dma_fence *fence)
+__dma_fence_signal__notify(struct dma_fence *fence,
+			   const struct list_head *list)
 {
 	struct dma_fence_cb *cur, *tmp;
 
 	lockdep_assert_held(fence->lock);
 	lockdep_assert_irqs_disabled();
 
-	list_for_each_entry_safe(cur, tmp, &fence->cb_list, node) {
+	list_for_each_entry_safe(cur, tmp, list, node) {
 		INIT_LIST_HEAD(&cur->node);
 		cur->func(fence, cur);
 	}
-	INIT_LIST_HEAD(&fence->cb_list);
 }
 
 void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine)
@@ -187,11 +187,12 @@  void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine)
 	list_for_each_safe(pos, next, &signal) {
 		struct i915_request *rq =
 			list_entry(pos, typeof(*rq), signal_link);
-
-		__dma_fence_signal__timestamp(&rq->fence, timestamp);
+		struct list_head cb_list;
 
 		spin_lock(&rq->lock);
-		__dma_fence_signal__notify(&rq->fence);
+		list_replace(&rq->fence.cb_list, &cb_list);
+		__dma_fence_signal__timestamp(&rq->fence, timestamp);
+		__dma_fence_signal__notify(&rq->fence, &cb_list);
 		spin_unlock(&rq->lock);
 
 		i915_request_put(rq);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
index 434dfadb0e52..178a6cd1a06f 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
@@ -185,6 +185,9 @@  static long vmw_fence_wait(struct dma_fence *f, bool intr, signed long timeout)
 
 	spin_lock(f->lock);
 
+	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &f->flags))
+		goto out;
+
 	if (intr && signal_pending(current)) {
 		ret = -ERESTARTSYS;
 		goto out;
diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h
index 2ce4d877d33e..8b4a5aaa6848 100644
--- a/include/linux/dma-fence.h
+++ b/include/linux/dma-fence.h
@@ -65,17 +65,30 @@  struct dma_fence_cb;
 struct dma_fence {
 	spinlock_t *lock;
 	const struct dma_fence_ops *ops;
-	/* We clear the callback list on kref_put so that by the time we
-	 * release the fence it is unused. No one should be adding to the cb_list
-	 * that they don't themselves hold a reference for.
+	/*
+	 * We clear the callback list on kref_put so that by the time we
+	 * release the fence it is unused. No one should be adding to the
+	 * cb_list that they don't themselves hold a reference for.
+	 *
+	 * The lifetime of the timestamp is similarly tied to both the
+	 * rcu freelist and the cb_list. The timestamp is only set upon
+	 * signaling while simultaneously notifying the cb_list. Ergo, we
+	 * only use either the cb_list or the timestamp. Upon destruction,
+	 * neither is accessible, and so we can use the rcu. This means
+	 * that the cb_list is *only* valid until the signal bit is set,
+	 * and to read either you *must* hold a reference to the fence.
+	 *
+	 * Listed in chronological order.
 	 */
 	union {
-		struct rcu_head rcu;
 		struct list_head cb_list;
+		/* @cb_list replaced by @timestamp on dma_fence_signal() */
+		ktime_t timestamp;
+		/* @timestamp replaced by @rcu on dma_fence_release() */
+		struct rcu_head rcu;
 	};
 	u64 context;
 	u64 seqno;
-	ktime_t timestamp;
 	unsigned long flags;
 	struct kref refcount;
 	int error;

Comments

Am 17.08.19 um 17:30 schrieb Chris Wilson:
> The timestamp and the cb_list are mutually exclusive, the cb_list can

> only be added to prior to being signaled (and once signaled we drain),

> while the timestamp is only valid upon being signaled. Both the

> timestamp and the cb_list are only valid while the fence is alive, and

> as soon as no references are held can be replaced by the rcu_head.

>

> By reusing the union for the timestamp, we squeeze the base dma_fence

> struct to 64 bytes on x86-64.

>

> v2: Sort the union chronologically

>

> Suggested-by: Christian König <christian.koenig@amd.com>

> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

> Cc: Christian König <christian.koenig@amd.com>


I can't judge the correctness of the vmw and Intel stuff, so only 
Acked-by: Christian König <christian.koenig@amd.com>.


> ---

>   drivers/dma-buf/dma-fence.c                 | 16 +++++++-------

>   drivers/gpu/drm/i915/gt/intel_breadcrumbs.c | 13 ++++++------

>   drivers/gpu/drm/vmwgfx/vmwgfx_fence.c       |  3 +++

>   include/linux/dma-fence.h                   | 23 ++++++++++++++++-----

>   4 files changed, 37 insertions(+), 18 deletions(-)

>

> diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c

> index 8a6d0250285d..2c136aee3e79 100644

> --- a/drivers/dma-buf/dma-fence.c

> +++ b/drivers/dma-buf/dma-fence.c

> @@ -129,6 +129,7 @@ EXPORT_SYMBOL(dma_fence_context_alloc);

>   int dma_fence_signal_locked(struct dma_fence *fence)

>   {

>   	struct dma_fence_cb *cur, *tmp;

> +	struct list_head cb_list;

>   

>   	lockdep_assert_held(fence->lock);

>   

> @@ -136,16 +137,16 @@ int dma_fence_signal_locked(struct dma_fence *fence)

>   				      &fence->flags)))

>   		return -EINVAL;

>   

> +	/* Stash the cb_list before replacing it with the timestamp */

> +	list_replace(&fence->cb_list, &cb_list);

> +

>   	fence->timestamp = ktime_get();

>   	set_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags);

>   	trace_dma_fence_signaled(fence);

>   

> -	if (!list_empty(&fence->cb_list)) {

> -		list_for_each_entry_safe(cur, tmp, &fence->cb_list, node) {

> -			INIT_LIST_HEAD(&cur->node);

> -			cur->func(fence, cur);

> -		}

> -		INIT_LIST_HEAD(&fence->cb_list);

> +	list_for_each_entry_safe(cur, tmp, &cb_list, node) {

> +		INIT_LIST_HEAD(&cur->node);

> +		cur->func(fence, cur);

>   	}

>   

>   	return 0;

> @@ -231,7 +232,8 @@ void dma_fence_release(struct kref *kref)

>   

>   	trace_dma_fence_destroy(fence);

>   

> -	if (WARN(!list_empty(&fence->cb_list),

> +	if (WARN(!list_empty(&fence->cb_list) &&

> +		 !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags),

>   		 "Fence %s:%s:%llx:%llx released with pending signals!\n",

>   		 fence->ops->get_driver_name(fence),

>   		 fence->ops->get_timeline_name(fence),

> diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c

> index 2bc9c460e78d..09c68dda2098 100644

> --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c

> +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c

> @@ -114,18 +114,18 @@ __dma_fence_signal__timestamp(struct dma_fence *fence, ktime_t timestamp)

>   }

>   

>   static void

> -__dma_fence_signal__notify(struct dma_fence *fence)

> +__dma_fence_signal__notify(struct dma_fence *fence,

> +			   const struct list_head *list)

>   {

>   	struct dma_fence_cb *cur, *tmp;

>   

>   	lockdep_assert_held(fence->lock);

>   	lockdep_assert_irqs_disabled();

>   

> -	list_for_each_entry_safe(cur, tmp, &fence->cb_list, node) {

> +	list_for_each_entry_safe(cur, tmp, list, node) {

>   		INIT_LIST_HEAD(&cur->node);

>   		cur->func(fence, cur);

>   	}

> -	INIT_LIST_HEAD(&fence->cb_list);

>   }

>   

>   void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine)

> @@ -187,11 +187,12 @@ void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine)

>   	list_for_each_safe(pos, next, &signal) {

>   		struct i915_request *rq =

>   			list_entry(pos, typeof(*rq), signal_link);

> -

> -		__dma_fence_signal__timestamp(&rq->fence, timestamp);

> +		struct list_head cb_list;

>   

>   		spin_lock(&rq->lock);

> -		__dma_fence_signal__notify(&rq->fence);

> +		list_replace(&rq->fence.cb_list, &cb_list);

> +		__dma_fence_signal__timestamp(&rq->fence, timestamp);

> +		__dma_fence_signal__notify(&rq->fence, &cb_list);

>   		spin_unlock(&rq->lock);

>   

>   		i915_request_put(rq);

> diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c

> index 434dfadb0e52..178a6cd1a06f 100644

> --- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c

> +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c

> @@ -185,6 +185,9 @@ static long vmw_fence_wait(struct dma_fence *f, bool intr, signed long timeout)

>   

>   	spin_lock(f->lock);

>   

> +	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &f->flags))

> +		goto out;

> +

>   	if (intr && signal_pending(current)) {

>   		ret = -ERESTARTSYS;

>   		goto out;

> diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h

> index 2ce4d877d33e..8b4a5aaa6848 100644

> --- a/include/linux/dma-fence.h

> +++ b/include/linux/dma-fence.h

> @@ -65,17 +65,30 @@ struct dma_fence_cb;

>   struct dma_fence {

>   	spinlock_t *lock;

>   	const struct dma_fence_ops *ops;

> -	/* We clear the callback list on kref_put so that by the time we

> -	 * release the fence it is unused. No one should be adding to the cb_list

> -	 * that they don't themselves hold a reference for.

> +	/*

> +	 * We clear the callback list on kref_put so that by the time we

> +	 * release the fence it is unused. No one should be adding to the

> +	 * cb_list that they don't themselves hold a reference for.

> +	 *

> +	 * The lifetime of the timestamp is similarly tied to both the

> +	 * rcu freelist and the cb_list. The timestamp is only set upon

> +	 * signaling while simultaneously notifying the cb_list. Ergo, we

> +	 * only use either the cb_list of timestamp. Upon destruction,

> +	 * neither are accessible, and so we can use the rcu. This means

> +	 * that the cb_list is *only* valid until the signal bit is set,

> +	 * and to read either you *must* hold a reference to the fence.

> +	 *

> +	 * Listed in chronological order.

>   	 */

>   	union {

> -		struct rcu_head rcu;

>   		struct list_head cb_list;

> +		/* @cb_list replaced by @timestamp on dma_fence_signal() */

> +		ktime_t timestamp;

> +		/* @timestamp replaced by @rcu on dma_fence_release() */

> +		struct rcu_head rcu;

>   	};

>   	u64 context;

>   	u64 seqno;

> -	ktime_t timestamp;

>   	unsigned long flags;

>   	struct kref refcount;

>   	int error;