[1/2] dma-buf: make reservation_object_copy_fences rcu safe

Submitted by Maarten Lankhorst on Sept. 11, 2017, 1:56 p.m.

Details

Message ID dbcc9a56-f6bf-c552-bdd1-363042536fb2@linux.intel.com
State New
Series "Series without cover letter" ( rev: 2 ) in AMD X.Org drivers


Commit Message

Maarten Lankhorst Sept. 11, 2017, 1:56 p.m.
On 11-09-17 at 14:53, Christian König wrote:
> On 10.09.2017 at 09:30, Maarten Lankhorst wrote:
>> On 04-09-17 at 21:02, Christian König wrote:
>>> From: Christian König <christian.koenig@amd.com>
>>>
>>> Stop requiring that the src reservation object is locked for this operation.
>>>
>>> Signed-off-by: Christian König <christian.koenig@amd.com>
>>> ---
>>>   drivers/dma-buf/reservation.c | 56 ++++++++++++++++++++++++++++++++-----------
>>>   1 file changed, 42 insertions(+), 14 deletions(-)
>>>
>>> diff --git a/drivers/dma-buf/reservation.c b/drivers/dma-buf/reservation.c
>>> index dec3a81..b44d9d7 100644
>>> --- a/drivers/dma-buf/reservation.c
>>> +++ b/drivers/dma-buf/reservation.c
>>> @@ -266,8 +266,7 @@ EXPORT_SYMBOL(reservation_object_add_excl_fence);
>>>   * @dst: the destination reservation object
>>>   * @src: the source reservation object
>>>   *
>>> -* Copy all fences from src to dst. Both src->lock as well as dst-lock must be
>>> -* held.
>>> +* Copy all fences from src to dst. dst->lock must be held.
>>>   */
>>>   int reservation_object_copy_fences(struct reservation_object *dst,
>>>                      struct reservation_object *src)
>> Could this be implemented using reservation_object_get_fences_rcu? You're essentially duplicating its functionality.
>
> I've considered this as well, but reservation_object_get_fences_rcu() returns an array and here we need a reservation_object_list.

Doesn't seem too hard; below is the result of fiddling with the allocation function.
reservation_object_list is just a list of fences with some junk in front.
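
For reference, the struct in question (as defined in include/linux/reservation.h around this kernel version) looks roughly like this; the "junk in front" is the RCU head plus the count/capacity fields:

	/* include/linux/reservation.h (4.13-era), shown here for context */
	struct reservation_object_list {
		struct rcu_head rcu;			/* for kfree_rcu() */
		u32 shared_count, shared_max;		/* entries used / allocated */
		struct dma_fence __rcu *shared[];	/* flexible array of fences */
	};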

Here you go, but only tested with the compiler. O:)
-----8<-----
 drivers/dma-buf/reservation.c | 192 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------------------------------------------------------------
 1 file changed, 109 insertions(+), 83 deletions(-)


diff --git a/drivers/dma-buf/reservation.c b/drivers/dma-buf/reservation.c
index dec3a815455d..72ee91f34132 100644
--- a/drivers/dma-buf/reservation.c
+++ b/drivers/dma-buf/reservation.c
@@ -261,87 +261,43 @@  void reservation_object_add_excl_fence(struct reservation_object *obj,
 }
 EXPORT_SYMBOL(reservation_object_add_excl_fence);
 
-/**
-* reservation_object_copy_fences - Copy all fences from src to dst.
-* @dst: the destination reservation object
-* @src: the source reservation object
-*
-* Copy all fences from src to dst. Both src->lock as well as dst-lock must be
-* held.
-*/
-int reservation_object_copy_fences(struct reservation_object *dst,
-				   struct reservation_object *src)
+static bool realloc_shared(void **ptr, struct dma_fence ***pshared, unsigned shared_max, bool alloc_list, bool unlocked)
 {
-	struct reservation_object_list *src_list, *dst_list;
-	struct dma_fence *old, *new;
-	size_t size;
-	unsigned i;
-
-	src_list = reservation_object_get_list(src);
-
-	if (src_list) {
-		size = offsetof(typeof(*src_list),
-				shared[src_list->shared_count]);
-		dst_list = kmalloc(size, GFP_KERNEL);
-		if (!dst_list)
-			return -ENOMEM;
-
-		dst_list->shared_count = src_list->shared_count;
-		dst_list->shared_max = src_list->shared_count;
-		for (i = 0; i < src_list->shared_count; ++i)
-			dst_list->shared[i] =
-				dma_fence_get(src_list->shared[i]);
-	} else {
-		dst_list = NULL;
-	}
-
-	kfree(dst->staged);
-	dst->staged = NULL;
+	size_t sz;
+	void *newptr;
 
-	src_list = reservation_object_get_list(dst);
+	if (alloc_list)
+		sz = offsetof(struct reservation_object_list, shared[shared_max]);
+	else
+		sz = shared_max * sizeof(struct dma_fence *);
 
-	old = reservation_object_get_excl(dst);
-	new = reservation_object_get_excl(src);
+	newptr = krealloc(*ptr, sz, unlocked ? GFP_KERNEL : GFP_NOWAIT | __GFP_NOWARN);
+	if (!newptr)
+		return false;
 
-	dma_fence_get(new);
+	*ptr = newptr;
+	if (alloc_list) {
+		struct reservation_object_list *fobj = newptr;
 
-	preempt_disable();
-	write_seqcount_begin(&dst->seq);
-	/* write_seqcount_begin provides the necessary memory barrier */
-	RCU_INIT_POINTER(dst->fence_excl, new);
-	RCU_INIT_POINTER(dst->fence, dst_list);
-	write_seqcount_end(&dst->seq);
-	preempt_enable();
-
-	if (src_list)
-		kfree_rcu(src_list, rcu);
-	dma_fence_put(old);
+		fobj->shared_max = shared_max;
+		*pshared = fobj->shared;
+	} else
+		*pshared = newptr;
 
-	return 0;
+	return true;
 }
-EXPORT_SYMBOL(reservation_object_copy_fences);
 
-/**
- * reservation_object_get_fences_rcu - Get an object's shared and exclusive
- * fences without update side lock held
- * @obj: the reservation object
- * @pfence_excl: the returned exclusive fence (or NULL)
- * @pshared_count: the number of shared fences returned
- * @pshared: the array of shared fence ptrs returned (array is krealloc'd to
- * the required size, and must be freed by caller)
- *
- * RETURNS
- * Zero or -errno
- */
-int reservation_object_get_fences_rcu(struct reservation_object *obj,
+static int __reservation_object_get_fences_rcu(struct reservation_object *obj,
 				      struct dma_fence **pfence_excl,
 				      unsigned *pshared_count,
-				      struct dma_fence ***pshared)
+				      struct dma_fence ***pshared,
+				      struct reservation_object_list **list_shared)
 {
 	struct dma_fence **shared = NULL;
 	struct dma_fence *fence_excl;
 	unsigned int shared_count;
 	int ret = 1;
+	void *ptr = NULL;
 
 	do {
 		struct reservation_object_list *fobj;
@@ -359,23 +315,16 @@  int reservation_object_get_fences_rcu(struct reservation_object *obj,
 
 		fobj = rcu_dereference(obj->fence);
 		if (fobj) {
-			struct dma_fence **nshared;
-			size_t sz = sizeof(*shared) * fobj->shared_max;
-
-			nshared = krealloc(shared, sz,
-					   GFP_NOWAIT | __GFP_NOWARN);
-			if (!nshared) {
+			if (!realloc_shared(&ptr, &shared, fobj->shared_max, list_shared, false)) {
 				rcu_read_unlock();
-				nshared = krealloc(shared, sz, GFP_KERNEL);
-				if (nshared) {
-					shared = nshared;
-					continue;
+				if (!realloc_shared(&ptr, &shared, fobj->shared_max, list_shared, true)) {
+					ret = -ENOMEM;
+					break;
 				}
 
-				ret = -ENOMEM;
-				break;
+				/* need to acquire rcu lock again and retry */
+				continue;
 			}
-			shared = nshared;
 			shared_count = fobj->shared_count;
 
 			for (i = 0; i < shared_count; ++i) {
@@ -398,16 +347,93 @@  int reservation_object_get_fences_rcu(struct reservation_object *obj,
 	} while (ret);
 
 	if (!shared_count) {
-		kfree(shared);
+		kfree(ptr);
+		ptr = NULL;
 		shared = NULL;
 	}
 
-	*pshared_count = shared_count;
-	*pshared = shared;
-	*pfence_excl = fence_excl;
+	if (!list_shared) {
+		*pshared_count = shared_count;
+		*pshared = shared;
+	} else {
+		*list_shared = ptr;
+		if (ptr)
+			(*list_shared)->shared_count = shared_count;
+	}
 
+	*pfence_excl = fence_excl;
 	return ret;
 }
+
+
+/**
+* reservation_object_copy_fences - Copy all fences from src to dst.
+* @dst: the destination reservation object
+* @src: the source reservation object
+*
+* Copy all fences from src to dst. dst->lock must be held.
+*/
+int reservation_object_copy_fences(struct reservation_object *dst,
+				   struct reservation_object *src)
+{
+	struct reservation_object_list *old_list, *dst_list;
+	struct dma_fence *excl, *old;
+	unsigned i;
+	int ret;
+
+	ret = __reservation_object_get_fences_rcu(src, &excl, NULL,
+						  NULL, &dst_list);
+	if (ret)
+		return ret;
+
+	kfree(dst->staged);
+	dst->staged = NULL;
+
+	old = rcu_dereference_protected(dst->fence_excl,
+					reservation_object_held(dst));
+
+	old_list = rcu_dereference_protected(dst->fence,
+					     reservation_object_held(dst));
+
+	preempt_disable();
+	write_seqcount_begin(&dst->seq);
+	/* write_seqcount_begin provides the necessary memory barrier */
+	RCU_INIT_POINTER(dst->fence_excl, excl);
+	RCU_INIT_POINTER(dst->fence, dst_list);
+	write_seqcount_end(&dst->seq);
+	preempt_enable();
+
+	if (old_list) {
+		for (i = 0; i < old_list->shared_count; i++)
+			dma_fence_put(old_list->shared[i]);
+
+		kfree_rcu(old_list, rcu);
+	}
+	dma_fence_put(old);
+
+	return 0;
+}
+EXPORT_SYMBOL(reservation_object_copy_fences);
+
+/**
+ * reservation_object_get_fences_rcu - Get an object's shared and exclusive
+ * fences without update side lock held
+ * @obj: the reservation object
+ * @pfence_excl: the returned exclusive fence (or NULL)
+ * @pshared_count: the number of shared fences returned
+ * @pshared: the array of shared fence ptrs returned (array is krealloc'd to
+ * the required size, and must be freed by caller)
+ *
+ * RETURNS
+ * Zero or -errno
+ */
+int reservation_object_get_fences_rcu(struct reservation_object *obj,
+				      struct dma_fence **pfence_excl,
+				      unsigned *pshared_count,
+				      struct dma_fence ***pshared)
+{
+	return __reservation_object_get_fences_rcu(obj, pfence_excl, pshared_count, pshared, NULL);
+}
 EXPORT_SYMBOL_GPL(reservation_object_get_fences_rcu);
 
 /**
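
For reference, a typical caller of reservation_object_get_fences_rcu() looks something like this (a minimal sketch, not taken from either patch; the caller owns the returned array and the fence references):

	struct dma_fence *excl, **shared;
	unsigned int shared_count, i;
	int ret;

	/* Snapshot all fences without taking obj->lock. */
	ret = reservation_object_get_fences_rcu(obj, &excl,
						&shared_count, &shared);
	if (ret)
		return ret;

	/* ... inspect or wait on the snapshot ... */

	for (i = 0; i < shared_count; ++i)
		dma_fence_put(shared[i]);
	kfree(shared);		/* krealloc'd array, freed by the caller */
	dma_fence_put(excl);	/* may be NULL, dma_fence_put() handles that */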

Comments

On 11.09.2017 at 15:56, Maarten Lankhorst wrote:
> [SNIP]
>>> Could this be implemented using reservation_object_get_fences_rcu? You're essentially duplicating its functionality.
>> I've considered this as well, but reservation_object_get_fences_rcu() returns an array and here we need a reservation_object_list.
> Doesn't seem too hard; below is the result of fiddling with the allocation function.
> reservation_object_list is just a list of fences with some junk in front.
>
> Here you go, but only tested with the compiler. O:)
> -----8<-----
>   drivers/dma-buf/reservation.c | 192 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------------------------------------------------------------
>   1 file changed, 109 insertions(+), 83 deletions(-)

To be honest that looks rather ugly to me for not much gain.

In addition to that, we lose the optimization I've stolen from the wait
function.

Regards,
Christian.

On 11-09-17 at 16:45, Christian König wrote:
> [SNIP]
> To be honest that looks rather ugly to me for not much gain.
>
> In addition to that, we lose the optimization I've stolen from the wait function.

Right now your version does exactly the same as reservation_object_get_fences_rcu,
but with a reservation_object_list instead of a fence array.

Can't you change reservation_object_get_fences_rcu to return a
reservation_object_list? All callsites look like they could be easily converted.

So the new patch series would be:
1. Make reservation_object_get_fences_rcu return a reservation_object_list
2. Add optimization
3. Convert reservation_object_copy_fences to use rcu.
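
The converted signature for step 1 could look something like this (a hypothetical sketch, not an agreed interface):

	/* hypothetical step-1 signature: return the snapshot as a list */
	int reservation_object_get_fences_rcu(struct reservation_object *obj,
					      struct dma_fence **pfence_excl,
					      struct reservation_object_list **plist);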

Cheers,
Maarten
On 11.09.2017 at 17:13, Maarten Lankhorst wrote:
> [SNIP]
>> To be honest that looks rather ugly to me for not much gain.
>>
>> In addition to that, we lose the optimization I've stolen from the wait function.
> Right now your version does exactly the same as reservation_object_get_fences_rcu,
> but with a reservation_object_list instead of a fence array.

Well then please take a closer look again:
>                 for (i = 0; i < src_list->shared_count; ++i) {
>                         struct dma_fence *fence;
>
>                         fence = rcu_dereference(src_list->shared[i]);
>                         if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
>                                      &fence->flags))
>                                 continue;
>
>                         if (!dma_fence_get_rcu(fence)) {
>                                 kfree(dst_list);
>                                 src_list = rcu_dereference(src->fence);
>                                 goto retry;
>                         }
>
>                         if (dma_fence_is_signaled(fence)) {
>                                 dma_fence_put(fence);
>                                 continue;
>                         }
>
>                         dst_list->shared[dst_list->shared_count++] = fence;
>                 }

We only take fences into the new reservation list when they aren't 
already signaled.

This can't be added to reservation_object_get_fences_rcu() because that 
would break VM handling on radeon and amdgpu.

Regards,
Christian.
On 11.09.2017 at 17:22, Christian König wrote:
> [SNIP]
> We only take fences into the new reservation list when they aren't 
> already signaled.
>
> This can't be added to reservation_object_get_fences_rcu() because 
> that would break VM handling on radeon and amdgpu.

What we could do is add a function that returns all fences (including
the exclusive one) as a reservation_object_list, and use that in both
the wait and the copy functions.

Regards,
Christian.

On 11-09-17 at 17:24, Christian König wrote:
> [SNIP]
>
> What we could do is add a function that returns all fences (including the exclusive one) as a reservation_object_list, and use that in both the wait and the copy functions.
Yeah, but I don't see the problem with the VM code; I'm guessing amdgpu_vm_prt_fini. Why would it break if I pruned the signaled fences from the copied list?
On 11.09.2017 at 17:29, Maarten Lankhorst wrote:
> [SNIP]
>> What we could do is add a function that returns all fences (including the exclusive one) as a reservation_object_list, and use that in both the wait and the copy functions.
> Yeah, but I don't see the problem with the VM code; I'm guessing amdgpu_vm_prt_fini. Why would it break if I pruned the signaled fences from the copied list?

It's not the PRT stuff that would break, but the VM flush handling.
Rather long story, but basically we need the already signaled fences as
well to correctly update the hardware state.

We could of course handle all that in a single function which gets its
behavior as parameters, but to be honest I think just having two copies
of code that look almost the same is cleaner.

Regards,
Christian.
