[Mesa-dev] gallium/os: add os_wait_until_zero

Submitted by Marek Olšák on June 26, 2015, 11:05 a.m.

Details

Message ID 1435316711-10890-1-git-send-email-maraeo@gmail.com
State New
Headers show

Not browsing as part of any series.

Commit Message

Marek Olšák June 26, 2015, 11:05 a.m.
From: Marek Olšák <marek.olsak@amd.com>

This will be used by radeon and amdgpu winsyses.
Copied from the amdgpu winsys.
---
 src/gallium/auxiliary/os/os_time.c | 36 +++++++++++++++++++++++++++++++++++-
 src/gallium/auxiliary/os/os_time.h | 10 ++++++++++
 2 files changed, 45 insertions(+), 1 deletion(-)

Patch hide | download patch | download mbox

diff --git a/src/gallium/auxiliary/os/os_time.c b/src/gallium/auxiliary/os/os_time.c
index f7e4ca4..63b6879 100644
--- a/src/gallium/auxiliary/os/os_time.c
+++ b/src/gallium/auxiliary/os/os_time.c
@@ -33,11 +33,12 @@ 
  */
 
 
-#include "pipe/p_config.h"
+#include "pipe/p_defines.h"
 
 #if defined(PIPE_OS_UNIX)
 #  include <time.h> /* timeval */
 #  include <sys/time.h> /* timeval */
+#  include <sched.h> /* sched_yield */
 #elif defined(PIPE_SUBSYSTEM_WINDOWS_USER)
 #  include <windows.h>
 #else
@@ -92,3 +93,36 @@  os_time_sleep(int64_t usecs)
 }
 
 #endif
+
+
+bool os_wait_until_zero(int *var, uint64_t timeout)
+{
+   if (!*var)
+      return true;
+
+   if (!timeout)
+      return false;
+
+   if (timeout == PIPE_TIMEOUT_INFINITE) {
+      while (*var) {
+#if defined(PIPE_OS_UNIX)
+         sched_yield();
+#endif
+      }
+      return true;
+   }
+   else {
+      int64_t start_time = os_time_get_nano();
+      int64_t end_time = start_time + timeout;
+
+      while (*var) {
+         if (os_time_timeout(start_time, end_time, os_time_get_nano()))
+            return false;
+
+#if defined(PIPE_OS_UNIX)
+         sched_yield();
+#endif
+      }
+      return true;
+   }
+}
diff --git a/src/gallium/auxiliary/os/os_time.h b/src/gallium/auxiliary/os/os_time.h
index 4fab03c..fdc8040 100644
--- a/src/gallium/auxiliary/os/os_time.h
+++ b/src/gallium/auxiliary/os/os_time.h
@@ -94,6 +94,16 @@  os_time_timeout(int64_t start,
 }
 
 
+/**
+ * Wait until the variable at the given memory location is zero.
+ *
+ * \param var           variable
+ * \param timeout       timeout, can be anything from 0 (no wait) to
+ *                      PIPE_TIME_INFINITE (wait forever)
+ * \return     true if the variable is zero
+ */
+bool os_wait_until_zero(int *var, uint64_t timeout);
+
 #ifdef	__cplusplus
 }
 #endif

Comments

On 26/06/15 12:05, Marek Olšák wrote:
> From: Marek Olšák <marek.olsak@amd.com>
>
> This will be used by radeon and amdgpu winsyses.
> Copied from the amdgpu winsys.
> ---
>   src/gallium/auxiliary/os/os_time.c | 36 +++++++++++++++++++++++++++++++++++-
>   src/gallium/auxiliary/os/os_time.h | 10 ++++++++++
>   2 files changed, 45 insertions(+), 1 deletion(-)
>
> diff --git a/src/gallium/auxiliary/os/os_time.c b/src/gallium/auxiliary/os/os_time.c
> index f7e4ca4..63b6879 100644
> --- a/src/gallium/auxiliary/os/os_time.c
> +++ b/src/gallium/auxiliary/os/os_time.c
> @@ -33,11 +33,12 @@
>    */
>
>
> -#include "pipe/p_config.h"
> +#include "pipe/p_defines.h"
>
>   #if defined(PIPE_OS_UNIX)
>   #  include <time.h> /* timeval */
>   #  include <sys/time.h> /* timeval */
> +#  include <sched.h> /* sched_yield */
>   #elif defined(PIPE_SUBSYSTEM_WINDOWS_USER)
>   #  include <windows.h>
>   #else
> @@ -92,3 +93,36 @@ os_time_sleep(int64_t usecs)
>   }
>
>   #endif
> +
> +
> +bool os_wait_until_zero(int *var, uint64_t timeout)

should var be a volatile pointer? I'm surprised it works without it.

Maybe it just works on Unixes thanks to the sched_yield call, and the 
assumption it might any any side effects.

Jose

> +{
> +   if (!*var)
> +      return true;
> +
> +   if (!timeout)
> +      return false;
> +
> +   if (timeout == PIPE_TIMEOUT_INFINITE) {
> +      while (*var) {
> +#if defined(PIPE_OS_UNIX)
> +         sched_yield();
> +#endif
> +      }
> +      return true;
> +   }
> +   else {
> +      int64_t start_time = os_time_get_nano();
> +      int64_t end_time = start_time + timeout;
> +
> +      while (*var) {
> +         if (os_time_timeout(start_time, end_time, os_time_get_nano()))
> +            return false;
> +
> +#if defined(PIPE_OS_UNIX)
> +         sched_yield();
> +#endif
> +      }
> +      return true;
> +   }
> +}
> diff --git a/src/gallium/auxiliary/os/os_time.h b/src/gallium/auxiliary/os/os_time.h
> index 4fab03c..fdc8040 100644
> --- a/src/gallium/auxiliary/os/os_time.h
> +++ b/src/gallium/auxiliary/os/os_time.h
> @@ -94,6 +94,16 @@ os_time_timeout(int64_t start,
>   }
>
>
> +/**
> + * Wait until the variable at the given memory location is zero.
> + *
> + * \param var           variable
> + * \param timeout       timeout, can be anything from 0 (no wait) to
> + *                      PIPE_TIME_INFINITE (wait forever)
> + * \return     true if the variable is zero
> + */
> +bool os_wait_until_zero(int *var, uint64_t timeout);
> +
>   #ifdef	__cplusplus
>   }
>   #endif
>
On Fri, Jun 26, 2015 at 7:05 AM, Marek Olšák <maraeo@gmail.com> wrote:
> From: Marek Olšák <marek.olsak@amd.com>
>
> This will be used by radeon and amdgpu winsyses.
> Copied from the amdgpu winsys.
> ---
>  src/gallium/auxiliary/os/os_time.c | 36 +++++++++++++++++++++++++++++++++++-
>  src/gallium/auxiliary/os/os_time.h | 10 ++++++++++
>  2 files changed, 45 insertions(+), 1 deletion(-)
>
> diff --git a/src/gallium/auxiliary/os/os_time.c b/src/gallium/auxiliary/os/os_time.c
> index f7e4ca4..63b6879 100644
> --- a/src/gallium/auxiliary/os/os_time.c
> +++ b/src/gallium/auxiliary/os/os_time.c
> @@ -33,11 +33,12 @@
>   */
>
>
> -#include "pipe/p_config.h"
> +#include "pipe/p_defines.h"
>
>  #if defined(PIPE_OS_UNIX)
>  #  include <time.h> /* timeval */
>  #  include <sys/time.h> /* timeval */
> +#  include <sched.h> /* sched_yield */
>  #elif defined(PIPE_SUBSYSTEM_WINDOWS_USER)
>  #  include <windows.h>
>  #else
> @@ -92,3 +93,36 @@ os_time_sleep(int64_t usecs)
>  }
>
>  #endif
> +
> +
> +bool os_wait_until_zero(int *var, uint64_t timeout)

Does this need to be volatile?

> +{
> +   if (!*var)
> +      return true;
> +
> +   if (!timeout)
> +      return false;
> +
> +   if (timeout == PIPE_TIMEOUT_INFINITE) {
> +      while (*var) {
> +#if defined(PIPE_OS_UNIX)
> +         sched_yield();
> +#endif
> +      }
> +      return true;
> +   }
> +   else {
> +      int64_t start_time = os_time_get_nano();
> +      int64_t end_time = start_time + timeout;
> +
> +      while (*var) {
> +         if (os_time_timeout(start_time, end_time, os_time_get_nano()))
> +            return false;
> +
> +#if defined(PIPE_OS_UNIX)
> +         sched_yield();
> +#endif
> +      }
> +      return true;
> +   }
> +}
> diff --git a/src/gallium/auxiliary/os/os_time.h b/src/gallium/auxiliary/os/os_time.h
> index 4fab03c..fdc8040 100644
> --- a/src/gallium/auxiliary/os/os_time.h
> +++ b/src/gallium/auxiliary/os/os_time.h
> @@ -94,6 +94,16 @@ os_time_timeout(int64_t start,
>  }
>
>
> +/**
> + * Wait until the variable at the given memory location is zero.
> + *
> + * \param var           variable
> + * \param timeout       timeout, can be anything from 0 (no wait) to
> + *                      PIPE_TIME_INFINITE (wait forever)
> + * \return     true if the variable is zero
> + */
> +bool os_wait_until_zero(int *var, uint64_t timeout);
> +
>  #ifdef __cplusplus
>  }
>  #endif
> --
> 2.1.0
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
I expect the variable will be changed using an atomic operation by the
CPU, or using a coherent store instruction by the GPU.

If this is wrong and volatile is really required here, then
p_atomic_read is wrong too. Should we fix it? For example:

#define p_atomic_read(_v) (*(volatile int*)(_v))

Then, os_wait_until_zero can use p_atomic_read.

Marek

On Fri, Jun 26, 2015 at 4:48 PM, Ilia Mirkin <imirkin@alum.mit.edu> wrote:
> On Fri, Jun 26, 2015 at 7:05 AM, Marek Olšák <maraeo@gmail.com> wrote:
>> From: Marek Olšák <marek.olsak@amd.com>
>>
>> This will be used by radeon and amdgpu winsyses.
>> Copied from the amdgpu winsys.
>> ---
>>  src/gallium/auxiliary/os/os_time.c | 36 +++++++++++++++++++++++++++++++++++-
>>  src/gallium/auxiliary/os/os_time.h | 10 ++++++++++
>>  2 files changed, 45 insertions(+), 1 deletion(-)
>>
>> diff --git a/src/gallium/auxiliary/os/os_time.c b/src/gallium/auxiliary/os/os_time.c
>> index f7e4ca4..63b6879 100644
>> --- a/src/gallium/auxiliary/os/os_time.c
>> +++ b/src/gallium/auxiliary/os/os_time.c
>> @@ -33,11 +33,12 @@
>>   */
>>
>>
>> -#include "pipe/p_config.h"
>> +#include "pipe/p_defines.h"
>>
>>  #if defined(PIPE_OS_UNIX)
>>  #  include <time.h> /* timeval */
>>  #  include <sys/time.h> /* timeval */
>> +#  include <sched.h> /* sched_yield */
>>  #elif defined(PIPE_SUBSYSTEM_WINDOWS_USER)
>>  #  include <windows.h>
>>  #else
>> @@ -92,3 +93,36 @@ os_time_sleep(int64_t usecs)
>>  }
>>
>>  #endif
>> +
>> +
>> +bool os_wait_until_zero(int *var, uint64_t timeout)
>
> Does this need to be volatile?
>
>> +{
>> +   if (!*var)
>> +      return true;
>> +
>> +   if (!timeout)
>> +      return false;
>> +
>> +   if (timeout == PIPE_TIMEOUT_INFINITE) {
>> +      while (*var) {
>> +#if defined(PIPE_OS_UNIX)
>> +         sched_yield();
>> +#endif
>> +      }
>> +      return true;
>> +   }
>> +   else {
>> +      int64_t start_time = os_time_get_nano();
>> +      int64_t end_time = start_time + timeout;
>> +
>> +      while (*var) {
>> +         if (os_time_timeout(start_time, end_time, os_time_get_nano()))
>> +            return false;
>> +
>> +#if defined(PIPE_OS_UNIX)
>> +         sched_yield();
>> +#endif
>> +      }
>> +      return true;
>> +   }
>> +}
>> diff --git a/src/gallium/auxiliary/os/os_time.h b/src/gallium/auxiliary/os/os_time.h
>> index 4fab03c..fdc8040 100644
>> --- a/src/gallium/auxiliary/os/os_time.h
>> +++ b/src/gallium/auxiliary/os/os_time.h
>> @@ -94,6 +94,16 @@ os_time_timeout(int64_t start,
>>  }
>>
>>
>> +/**
>> + * Wait until the variable at the given memory location is zero.
>> + *
>> + * \param var           variable
>> + * \param timeout       timeout, can be anything from 0 (no wait) to
>> + *                      PIPE_TIME_INFINITE (wait forever)
>> + * \return     true if the variable is zero
>> + */
>> +bool os_wait_until_zero(int *var, uint64_t timeout);
>> +
>>  #ifdef __cplusplus
>>  }
>>  #endif
>> --
>> 2.1.0
>>
>> _______________________________________________
>> mesa-dev mailing list
>> mesa-dev@lists.freedesktop.org
>> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
On Fri, Jun 26, 2015 at 11:33 AM, Marek Olšák <maraeo@gmail.com> wrote:
> I expect the variable will be changed using an atomic operation by the
> CPU, or using a coherent store instruction by the GPU.
>
> If this is wrong and volatile is really required here, then
> p_atomic_read is wrong too. Should we fix it? For example:
>
> #define p_atomic_read(_v) (*(volatile int*)(_v))
>
> Then, os_wait_until_zero can use p_atomic_read.

The problem is the lack of a memory barrier. A function call to
sched_yield implies a barrier (since the function could well have
changed the memory that var points to), and so it's all fine. But on a
non-PIPE_OS_UNIX os, this becomes

while (*var);

Which the compiler would be well within its right to rewrite to

x = *var;
while (x);

which wouldn't end well. You need a barrier that tells the compiler
that memory may be invalidated... I believe this maps to either mb()
or barrier() in the linux kernel, and they have some fancy assembly
syntax to tell the compiler "oops, memory may have changed". However
in this case, it may be easier to just mark the var volatile.

>
> Marek
>
> On Fri, Jun 26, 2015 at 4:48 PM, Ilia Mirkin <imirkin@alum.mit.edu> wrote:
>> On Fri, Jun 26, 2015 at 7:05 AM, Marek Olšák <maraeo@gmail.com> wrote:
>>> From: Marek Olšák <marek.olsak@amd.com>
>>>
>>> This will be used by radeon and amdgpu winsyses.
>>> Copied from the amdgpu winsys.
>>> ---
>>>  src/gallium/auxiliary/os/os_time.c | 36 +++++++++++++++++++++++++++++++++++-
>>>  src/gallium/auxiliary/os/os_time.h | 10 ++++++++++
>>>  2 files changed, 45 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/src/gallium/auxiliary/os/os_time.c b/src/gallium/auxiliary/os/os_time.c
>>> index f7e4ca4..63b6879 100644
>>> --- a/src/gallium/auxiliary/os/os_time.c
>>> +++ b/src/gallium/auxiliary/os/os_time.c
>>> @@ -33,11 +33,12 @@
>>>   */
>>>
>>>
>>> -#include "pipe/p_config.h"
>>> +#include "pipe/p_defines.h"
>>>
>>>  #if defined(PIPE_OS_UNIX)
>>>  #  include <time.h> /* timeval */
>>>  #  include <sys/time.h> /* timeval */
>>> +#  include <sched.h> /* sched_yield */
>>>  #elif defined(PIPE_SUBSYSTEM_WINDOWS_USER)
>>>  #  include <windows.h>
>>>  #else
>>> @@ -92,3 +93,36 @@ os_time_sleep(int64_t usecs)
>>>  }
>>>
>>>  #endif
>>> +
>>> +
>>> +bool os_wait_until_zero(int *var, uint64_t timeout)
>>
>> Does this need to be volatile?
>>
>>> +{
>>> +   if (!*var)
>>> +      return true;
>>> +
>>> +   if (!timeout)
>>> +      return false;
>>> +
>>> +   if (timeout == PIPE_TIMEOUT_INFINITE) {
>>> +      while (*var) {
>>> +#if defined(PIPE_OS_UNIX)
>>> +         sched_yield();
>>> +#endif
>>> +      }
>>> +      return true;
>>> +   }
>>> +   else {
>>> +      int64_t start_time = os_time_get_nano();
>>> +      int64_t end_time = start_time + timeout;
>>> +
>>> +      while (*var) {
>>> +         if (os_time_timeout(start_time, end_time, os_time_get_nano()))
>>> +            return false;
>>> +
>>> +#if defined(PIPE_OS_UNIX)
>>> +         sched_yield();
>>> +#endif
>>> +      }
>>> +      return true;
>>> +   }
>>> +}
>>> diff --git a/src/gallium/auxiliary/os/os_time.h b/src/gallium/auxiliary/os/os_time.h
>>> index 4fab03c..fdc8040 100644
>>> --- a/src/gallium/auxiliary/os/os_time.h
>>> +++ b/src/gallium/auxiliary/os/os_time.h
>>> @@ -94,6 +94,16 @@ os_time_timeout(int64_t start,
>>>  }
>>>
>>>
>>> +/**
>>> + * Wait until the variable at the given memory location is zero.
>>> + *
>>> + * \param var           variable
>>> + * \param timeout       timeout, can be anything from 0 (no wait) to
>>> + *                      PIPE_TIME_INFINITE (wait forever)
>>> + * \return     true if the variable is zero
>>> + */
>>> +bool os_wait_until_zero(int *var, uint64_t timeout);
>>> +
>>>  #ifdef __cplusplus
>>>  }
>>>  #endif
>>> --
>>> 2.1.0
>>>
>>> _______________________________________________
>>> mesa-dev mailing list
>>> mesa-dev@lists.freedesktop.org
>>> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
I assumed the atomic operation in another thread would act as a
barrier in this case. Is that right?

Marek

On Fri, Jun 26, 2015 at 5:47 PM, Ilia Mirkin <imirkin@alum.mit.edu> wrote:
> On Fri, Jun 26, 2015 at 11:33 AM, Marek Olšák <maraeo@gmail.com> wrote:
>> I expect the variable will be changed using an atomic operation by the
>> CPU, or using a coherent store instruction by the GPU.
>>
>> If this is wrong and volatile is really required here, then
>> p_atomic_read is wrong too. Should we fix it? For example:
>>
>> #define p_atomic_read(_v) (*(volatile int*)(_v))
>>
>> Then, os_wait_until_zero can use p_atomic_read.
>
> The problem is the lack of a memory barrier. A function call to
> sched_yield implies a barrier (since the function could well have
> changed the memory that var points to), and so it's all fine. But on a
> non-PIPE_OS_UNIX os, this becomes
>
> while (*var);
>
> Which the compiler would be well within its right to rewrite to
>
> x = *var;
> while (x);
>
> which wouldn't end well. You need a barrier that tells the compiler
> that memory may be invalidated... I believe this maps to either mb()
> or barrier() in the linux kernel, and they have some fancy assembly
> syntax to tell the compiler "oops, memory may have changed". However
> in this case, it may be easier to just mark the var volatile.
>
>>
>> Marek
>>
>> On Fri, Jun 26, 2015 at 4:48 PM, Ilia Mirkin <imirkin@alum.mit.edu> wrote:
>>> On Fri, Jun 26, 2015 at 7:05 AM, Marek Olšák <maraeo@gmail.com> wrote:
>>>> From: Marek Olšák <marek.olsak@amd.com>
>>>>
>>>> This will be used by radeon and amdgpu winsyses.
>>>> Copied from the amdgpu winsys.
>>>> ---
>>>>  src/gallium/auxiliary/os/os_time.c | 36 +++++++++++++++++++++++++++++++++++-
>>>>  src/gallium/auxiliary/os/os_time.h | 10 ++++++++++
>>>>  2 files changed, 45 insertions(+), 1 deletion(-)
>>>>
>>>> diff --git a/src/gallium/auxiliary/os/os_time.c b/src/gallium/auxiliary/os/os_time.c
>>>> index f7e4ca4..63b6879 100644
>>>> --- a/src/gallium/auxiliary/os/os_time.c
>>>> +++ b/src/gallium/auxiliary/os/os_time.c
>>>> @@ -33,11 +33,12 @@
>>>>   */
>>>>
>>>>
>>>> -#include "pipe/p_config.h"
>>>> +#include "pipe/p_defines.h"
>>>>
>>>>  #if defined(PIPE_OS_UNIX)
>>>>  #  include <time.h> /* timeval */
>>>>  #  include <sys/time.h> /* timeval */
>>>> +#  include <sched.h> /* sched_yield */
>>>>  #elif defined(PIPE_SUBSYSTEM_WINDOWS_USER)
>>>>  #  include <windows.h>
>>>>  #else
>>>> @@ -92,3 +93,36 @@ os_time_sleep(int64_t usecs)
>>>>  }
>>>>
>>>>  #endif
>>>> +
>>>> +
>>>> +bool os_wait_until_zero(int *var, uint64_t timeout)
>>>
>>> Does this need to be volatile?
>>>
>>>> +{
>>>> +   if (!*var)
>>>> +      return true;
>>>> +
>>>> +   if (!timeout)
>>>> +      return false;
>>>> +
>>>> +   if (timeout == PIPE_TIMEOUT_INFINITE) {
>>>> +      while (*var) {
>>>> +#if defined(PIPE_OS_UNIX)
>>>> +         sched_yield();
>>>> +#endif
>>>> +      }
>>>> +      return true;
>>>> +   }
>>>> +   else {
>>>> +      int64_t start_time = os_time_get_nano();
>>>> +      int64_t end_time = start_time + timeout;
>>>> +
>>>> +      while (*var) {
>>>> +         if (os_time_timeout(start_time, end_time, os_time_get_nano()))
>>>> +            return false;
>>>> +
>>>> +#if defined(PIPE_OS_UNIX)
>>>> +         sched_yield();
>>>> +#endif
>>>> +      }
>>>> +      return true;
>>>> +   }
>>>> +}
>>>> diff --git a/src/gallium/auxiliary/os/os_time.h b/src/gallium/auxiliary/os/os_time.h
>>>> index 4fab03c..fdc8040 100644
>>>> --- a/src/gallium/auxiliary/os/os_time.h
>>>> +++ b/src/gallium/auxiliary/os/os_time.h
>>>> @@ -94,6 +94,16 @@ os_time_timeout(int64_t start,
>>>>  }
>>>>
>>>>
>>>> +/**
>>>> + * Wait until the variable at the given memory location is zero.
>>>> + *
>>>> + * \param var           variable
>>>> + * \param timeout       timeout, can be anything from 0 (no wait) to
>>>> + *                      PIPE_TIME_INFINITE (wait forever)
>>>> + * \return     true if the variable is zero
>>>> + */
>>>> +bool os_wait_until_zero(int *var, uint64_t timeout);
>>>> +
>>>>  #ifdef __cplusplus
>>>>  }
>>>>  #endif
>>>> --
>>>> 2.1.0
>>>>
>>>> _______________________________________________
>>>> mesa-dev mailing list
>>>> mesa-dev@lists.freedesktop.org
>>>> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
The compiler doesn't know that there's another thread. And it won't
start to assume that there might always be another thread because then
it could never optimize pointer derefs.

On Fri, Jun 26, 2015 at 11:55 AM, Marek Olšák <maraeo@gmail.com> wrote:
> I assumed the atomic operation in another thread would act as a
> barrier in this case. Is that right?
>
> Marek
>
> On Fri, Jun 26, 2015 at 5:47 PM, Ilia Mirkin <imirkin@alum.mit.edu> wrote:
>> On Fri, Jun 26, 2015 at 11:33 AM, Marek Olšák <maraeo@gmail.com> wrote:
>>> I expect the variable will be changed using an atomic operation by the
>>> CPU, or using a coherent store instruction by the GPU.
>>>
>>> If this is wrong and volatile is really required here, then
>>> p_atomic_read is wrong too. Should we fix it? For example:
>>>
>>> #define p_atomic_read(_v) (*(volatile int*)(_v))
>>>
>>> Then, os_wait_until_zero can use p_atomic_read.
>>
>> The problem is the lack of a memory barrier. A function call to
>> sched_yield implies a barrier (since the function could well have
>> changed the memory that var points to), and so it's all fine. But on a
>> non-PIPE_OS_UNIX os, this becomes
>>
>> while (*var);
>>
>> Which the compiler would be well within its right to rewrite to
>>
>> x = *var;
>> while (x);
>>
>> which wouldn't end well. You need a barrier that tells the compiler
>> that memory may be invalidated... I believe this maps to either mb()
>> or barrier() in the linux kernel, and they have some fancy assembly
>> syntax to tell the compiler "oops, memory may have changed". However
>> in this case, it may be easier to just mark the var volatile.
>>
>>>
>>> Marek
>>>
>>> On Fri, Jun 26, 2015 at 4:48 PM, Ilia Mirkin <imirkin@alum.mit.edu> wrote:
>>>> On Fri, Jun 26, 2015 at 7:05 AM, Marek Olšák <maraeo@gmail.com> wrote:
>>>>> From: Marek Olšák <marek.olsak@amd.com>
>>>>>
>>>>> This will be used by radeon and amdgpu winsyses.
>>>>> Copied from the amdgpu winsys.
>>>>> ---
>>>>>  src/gallium/auxiliary/os/os_time.c | 36 +++++++++++++++++++++++++++++++++++-
>>>>>  src/gallium/auxiliary/os/os_time.h | 10 ++++++++++
>>>>>  2 files changed, 45 insertions(+), 1 deletion(-)
>>>>>
>>>>> diff --git a/src/gallium/auxiliary/os/os_time.c b/src/gallium/auxiliary/os/os_time.c
>>>>> index f7e4ca4..63b6879 100644
>>>>> --- a/src/gallium/auxiliary/os/os_time.c
>>>>> +++ b/src/gallium/auxiliary/os/os_time.c
>>>>> @@ -33,11 +33,12 @@
>>>>>   */
>>>>>
>>>>>
>>>>> -#include "pipe/p_config.h"
>>>>> +#include "pipe/p_defines.h"
>>>>>
>>>>>  #if defined(PIPE_OS_UNIX)
>>>>>  #  include <time.h> /* timeval */
>>>>>  #  include <sys/time.h> /* timeval */
>>>>> +#  include <sched.h> /* sched_yield */
>>>>>  #elif defined(PIPE_SUBSYSTEM_WINDOWS_USER)
>>>>>  #  include <windows.h>
>>>>>  #else
>>>>> @@ -92,3 +93,36 @@ os_time_sleep(int64_t usecs)
>>>>>  }
>>>>>
>>>>>  #endif
>>>>> +
>>>>> +
>>>>> +bool os_wait_until_zero(int *var, uint64_t timeout)
>>>>
>>>> Does this need to be volatile?
>>>>
>>>>> +{
>>>>> +   if (!*var)
>>>>> +      return true;
>>>>> +
>>>>> +   if (!timeout)
>>>>> +      return false;
>>>>> +
>>>>> +   if (timeout == PIPE_TIMEOUT_INFINITE) {
>>>>> +      while (*var) {
>>>>> +#if defined(PIPE_OS_UNIX)
>>>>> +         sched_yield();
>>>>> +#endif
>>>>> +      }
>>>>> +      return true;
>>>>> +   }
>>>>> +   else {
>>>>> +      int64_t start_time = os_time_get_nano();
>>>>> +      int64_t end_time = start_time + timeout;
>>>>> +
>>>>> +      while (*var) {
>>>>> +         if (os_time_timeout(start_time, end_time, os_time_get_nano()))
>>>>> +            return false;
>>>>> +
>>>>> +#if defined(PIPE_OS_UNIX)
>>>>> +         sched_yield();
>>>>> +#endif
>>>>> +      }
>>>>> +      return true;
>>>>> +   }
>>>>> +}
>>>>> diff --git a/src/gallium/auxiliary/os/os_time.h b/src/gallium/auxiliary/os/os_time.h
>>>>> index 4fab03c..fdc8040 100644
>>>>> --- a/src/gallium/auxiliary/os/os_time.h
>>>>> +++ b/src/gallium/auxiliary/os/os_time.h
>>>>> @@ -94,6 +94,16 @@ os_time_timeout(int64_t start,
>>>>>  }
>>>>>
>>>>>
>>>>> +/**
>>>>> + * Wait until the variable at the given memory location is zero.
>>>>> + *
>>>>> + * \param var           variable
>>>>> + * \param timeout       timeout, can be anything from 0 (no wait) to
>>>>> + *                      PIPE_TIME_INFINITE (wait forever)
>>>>> + * \return     true if the variable is zero
>>>>> + */
>>>>> +bool os_wait_until_zero(int *var, uint64_t timeout);
>>>>> +
>>>>>  #ifdef __cplusplus
>>>>>  }
>>>>>  #endif
>>>>> --
>>>>> 2.1.0
>>>>>
>>>>> _______________________________________________
>>>>> mesa-dev mailing list
>>>>> mesa-dev@lists.freedesktop.org
>>>>> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
My question was how to fix p_atomic_read? Would the volatile read that
I proposed work?

Marek


On Fri, Jun 26, 2015 at 5:59 PM, Ilia Mirkin <imirkin@alum.mit.edu> wrote:
> The compiler doesn't know that there's another thread. And it won't
> start to assume that there might always be another thread because then
> it could never optimize pointer derefs.
>
> On Fri, Jun 26, 2015 at 11:55 AM, Marek Olšák <maraeo@gmail.com> wrote:
>> I assumed the atomic operation in another thread would act as a
>> barrier in this case. Is that right?
>>
>> Marek
>>
>> On Fri, Jun 26, 2015 at 5:47 PM, Ilia Mirkin <imirkin@alum.mit.edu> wrote:
>>> On Fri, Jun 26, 2015 at 11:33 AM, Marek Olšák <maraeo@gmail.com> wrote:
>>>> I expect the variable will be changed using an atomic operation by the
>>>> CPU, or using a coherent store instruction by the GPU.
>>>>
>>>> If this is wrong and volatile is really required here, then
>>>> p_atomic_read is wrong too. Should we fix it? For example:
>>>>
>>>> #define p_atomic_read(_v) (*(volatile int*)(_v))
>>>>
>>>> Then, os_wait_until_zero can use p_atomic_read.
>>>
>>> The problem is the lack of a memory barrier. A function call to
>>> sched_yield implies a barrier (since the function could well have
>>> changed the memory that var points to), and so it's all fine. But on a
>>> non-PIPE_OS_UNIX os, this becomes
>>>
>>> while (*var);
>>>
>>> Which the compiler would be well within its right to rewrite to
>>>
>>> x = *var;
>>> while (x);
>>>
>>> which wouldn't end well. You need a barrier that tells the compiler
>>> that memory may be invalidated... I believe this maps to either mb()
>>> or barrier() in the linux kernel, and they have some fancy assembly
>>> syntax to tell the compiler "oops, memory may have changed". However
>>> in this case, it may be easier to just mark the var volatile.
>>>
>>>>
>>>> Marek
>>>>
>>>> On Fri, Jun 26, 2015 at 4:48 PM, Ilia Mirkin <imirkin@alum.mit.edu> wrote:
>>>>> On Fri, Jun 26, 2015 at 7:05 AM, Marek Olšák <maraeo@gmail.com> wrote:
>>>>>> From: Marek Olšák <marek.olsak@amd.com>
>>>>>>
>>>>>> This will be used by radeon and amdgpu winsyses.
>>>>>> Copied from the amdgpu winsys.
>>>>>> ---
>>>>>>  src/gallium/auxiliary/os/os_time.c | 36 +++++++++++++++++++++++++++++++++++-
>>>>>>  src/gallium/auxiliary/os/os_time.h | 10 ++++++++++
>>>>>>  2 files changed, 45 insertions(+), 1 deletion(-)
>>>>>>
>>>>>> diff --git a/src/gallium/auxiliary/os/os_time.c b/src/gallium/auxiliary/os/os_time.c
>>>>>> index f7e4ca4..63b6879 100644
>>>>>> --- a/src/gallium/auxiliary/os/os_time.c
>>>>>> +++ b/src/gallium/auxiliary/os/os_time.c
>>>>>> @@ -33,11 +33,12 @@
>>>>>>   */
>>>>>>
>>>>>>
>>>>>> -#include "pipe/p_config.h"
>>>>>> +#include "pipe/p_defines.h"
>>>>>>
>>>>>>  #if defined(PIPE_OS_UNIX)
>>>>>>  #  include <time.h> /* timeval */
>>>>>>  #  include <sys/time.h> /* timeval */
>>>>>> +#  include <sched.h> /* sched_yield */
>>>>>>  #elif defined(PIPE_SUBSYSTEM_WINDOWS_USER)
>>>>>>  #  include <windows.h>
>>>>>>  #else
>>>>>> @@ -92,3 +93,36 @@ os_time_sleep(int64_t usecs)
>>>>>>  }
>>>>>>
>>>>>>  #endif
>>>>>> +
>>>>>> +
>>>>>> +bool os_wait_until_zero(int *var, uint64_t timeout)
>>>>>
>>>>> Does this need to be volatile?
>>>>>
>>>>>> +{
>>>>>> +   if (!*var)
>>>>>> +      return true;
>>>>>> +
>>>>>> +   if (!timeout)
>>>>>> +      return false;
>>>>>> +
>>>>>> +   if (timeout == PIPE_TIMEOUT_INFINITE) {
>>>>>> +      while (*var) {
>>>>>> +#if defined(PIPE_OS_UNIX)
>>>>>> +         sched_yield();
>>>>>> +#endif
>>>>>> +      }
>>>>>> +      return true;
>>>>>> +   }
>>>>>> +   else {
>>>>>> +      int64_t start_time = os_time_get_nano();
>>>>>> +      int64_t end_time = start_time + timeout;
>>>>>> +
>>>>>> +      while (*var) {
>>>>>> +         if (os_time_timeout(start_time, end_time, os_time_get_nano()))
>>>>>> +            return false;
>>>>>> +
>>>>>> +#if defined(PIPE_OS_UNIX)
>>>>>> +         sched_yield();
>>>>>> +#endif
>>>>>> +      }
>>>>>> +      return true;
>>>>>> +   }
>>>>>> +}
>>>>>> diff --git a/src/gallium/auxiliary/os/os_time.h b/src/gallium/auxiliary/os/os_time.h
>>>>>> index 4fab03c..fdc8040 100644
>>>>>> --- a/src/gallium/auxiliary/os/os_time.h
>>>>>> +++ b/src/gallium/auxiliary/os/os_time.h
>>>>>> @@ -94,6 +94,16 @@ os_time_timeout(int64_t start,
>>>>>>  }
>>>>>>
>>>>>>
>>>>>> +/**
>>>>>> + * Wait until the variable at the given memory location is zero.
>>>>>> + *
>>>>>> + * \param var           variable
>>>>>> + * \param timeout       timeout, can be anything from 0 (no wait) to
>>>>>> + *                      PIPE_TIME_INFINITE (wait forever)
>>>>>> + * \return     true if the variable is zero
>>>>>> + */
>>>>>> +bool os_wait_until_zero(int *var, uint64_t timeout);
>>>>>> +
>>>>>>  #ifdef __cplusplus
>>>>>>  }
>>>>>>  #endif
>>>>>> --
>>>>>> 2.1.0
>>>>>>
>>>>>> _______________________________________________
>>>>>> mesa-dev mailing list
>>>>>> mesa-dev@lists.freedesktop.org
>>>>>> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
p_atomic_read is fine as-is. The read is guaranteed to be atomic up to
a word size on x86 processors. I suspect other cpu's have similar
guarantees, and if not, then hopefully have other ops to perform the
read atomically.

On Fri, Jun 26, 2015 at 12:18 PM, Marek Olšák <maraeo@gmail.com> wrote:
> My question was how to fix p_atomic_read? Would the volatile read that
> I proposed work?
>
> Marek
>
>
> On Fri, Jun 26, 2015 at 5:59 PM, Ilia Mirkin <imirkin@alum.mit.edu> wrote:
>> The compiler doesn't know that there's another thread. And it won't
>> start to assume that there might always be another thread because then
>> it could never optimize pointer derefs.
>>
>> On Fri, Jun 26, 2015 at 11:55 AM, Marek Olšák <maraeo@gmail.com> wrote:
>>> I assumed the atomic operation in another thread would act as a
>>> barrier in this case. Is that right?
>>>
>>> Marek
>>>
>>> On Fri, Jun 26, 2015 at 5:47 PM, Ilia Mirkin <imirkin@alum.mit.edu> wrote:
>>>> On Fri, Jun 26, 2015 at 11:33 AM, Marek Olšák <maraeo@gmail.com> wrote:
>>>>> I expect the variable will be changed using an atomic operation by the
>>>>> CPU, or using a coherent store instruction by the GPU.
>>>>>
>>>>> If this is wrong and volatile is really required here, then
>>>>> p_atomic_read is wrong too. Should we fix it? For example:
>>>>>
>>>>> #define p_atomic_read(_v) (*(volatile int*)(_v))
>>>>>
>>>>> Then, os_wait_until_zero can use p_atomic_read.
>>>>
>>>> The problem is the lack of a memory barrier. A function call to
>>>> sched_yield implies a barrier (since the function could well have
>>>> changed the memory that var points to), and so it's all fine. But on a
>>>> non-PIPE_OS_UNIX os, this becomes
>>>>
>>>> while (*var);
>>>>
>>>> Which the compiler would be well within its right to rewrite to
>>>>
>>>> x = *var;
>>>> while (x);
>>>>
>>>> which wouldn't end well. You need a barrier that tells the compiler
>>>> that memory may be invalidated... I believe this maps to either mb()
>>>> or barrier() in the linux kernel, and they have some fancy assembly
>>>> syntax to tell the compiler "oops, memory may have changed". However
>>>> in this case, it may be easier to just mark the var volatile.
>>>>
>>>>>
>>>>> Marek
>>>>>
>>>>> On Fri, Jun 26, 2015 at 4:48 PM, Ilia Mirkin <imirkin@alum.mit.edu> wrote:
>>>>>> On Fri, Jun 26, 2015 at 7:05 AM, Marek Olšák <maraeo@gmail.com> wrote:
>>>>>>> From: Marek Olšák <marek.olsak@amd.com>
>>>>>>>
>>>>>>> This will be used by radeon and amdgpu winsyses.
>>>>>>> Copied from the amdgpu winsys.
>>>>>>> ---
>>>>>>>  src/gallium/auxiliary/os/os_time.c | 36 +++++++++++++++++++++++++++++++++++-
>>>>>>>  src/gallium/auxiliary/os/os_time.h | 10 ++++++++++
>>>>>>>  2 files changed, 45 insertions(+), 1 deletion(-)
>>>>>>>
>>>>>>> diff --git a/src/gallium/auxiliary/os/os_time.c b/src/gallium/auxiliary/os/os_time.c
>>>>>>> index f7e4ca4..63b6879 100644
>>>>>>> --- a/src/gallium/auxiliary/os/os_time.c
>>>>>>> +++ b/src/gallium/auxiliary/os/os_time.c
>>>>>>> @@ -33,11 +33,12 @@
>>>>>>>   */
>>>>>>>
>>>>>>>
>>>>>>> -#include "pipe/p_config.h"
>>>>>>> +#include "pipe/p_defines.h"
>>>>>>>
>>>>>>>  #if defined(PIPE_OS_UNIX)
>>>>>>>  #  include <time.h> /* timeval */
>>>>>>>  #  include <sys/time.h> /* timeval */
>>>>>>> +#  include <sched.h> /* sched_yield */
>>>>>>>  #elif defined(PIPE_SUBSYSTEM_WINDOWS_USER)
>>>>>>>  #  include <windows.h>
>>>>>>>  #else
>>>>>>> @@ -92,3 +93,36 @@ os_time_sleep(int64_t usecs)
>>>>>>>  }
>>>>>>>
>>>>>>>  #endif
>>>>>>> +
>>>>>>> +
>>>>>>> +bool os_wait_until_zero(int *var, uint64_t timeout)
>>>>>>
>>>>>> Does this need to be volatile?
>>>>>>
>>>>>>> +{
>>>>>>> +   if (!*var)
>>>>>>> +      return true;
>>>>>>> +
>>>>>>> +   if (!timeout)
>>>>>>> +      return false;
>>>>>>> +
>>>>>>> +   if (timeout == PIPE_TIMEOUT_INFINITE) {
>>>>>>> +      while (*var) {
>>>>>>> +#if defined(PIPE_OS_UNIX)
>>>>>>> +         sched_yield();
>>>>>>> +#endif
>>>>>>> +      }
>>>>>>> +      return true;
>>>>>>> +   }
>>>>>>> +   else {
>>>>>>> +      int64_t start_time = os_time_get_nano();
>>>>>>> +      int64_t end_time = start_time + timeout;
>>>>>>> +
>>>>>>> +      while (*var) {
>>>>>>> +         if (os_time_timeout(start_time, end_time, os_time_get_nano()))
>>>>>>> +            return false;
>>>>>>> +
>>>>>>> +#if defined(PIPE_OS_UNIX)
>>>>>>> +         sched_yield();
>>>>>>> +#endif
>>>>>>> +      }
>>>>>>> +      return true;
>>>>>>> +   }
>>>>>>> +}
>>>>>>> diff --git a/src/gallium/auxiliary/os/os_time.h b/src/gallium/auxiliary/os/os_time.h
>>>>>>> index 4fab03c..fdc8040 100644
>>>>>>> --- a/src/gallium/auxiliary/os/os_time.h
>>>>>>> +++ b/src/gallium/auxiliary/os/os_time.h
>>>>>>> @@ -94,6 +94,16 @@ os_time_timeout(int64_t start,
>>>>>>>  }
>>>>>>>
>>>>>>>
>>>>>>> +/**
>>>>>>> + * Wait until the variable at the given memory location is zero.
>>>>>>> + *
>>>>>>> + * \param var           variable
>>>>>>> + * \param timeout       timeout, can be anything from 0 (no wait) to
>>>>>>> + *                      PIPE_TIME_INFINITE (wait forever)
>>>>>>> + * \return     true if the variable is zero
>>>>>>> + */
>>>>>>> +bool os_wait_until_zero(int *var, uint64_t timeout);
>>>>>>> +
>>>>>>>  #ifdef __cplusplus
>>>>>>>  }
>>>>>>>  #endif
>>>>>>> --
>>>>>>> 2.1.0
>>>>>>>
>>>>>>> _______________________________________________
>>>>>>> mesa-dev mailing list
>>>>>>> mesa-dev@lists.freedesktop.org
>>>>>>> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
If p_atomic_read is fine, then this patch is fine too. So you're
telling that this should work:

while (p_atomic_read(var));

I wouldn't be concerned about a memory barrier. This is only 1 int, so
it should make its way into the shared cache eventually.

Marek


On Fri, Jun 26, 2015 at 6:25 PM, Ilia Mirkin <imirkin@alum.mit.edu> wrote:
> p_atomic_read is fine as-is. The read is guaranteed to be atomic up to
> a word size on x86 processors. I suspect other cpu's have similar
> guarantees, and if not, then hopefully have other ops to perform the
> read atomically.
>
> On Fri, Jun 26, 2015 at 12:18 PM, Marek Olšák <maraeo@gmail.com> wrote:
>> My question was how to fix p_atomic_read? Would the volatile read that
>> I proposed work?
>>
>> Marek
>>
>>
>> On Fri, Jun 26, 2015 at 5:59 PM, Ilia Mirkin <imirkin@alum.mit.edu> wrote:
>>> The compiler doesn't know that there's another thread. And it won't
>>> start to assume that there might always be another thread because then
>>> it could never optimize pointer derefs.
>>>
>>> On Fri, Jun 26, 2015 at 11:55 AM, Marek Olšák <maraeo@gmail.com> wrote:
>>>> I assumed the atomic operation in another thread would act as a
>>>> barrier in this case. Is that right?
>>>>
>>>> Marek
>>>>
>>>> On Fri, Jun 26, 2015 at 5:47 PM, Ilia Mirkin <imirkin@alum.mit.edu> wrote:
>>>>> On Fri, Jun 26, 2015 at 11:33 AM, Marek Olšák <maraeo@gmail.com> wrote:
>>>>>> I expect the variable will be changed using an atomic operation by the
>>>>>> CPU, or using a coherent store instruction by the GPU.
>>>>>>
>>>>>> If this is wrong and volatile is really required here, then
>>>>>> p_atomic_read is wrong too. Should we fix it? For example:
>>>>>>
>>>>>> #define p_atomic_read(_v) (*(volatile int*)(_v))
>>>>>>
>>>>>> Then, os_wait_until_zero can use p_atomic_read.
>>>>>
>>>>> The problem is the lack of a memory barrier. A function call to
>>>>> sched_yield implies a barrier (since the function could well have
>>>>> changed the memory that var points to), and so it's all fine. But on a
>>>>> non-PIPE_OS_UNIX os, this becomes
>>>>>
>>>>> while (*var);
>>>>>
>>>>> Which the compiler would be well within its right to rewrite to
>>>>>
>>>>> x = *var;
>>>>> while (x);
>>>>>
>>>>> which wouldn't end well. You need a barrier that tells the compiler
>>>>> that memory may be invalidated... I believe this maps to either mb()
>>>>> or barrier() in the linux kernel, and they have some fancy assembly
>>>>> syntax to tell the compiler "oops, memory may have changed". However
>>>>> in this case, it may be easier to just mark the var volatile.
>>>>>
>>>>>>
>>>>>> Marek
>>>>>>
>>>>>> On Fri, Jun 26, 2015 at 4:48 PM, Ilia Mirkin <imirkin@alum.mit.edu> wrote:
>>>>>>> On Fri, Jun 26, 2015 at 7:05 AM, Marek Olšák <maraeo@gmail.com> wrote:
>>>>>>>> From: Marek Olšák <marek.olsak@amd.com>
>>>>>>>>
>>>>>>>> This will be used by radeon and amdgpu winsyses.
>>>>>>>> Copied from the amdgpu winsys.
>>>>>>>> ---
>>>>>>>>  src/gallium/auxiliary/os/os_time.c | 36 +++++++++++++++++++++++++++++++++++-
>>>>>>>>  src/gallium/auxiliary/os/os_time.h | 10 ++++++++++
>>>>>>>>  2 files changed, 45 insertions(+), 1 deletion(-)
>>>>>>>>
>>>>>>>> diff --git a/src/gallium/auxiliary/os/os_time.c b/src/gallium/auxiliary/os/os_time.c
>>>>>>>> index f7e4ca4..63b6879 100644
>>>>>>>> --- a/src/gallium/auxiliary/os/os_time.c
>>>>>>>> +++ b/src/gallium/auxiliary/os/os_time.c
>>>>>>>> @@ -33,11 +33,12 @@
>>>>>>>>   */
>>>>>>>>
>>>>>>>>
>>>>>>>> -#include "pipe/p_config.h"
>>>>>>>> +#include "pipe/p_defines.h"
>>>>>>>>
>>>>>>>>  #if defined(PIPE_OS_UNIX)
>>>>>>>>  #  include <time.h> /* timeval */
>>>>>>>>  #  include <sys/time.h> /* timeval */
>>>>>>>> +#  include <sched.h> /* sched_yield */
>>>>>>>>  #elif defined(PIPE_SUBSYSTEM_WINDOWS_USER)
>>>>>>>>  #  include <windows.h>
>>>>>>>>  #else
>>>>>>>> @@ -92,3 +93,36 @@ os_time_sleep(int64_t usecs)
>>>>>>>>  }
>>>>>>>>
>>>>>>>>  #endif
>>>>>>>> +
>>>>>>>> +
>>>>>>>> +bool os_wait_until_zero(int *var, uint64_t timeout)
>>>>>>>
>>>>>>> Does this need to be volatile?
>>>>>>>
>>>>>>>> +{
>>>>>>>> +   if (!*var)
>>>>>>>> +      return true;
>>>>>>>> +
>>>>>>>> +   if (!timeout)
>>>>>>>> +      return false;
>>>>>>>> +
>>>>>>>> +   if (timeout == PIPE_TIMEOUT_INFINITE) {
>>>>>>>> +      while (*var) {
>>>>>>>> +#if defined(PIPE_OS_UNIX)
>>>>>>>> +         sched_yield();
>>>>>>>> +#endif
>>>>>>>> +      }
>>>>>>>> +      return true;
>>>>>>>> +   }
>>>>>>>> +   else {
>>>>>>>> +      int64_t start_time = os_time_get_nano();
>>>>>>>> +      int64_t end_time = start_time + timeout;
>>>>>>>> +
>>>>>>>> +      while (*var) {
>>>>>>>> +         if (os_time_timeout(start_time, end_time, os_time_get_nano()))
>>>>>>>> +            return false;
>>>>>>>> +
>>>>>>>> +#if defined(PIPE_OS_UNIX)
>>>>>>>> +         sched_yield();
>>>>>>>> +#endif
>>>>>>>> +      }
>>>>>>>> +      return true;
>>>>>>>> +   }
>>>>>>>> +}
>>>>>>>> diff --git a/src/gallium/auxiliary/os/os_time.h b/src/gallium/auxiliary/os/os_time.h
>>>>>>>> index 4fab03c..fdc8040 100644
>>>>>>>> --- a/src/gallium/auxiliary/os/os_time.h
>>>>>>>> +++ b/src/gallium/auxiliary/os/os_time.h
>>>>>>>> @@ -94,6 +94,16 @@ os_time_timeout(int64_t start,
>>>>>>>>  }
>>>>>>>>
>>>>>>>>
>>>>>>>> +/**
>>>>>>>> + * Wait until the variable at the given memory location is zero.
>>>>>>>> + *
>>>>>>>> + * \param var           variable
>>>>>>>> + * \param timeout       timeout, can be anything from 0 (no wait) to
>>>>>>>> + *                      PIPE_TIME_INFINITE (wait forever)
>>>>>>>> + * \return     true if the variable is zero
>>>>>>>> + */
>>>>>>>> +bool os_wait_until_zero(int *var, uint64_t timeout);
>>>>>>>> +
>>>>>>>>  #ifdef __cplusplus
>>>>>>>>  }
>>>>>>>>  #endif
>>>>>>>> --
>>>>>>>> 2.1.0
>>>>>>>>
>>>>>>>> _______________________________________________
>>>>>>>> mesa-dev mailing list
>>>>>>>> mesa-dev@lists.freedesktop.org
>>>>>>>> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
On Fri, Jun 26, 2015 at 12:40 PM, Marek Olšák <maraeo@gmail.com> wrote:
> If p_atomic_read is fine, then this patch is fine too. So you're
> telling that this should work:
>
> while (p_atomic_read(var));

No, this shouldn't work. I don't believe that anyone ever claimed it
ought to. But perhaps we have different ideas of what "p_atomic_read"
should do. I see it as "atomically read a word from memory in a way
that you can never get partial results". The compiler can still
happily optimize this to

x = p_atomic_read(var) (assuming p_atomic_read isn't an out-of-line function)
while (x);

>
> I wouldn't be concerned about a memory barrier. This is only 1 int, so
> it should make its way into the shared cache eventually.

The barrier is for the compiler to know what's going on, not for any
hardware reasons. I *believe* this is the barrier() macro in the
kernel (mb() is an actual memory barrier issued on the CPU).

So something like

#define barrier() __asm__ __volatile__("": : :"memory")

while (*var) {
  barrier();
}

Should have the desired effect. [Or while (p_atomic_read(var)) ... whatever]

>
> Marek
>
>
> On Fri, Jun 26, 2015 at 6:25 PM, Ilia Mirkin <imirkin@alum.mit.edu> wrote:
>> p_atomic_read is fine as-is. The read is guaranteed to be atomic up to
>> a word size on x86 processors. I suspect other cpu's have similar
>> guarantees, and if not, then hopefully have other ops to perform the
>> read atomically.
>>
>> On Fri, Jun 26, 2015 at 12:18 PM, Marek Olšák <maraeo@gmail.com> wrote:
>>> My question was how to fix p_atomic_read? Would the volatile read that
>>> I proposed work?
>>>
>>> Marek
>>>
>>>
>>> On Fri, Jun 26, 2015 at 5:59 PM, Ilia Mirkin <imirkin@alum.mit.edu> wrote:
>>>> The compiler doesn't know that there's another thread. And it won't
>>>> start to assume that there might always be another thread because then
>>>> it could never optimize pointer derefs.
>>>>
>>>> On Fri, Jun 26, 2015 at 11:55 AM, Marek Olšák <maraeo@gmail.com> wrote:
>>>>> I assumed the atomic operation in another thread would act as a
>>>>> barrier in this case. Is that right?
>>>>>
>>>>> Marek
>>>>>
>>>>> On Fri, Jun 26, 2015 at 5:47 PM, Ilia Mirkin <imirkin@alum.mit.edu> wrote:
>>>>>> On Fri, Jun 26, 2015 at 11:33 AM, Marek Olšák <maraeo@gmail.com> wrote:
>>>>>>> I expect the variable will be changed using an atomic operation by the
>>>>>>> CPU, or using a coherent store instruction by the GPU.
>>>>>>>
>>>>>>> If this is wrong and volatile is really required here, then
>>>>>>> p_atomic_read is wrong too. Should we fix it? For example:
>>>>>>>
>>>>>>> #define p_atomic_read(_v) (*(volatile int*)(_v))
>>>>>>>
>>>>>>> Then, os_wait_until_zero can use p_atomic_read.
>>>>>>
>>>>>> The problem is the lack of a memory barrier. A function call to
>>>>>> sched_yield implies a barrier (since the function could well have
>>>>>> changed the memory that var points to), and so it's all fine. But on a
>>>>>> non-PIPE_OS_UNIX os, this becomes
>>>>>>
>>>>>> while (*var);
>>>>>>
>>>>>> Which the compiler would be well within its right to rewrite to
>>>>>>
>>>>>> x = *var;
>>>>>> while (x);
>>>>>>
>>>>>> which wouldn't end well. You need a barrier that tells the compiler
>>>>>> that memory may be invalidated... I believe this maps to either mb()
>>>>>> or barrier() in the linux kernel, and they have some fancy assembly
>>>>>> syntax to tell the compiler "oops, memory may have changed". However
>>>>>> in this case, it may be easier to just mark the var volatile.
>>>>>>
>>>>>>>
>>>>>>> Marek
>>>>>>>
>>>>>>> On Fri, Jun 26, 2015 at 4:48 PM, Ilia Mirkin <imirkin@alum.mit.edu> wrote:
>>>>>>>> On Fri, Jun 26, 2015 at 7:05 AM, Marek Olšák <maraeo@gmail.com> wrote:
>>>>>>>>> From: Marek Olšák <marek.olsak@amd.com>
>>>>>>>>>
>>>>>>>>> This will be used by radeon and amdgpu winsyses.
>>>>>>>>> Copied from the amdgpu winsys.
>>>>>>>>> ---
>>>>>>>>>  src/gallium/auxiliary/os/os_time.c | 36 +++++++++++++++++++++++++++++++++++-
>>>>>>>>>  src/gallium/auxiliary/os/os_time.h | 10 ++++++++++
>>>>>>>>>  2 files changed, 45 insertions(+), 1 deletion(-)
>>>>>>>>>
>>>>>>>>> diff --git a/src/gallium/auxiliary/os/os_time.c b/src/gallium/auxiliary/os/os_time.c
>>>>>>>>> index f7e4ca4..63b6879 100644
>>>>>>>>> --- a/src/gallium/auxiliary/os/os_time.c
>>>>>>>>> +++ b/src/gallium/auxiliary/os/os_time.c
>>>>>>>>> @@ -33,11 +33,12 @@
>>>>>>>>>   */
>>>>>>>>>
>>>>>>>>>
>>>>>>>>> -#include "pipe/p_config.h"
>>>>>>>>> +#include "pipe/p_defines.h"
>>>>>>>>>
>>>>>>>>>  #if defined(PIPE_OS_UNIX)
>>>>>>>>>  #  include <time.h> /* timeval */
>>>>>>>>>  #  include <sys/time.h> /* timeval */
>>>>>>>>> +#  include <sched.h> /* sched_yield */
>>>>>>>>>  #elif defined(PIPE_SUBSYSTEM_WINDOWS_USER)
>>>>>>>>>  #  include <windows.h>
>>>>>>>>>  #else
>>>>>>>>> @@ -92,3 +93,36 @@ os_time_sleep(int64_t usecs)
>>>>>>>>>  }
>>>>>>>>>
>>>>>>>>>  #endif
>>>>>>>>> +
>>>>>>>>> +
>>>>>>>>> +bool os_wait_until_zero(int *var, uint64_t timeout)
>>>>>>>>
>>>>>>>> Does this need to be volatile?
>>>>>>>>
>>>>>>>>> +{
>>>>>>>>> +   if (!*var)
>>>>>>>>> +      return true;
>>>>>>>>> +
>>>>>>>>> +   if (!timeout)
>>>>>>>>> +      return false;
>>>>>>>>> +
>>>>>>>>> +   if (timeout == PIPE_TIMEOUT_INFINITE) {
>>>>>>>>> +      while (*var) {
>>>>>>>>> +#if defined(PIPE_OS_UNIX)
>>>>>>>>> +         sched_yield();
>>>>>>>>> +#endif
>>>>>>>>> +      }
>>>>>>>>> +      return true;
>>>>>>>>> +   }
>>>>>>>>> +   else {
>>>>>>>>> +      int64_t start_time = os_time_get_nano();
>>>>>>>>> +      int64_t end_time = start_time + timeout;
>>>>>>>>> +
>>>>>>>>> +      while (*var) {
>>>>>>>>> +         if (os_time_timeout(start_time, end_time, os_time_get_nano()))
>>>>>>>>> +            return false;
>>>>>>>>> +
>>>>>>>>> +#if defined(PIPE_OS_UNIX)
>>>>>>>>> +         sched_yield();
>>>>>>>>> +#endif
>>>>>>>>> +      }
>>>>>>>>> +      return true;
>>>>>>>>> +   }
>>>>>>>>> +}
>>>>>>>>> diff --git a/src/gallium/auxiliary/os/os_time.h b/src/gallium/auxiliary/os/os_time.h
>>>>>>>>> index 4fab03c..fdc8040 100644
>>>>>>>>> --- a/src/gallium/auxiliary/os/os_time.h
>>>>>>>>> +++ b/src/gallium/auxiliary/os/os_time.h
>>>>>>>>> @@ -94,6 +94,16 @@ os_time_timeout(int64_t start,
>>>>>>>>>  }
>>>>>>>>>
>>>>>>>>>
>>>>>>>>> +/**
>>>>>>>>> + * Wait until the variable at the given memory location is zero.
>>>>>>>>> + *
>>>>>>>>> + * \param var           variable
>>>>>>>>> + * \param timeout       timeout, can be anything from 0 (no wait) to
>>>>>>>>> + *                      PIPE_TIME_INFINITE (wait forever)
>>>>>>>>> + * \return     true if the variable is zero
>>>>>>>>> + */
>>>>>>>>> +bool os_wait_until_zero(int *var, uint64_t timeout);
>>>>>>>>> +
>>>>>>>>>  #ifdef __cplusplus
>>>>>>>>>  }
>>>>>>>>>  #endif
>>>>>>>>> --
>>>>>>>>> 2.1.0
>>>>>>>>>
>>>>>>>>> _______________________________________________
>>>>>>>>> mesa-dev mailing list
>>>>>>>>> mesa-dev@lists.freedesktop.org
>>>>>>>>> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
On Fri, Jun 26, 2015 at 11:40 AM, Marek Olšák <maraeo@gmail.com> wrote:

> If p_atomic_read is fine, then this patch is fine too. So you're
> telling that this should work:
>
> while (p_atomic_read(var));
>
> I wouldn't be concerned about a memory barrier. This is only 1 int, so
> it should make its way into the shared cache eventually.
>
>
Yes, it does make it to the shared cache, but the assumption is that the
compiler will actually generate code to check the memory location more than
one. I've personally been bitten by this assumption - it's a bad one. Ilia
is right. If you have a variable that doesn't appear to modified at all,
but you, the programmer know it will be modified by another thread, you're
asking for an infinite loop. The only guarantee you get is that if this
code ran in isolation on a single thread, it will do what you told it to.
Consider even a trivial transformation:

while(1) {

    if(var == 0) break;

}

The compiler can optimize this to a single statement:

if(var != 0) infinite_loop();

...because it produces the same results as the above code when run in
isolation. However, if 'var' is volilate, it cannot assume that the value
will remain the same and cannot apply this "optimization". What's more fun
is that debug mode tends to not apply these sorts of optimizations, so your
code hangs in release builds, and when you check the memory location, you
can see that it has been updated. Commence tearing hair out. Then you look
at the assembly and hit your head on the desk. Or something like that. ;)

Patrick
As others pointed, volatile and atomic are slightly different things, 
but you have point: atomic operations should probably take volatile 
pointers as arguments.

This is what C11 did

   http://en.cppreference.com/w/c/atomic/atomic_load

so I do believe that it makes sense to update p_atomic helpers to match 
(as one day hopefully we'll replace everything with stdatomic.h)

Jose


On 26/06/15 16:33, Marek Olšák wrote:
> I expect the variable will be changed using an atomic operation by the
> CPU, or using a coherent store instruction by the GPU.
>
> If this is wrong and volatile is really required here, then
> p_atomic_read is wrong too. Should we fix it? For example:
>
> #define p_atomic_read(_v) (*(volatile int*)(_v))
>
> Then, os_wait_until_zero can use p_atomic_read.
>
> Marek
>
> On Fri, Jun 26, 2015 at 4:48 PM, Ilia Mirkin <imirkin@alum.mit.edu> wrote:
>> On Fri, Jun 26, 2015 at 7:05 AM, Marek Olšák <maraeo@gmail.com> wrote:
>>> From: Marek Olšák <marek.olsak@amd.com>
>>>
>>> This will be used by radeon and amdgpu winsyses.
>>> Copied from the amdgpu winsys.
>>> ---
>>>   src/gallium/auxiliary/os/os_time.c | 36 +++++++++++++++++++++++++++++++++++-
>>>   src/gallium/auxiliary/os/os_time.h | 10 ++++++++++
>>>   2 files changed, 45 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/src/gallium/auxiliary/os/os_time.c b/src/gallium/auxiliary/os/os_time.c
>>> index f7e4ca4..63b6879 100644
>>> --- a/src/gallium/auxiliary/os/os_time.c
>>> +++ b/src/gallium/auxiliary/os/os_time.c
>>> @@ -33,11 +33,12 @@
>>>    */
>>>
>>>
>>> -#include "pipe/p_config.h"
>>> +#include "pipe/p_defines.h"
>>>
>>>   #if defined(PIPE_OS_UNIX)
>>>   #  include <time.h> /* timeval */
>>>   #  include <sys/time.h> /* timeval */
>>> +#  include <sched.h> /* sched_yield */
>>>   #elif defined(PIPE_SUBSYSTEM_WINDOWS_USER)
>>>   #  include <windows.h>
>>>   #else
>>> @@ -92,3 +93,36 @@ os_time_sleep(int64_t usecs)
>>>   }
>>>
>>>   #endif
>>> +
>>> +
>>> +bool os_wait_until_zero(int *var, uint64_t timeout)
>>
>> Does this need to be volatile?
>>
>>> +{
>>> +   if (!*var)
>>> +      return true;
>>> +
>>> +   if (!timeout)
>>> +      return false;
>>> +
>>> +   if (timeout == PIPE_TIMEOUT_INFINITE) {
>>> +      while (*var) {
>>> +#if defined(PIPE_OS_UNIX)
>>> +         sched_yield();
>>> +#endif
>>> +      }
>>> +      return true;
>>> +   }
>>> +   else {
>>> +      int64_t start_time = os_time_get_nano();
>>> +      int64_t end_time = start_time + timeout;
>>> +
>>> +      while (*var) {
>>> +         if (os_time_timeout(start_time, end_time, os_time_get_nano()))
>>> +            return false;
>>> +
>>> +#if defined(PIPE_OS_UNIX)
>>> +         sched_yield();
>>> +#endif
>>> +      }
>>> +      return true;
>>> +   }
>>> +}
>>> diff --git a/src/gallium/auxiliary/os/os_time.h b/src/gallium/auxiliary/os/os_time.h
>>> index 4fab03c..fdc8040 100644
>>> --- a/src/gallium/auxiliary/os/os_time.h
>>> +++ b/src/gallium/auxiliary/os/os_time.h
>>> @@ -94,6 +94,16 @@ os_time_timeout(int64_t start,
>>>   }
>>>
>>>
>>> +/**
>>> + * Wait until the variable at the given memory location is zero.
>>> + *
>>> + * \param var           variable
>>> + * \param timeout       timeout, can be anything from 0 (no wait) to
>>> + *                      PIPE_TIME_INFINITE (wait forever)
>>> + * \return     true if the variable is zero
>>> + */
>>> +bool os_wait_until_zero(int *var, uint64_t timeout);
>>> +
>>>   #ifdef __cplusplus
>>>   }
>>>   #endif
>>> --
>>> 2.1.0
>>>
>>> _______________________________________________
>>> mesa-dev mailing list
>>> mesa-dev@lists.freedesktop.org
>>> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
> _______________________________________________
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
>