Message ID | 1434223702-30222-1-git-send-email-awatry@gmail.com |
---|---|
State | New |
Headers | show |
diff --git a/tests/util/piglit-util-cl.c b/tests/util/piglit-util-cl.c index 47e0c7a..6cdd718 100644 --- a/tests/util/piglit-util-cl.c +++ b/tests/util/piglit-util-cl.c @@ -80,7 +80,7 @@ piglit_cl_probe_floating(float value, float expect, uint32_t ulp) diff = fabsf(value - expect); - if(diff > ulp || isnan(value)) { + if (diff > t.f || isnan(value)) { printf("Expecting %f (0x%x) with tolerance %f (%u ulps), but got %f (0x%x)\n", e.f, e.u, t.f, t.u, v.f, v.u); return false; @@ -108,7 +108,7 @@ piglit_cl_probe_double(double value, double expect, uint64_t ulp) diff = fabsl(value - expect); - if(diff > ulp || isnan(value)) { + if (diff > t.f || isnan(value)) { printf("Expecting %f (0x%lx) with tolerance %f (%lu ulps), but got %f (0x%lx)\n", e.f, e.u, t.f, t.u, v.f, v.u); return false; @@ -162,7 +162,7 @@ piglit_cl_get_platform_version(cl_platform_id platform) int scanf_count; int major; int minor; - + /* * Returned format: * OpenCL<space><major_version.minor_version><space><platform-specific information> @@ -353,7 +353,7 @@ piglit_cl_get_info(void* fn_ptr, void* obj, cl_uint param) if(errNo == CL_SUCCESS) { param_ptr = calloc(param_size, sizeof(char)); - + /* retrieve param */ if(fn_ptr == clGetPlatformInfo) { errNo = clGetPlatformInfo(*(cl_platform_id*)obj, param, @@ -463,7 +463,7 @@ piglit_cl_get_program_build_info(cl_program program, cl_device_id device, .program = program, .device = device }; - + return piglit_cl_get_info(clGetProgramBuildInfo, &args, param); } @@ -479,7 +479,7 @@ piglit_cl_get_kernel_work_group_info(cl_kernel kernel, cl_device_id device, .kernel = kernel, .device = device }; - + return piglit_cl_get_info(clGetKernelWorkGroupInfo, &args, param); } @@ -620,7 +620,7 @@ piglit_cl_get_device_ids(cl_platform_id platform_id, cl_device_type device_type, piglit_cl_get_error_name(errNo)); return 0; } - + /* get device list */ if(device_ids != NULL && num_device_ids > 0) { *device_ids = malloc(num_device_ids * sizeof(cl_device_id)); @@ -761,7 +761,7 @@ piglit_cl_build_program_with_source_extended(piglit_cl_context context, piglit_cl_get_error_name(errNo)); return NULL; } - + errNo = clBuildProgram(program, context->num_devices, context->device_ids, @@ -788,7 +788,7 @@ piglit_cl_build_program_with_source_extended(piglit_cl_context context, char* log = piglit_cl_get_program_build_info(program, context->device_ids[i], CL_PROGRAM_BUILD_LOG); - + printf("Build log for device %s:\n -------- \n%s\n -------- \n", device_name, log); @@ -848,11 +848,11 @@ piglit_cl_build_program_with_binary_extended(piglit_cl_context context, for(i = 0; i < context->num_devices; i++) { char* device_name = piglit_cl_get_device_info(context->device_ids[i], CL_DEVICE_NAME); - + printf("Error for %s: %s\n", device_name, piglit_cl_get_error_name(binary_status[i])); - + free(device_name); } @@ -860,7 +860,7 @@ piglit_cl_build_program_with_binary_extended(piglit_cl_context context, return NULL; } free(binary_status); - + errNo = clBuildProgram(program, context->num_devices, context->device_ids, @@ -884,11 +884,11 @@ piglit_cl_build_program_with_binary_extended(piglit_cl_context context, char* log = piglit_cl_get_program_build_info(program, context->device_ids[i], CL_PROGRAM_BUILD_LOG); - + printf("Build log for device %s:\n -------- \n%s\n -------- \n", device_name, log); - + free(device_name); free(log); }
Meh, this still feels broken. Give me a bit longer. --Aaron On Sat, Jun 13, 2015 at 2:28 PM, Aaron Watry <awatry@gmail.com> wrote: > We need to actually check against the float value from the union, > instead of just doing (diff > ulp), which seems to cast the diff to > an int before checking against ulp. > > Signed-off-by: Aaron Watry <awatry@gmail.com> > CC: Tom Stellard <thomas.stellard@amd.com> > CC: Jan Vesely <jan.vesely@rutgers.edu> > --- > tests/util/piglit-util-cl.c | 28 ++++++++++++++-------------- > 1 file changed, 14 insertions(+), 14 deletions(-) > > diff --git a/tests/util/piglit-util-cl.c b/tests/util/piglit-util-cl.c > index 47e0c7a..6cdd718 100644 > --- a/tests/util/piglit-util-cl.c > +++ b/tests/util/piglit-util-cl.c > @@ -80,7 +80,7 @@ piglit_cl_probe_floating(float value, float expect, > uint32_t ulp) > > diff = fabsf(value - expect); > > - if(diff > ulp || isnan(value)) { > + if (diff > t.f || isnan(value)) { > printf("Expecting %f (0x%x) with tolerance %f (%u ulps), > but got %f (0x%x)\n", > e.f, e.u, t.f, t.u, v.f, v.u); > return false; > @@ -108,7 +108,7 @@ piglit_cl_probe_double(double value, double expect, > uint64_t ulp) > > diff = fabsl(value - expect); > > - if(diff > ulp || isnan(value)) { > + if (diff > t.f || isnan(value)) { > printf("Expecting %f (0x%lx) with tolerance %f (%lu ulps), > but got %f (0x%lx)\n", > e.f, e.u, t.f, t.u, v.f, v.u); > return false; > @@ -162,7 +162,7 @@ piglit_cl_get_platform_version(cl_platform_id platform) > int scanf_count; > int major; > int minor; > - > + > /* > * Returned format: > * > OpenCL<space><major_version.minor_version><space><platform-specific > information> > @@ -353,7 +353,7 @@ piglit_cl_get_info(void* fn_ptr, void* obj, cl_uint > param) > > if(errNo == CL_SUCCESS) { > param_ptr = calloc(param_size, sizeof(char)); > - > + > /* retrieve param */ > if(fn_ptr == clGetPlatformInfo) { > errNo = clGetPlatformInfo(*(cl_platform_id*)obj, > param, > @@ -463,7 +463,7 @@ piglit_cl_get_program_build_info(cl_program program, > cl_device_id device, > .program = program, > .device = device > }; > - > + > return piglit_cl_get_info(clGetProgramBuildInfo, &args, param); > } > > @@ -479,7 +479,7 @@ piglit_cl_get_kernel_work_group_info(cl_kernel kernel, > cl_device_id device, > .kernel = kernel, > .device = device > }; > - > + > return piglit_cl_get_info(clGetKernelWorkGroupInfo, &args, param); > } > > @@ -620,7 +620,7 @@ piglit_cl_get_device_ids(cl_platform_id platform_id, > cl_device_type device_type, > piglit_cl_get_error_name(errNo)); > return 0; > } > - > + > /* get device list */ > if(device_ids != NULL && num_device_ids > 0) { > *device_ids = malloc(num_device_ids * > sizeof(cl_device_id)); > @@ -761,7 +761,7 @@ > piglit_cl_build_program_with_source_extended(piglit_cl_context context, > piglit_cl_get_error_name(errNo)); > return NULL; > } > - > + > errNo = clBuildProgram(program, > context->num_devices, > context->device_ids, > @@ -788,7 +788,7 @@ > piglit_cl_build_program_with_source_extended(piglit_cl_context context, > char* log = > piglit_cl_get_program_build_info(program, > > context->device_ids[i], > > CL_PROGRAM_BUILD_LOG); > - > + > printf("Build log for device %s:\n -------- \n%s\n > -------- \n", > device_name, > log); > @@ -848,11 +848,11 @@ > piglit_cl_build_program_with_binary_extended(piglit_cl_context context, > for(i = 0; i < context->num_devices; i++) { > char* device_name = > piglit_cl_get_device_info(context->device_ids[i], > > CL_DEVICE_NAME); > - > + > printf("Error for %s: %s\n", > device_name, > piglit_cl_get_error_name(binary_status[i])); > - > + > free(device_name); > } > > @@ -860,7 +860,7 @@ > piglit_cl_build_program_with_binary_extended(piglit_cl_context context, > return NULL; > } > free(binary_status); > - > + > errNo = clBuildProgram(program, > context->num_devices, > context->device_ids, > @@ -884,11 +884,11 @@ > piglit_cl_build_program_with_binary_extended(piglit_cl_context context, > char* log = > piglit_cl_get_program_build_info(program, > > context->device_ids[i], > > CL_PROGRAM_BUILD_LOG); > - > + > printf("Build log for device %s:\n -------- \n%s\n > -------- \n", > device_name, > log); > - > + > free(device_name); > free(log); > } > -- > 2.1.4 > >
On Sat, 2015-06-13 at 21:22 -0500, Aaron Watry wrote: > Meh, this still feels broken. Give me a bit longer. and it is :). I don't think this can work based on abs(expected - real), since ULP depends on the magnitude of the numbers. This information is lost after subtraction. I had an idea some time back to implement this using nextafterf in both directions and checking whether the result falls in that interval. However, there is still a problem. Some of the expected values are already rounded (I'm not sure what rounding mode is used by python by default), but unless it always rounds in one direction, we'll still get slight differences based on whether the actual value was rounded up or down. I have attached a small test program that shows the deficiencies of fabsf based approaches. regards, Jan > > --Aaron > > On Sat, Jun 13, 2015 at 2:28 PM, Aaron Watry <awatry@gmail.com> wrote: > > > We need to actually check against the float value from the union, > > instead of just doing (diff > ulp), which seems to cast the diff to > > an int before checking against ulp. > > > > Signed-off-by: Aaron Watry <awatry@gmail.com> > > CC: Tom Stellard <thomas.stellard@amd.com> > > CC: Jan Vesely <jan.vesely@rutgers.edu> > > --- > > tests/util/piglit-util-cl.c | 28 ++++++++++++++-------------- > > 1 file changed, 14 insertions(+), 14 deletions(-) > > > > diff --git a/tests/util/piglit-util-cl.c b/tests/util/piglit-util-cl.c > > index 47e0c7a..6cdd718 100644 > > --- a/tests/util/piglit-util-cl.c > > +++ b/tests/util/piglit-util-cl.c > > @@ -80,7 +80,7 @@ piglit_cl_probe_floating(float value, float expect, > > uint32_t ulp) > > > > diff = fabsf(value - expect); > > > > - if(diff > ulp || isnan(value)) { > > + if (diff > t.f || isnan(value)) { > > printf("Expecting %f (0x%x) with tolerance %f (%u ulps), > > but got %f (0x%x)\n", > > e.f, e.u, t.f, t.u, v.f, v.u); > > return false; > > @@ -108,7 +108,7 @@ piglit_cl_probe_double(double value, double expect, > > uint64_t ulp) > > > > diff = fabsl(value - expect); > > > > - if(diff > ulp || isnan(value)) { > > + if (diff > t.f || isnan(value)) { > > printf("Expecting %f (0x%lx) with tolerance %f (%lu ulps), > > but got %f (0x%lx)\n", > > e.f, e.u, t.f, t.u, v.f, v.u); > > return false; > > @@ -162,7 +162,7 @@ piglit_cl_get_platform_version(cl_platform_id platform) > > int scanf_count; > > int major; > > int minor; > > - > > + > > /* > > * Returned format: > > * > > OpenCL<space><major_version.minor_version><space><platform-specific > > information> > > @@ -353,7 +353,7 @@ piglit_cl_get_info(void* fn_ptr, void* obj, cl_uint > > param) > > > > if(errNo == CL_SUCCESS) { > > param_ptr = calloc(param_size, sizeof(char)); > > - > > + > > /* retrieve param */ > > if(fn_ptr == clGetPlatformInfo) { > > errNo = clGetPlatformInfo(*(cl_platform_id*)obj, > > param, > > @@ -463,7 +463,7 @@ piglit_cl_get_program_build_info(cl_program program, > > cl_device_id device, > > .program = program, > > .device = device > > }; > > - > > + > > return piglit_cl_get_info(clGetProgramBuildInfo, &args, param); > > } > > > > @@ -479,7 +479,7 @@ piglit_cl_get_kernel_work_group_info(cl_kernel kernel, > > cl_device_id device, > > .kernel = kernel, > > .device = device > > }; > > - > > + > > return piglit_cl_get_info(clGetKernelWorkGroupInfo, &args, param); > > } > > > > @@ -620,7 +620,7 @@ piglit_cl_get_device_ids(cl_platform_id platform_id, > > cl_device_type device_type, > > piglit_cl_get_error_name(errNo)); > > return 0; > > } > > - > > + > > /* get device list */ > > if(device_ids != NULL && num_device_ids > 0) { > > *device_ids = malloc(num_device_ids * > > sizeof(cl_device_id)); > > @@ -761,7 +761,7 @@ > > piglit_cl_build_program_with_source_extended(piglit_cl_context context, > > piglit_cl_get_error_name(errNo)); > > return NULL; > > } > > - > > + > > errNo = clBuildProgram(program, > > context->num_devices, > > context->device_ids, > > @@ -788,7 +788,7 @@ > > piglit_cl_build_program_with_source_extended(piglit_cl_context context, > > char* log = > > piglit_cl_get_program_build_info(program, > > > > context->device_ids[i], > > > > CL_PROGRAM_BUILD_LOG); > > - > > + > > printf("Build log for device %s:\n -------- \n%s\n > > -------- \n", > > device_name, > > log); > > @@ -848,11 +848,11 @@ > > piglit_cl_build_program_with_binary_extended(piglit_cl_context context, > > for(i = 0; i < context->num_devices; i++) { > > char* device_name = > > piglit_cl_get_device_info(context->device_ids[i], > > > > CL_DEVICE_NAME); > > - > > + > > printf("Error for %s: %s\n", > > device_name, > > piglit_cl_get_error_name(binary_status[i])); > > - > > + > > free(device_name); > > } > > > > @@ -860,7 +860,7 @@ > > piglit_cl_build_program_with_binary_extended(piglit_cl_context context, > > return NULL; > > } > > free(binary_status); > > - > > + > > errNo = clBuildProgram(program, > > context->num_devices, > > context->device_ids, > > @@ -884,11 +884,11 @@ > > piglit_cl_build_program_with_binary_extended(piglit_cl_context context, > > char* log = > > piglit_cl_get_program_build_info(program, > > > > context->device_ids[i], > > > > CL_PROGRAM_BUILD_LOG); > > - > > + > > printf("Build log for device %s:\n -------- \n%s\n > > -------- \n", > > device_name, > > log); > > - > > + > > free(device_name); > > free(log); > > } > > -- > > 2.1.4 > > > >
On Sun, Jun 14, 2015 at 4:19 PM, Jan Vesely <jan.vesely@rutgers.edu> wrote: > On Sat, 2015-06-13 at 21:22 -0500, Aaron Watry wrote: > > Meh, this still feels broken. Give me a bit longer. > > and it is :). I don't think this can work based on abs(expected - real), > since ULP depends on the magnitude of the numbers. This information is > lost after subtraction. > Yeah, that's essentially the conclusion that I came to last night as well. Currently, we're saying that 3 ULP is 3x the smallest representable floating point number, not 3x the interval between adjacent float values for the given magnitude. I was thinking that we might need to take the expected value, cast to unsigned, add the ulp value, then convert back to float. From there, find the difference between expected and the expected+ulp, and use that as a tolerance. We could alternatively call nextafterf/nextafter the required number of times from the expected value in either direction and get a min/max allowed value and then check that the result value is in that range. Yes, we still run into cases where the tests themselves are expected an incorrectly rounded value, but at least the ULP checking code might be functioning correctly then. Thoughts? --Aaron > I had an idea some time back to implement this using nextafterf in both > directions and checking whether the result falls in that interval. > However, there is still a problem. Some of the expected values are > already rounded (I'm not sure what rounding mode is used by python by > default), but unless it always rounds in one direction, we'll still get > slight differences based on whether the actual value was rounded up or > down. > > I have attached a small test program that shows the deficiencies of > fabsf based approaches. > > regards, > Jan > > > > > --Aaron > > > > On Sat, Jun 13, 2015 at 2:28 PM, Aaron Watry <awatry@gmail.com> wrote: > > > > > We need to actually check against the float value from the union, > > > instead of just doing (diff > ulp), which seems to cast the diff to > > > an int before checking against ulp. > > > > > > Signed-off-by: Aaron Watry <awatry@gmail.com> > > > CC: Tom Stellard <thomas.stellard@amd.com> > > > CC: Jan Vesely <jan.vesely@rutgers.edu> > > > --- > > > tests/util/piglit-util-cl.c | 28 ++++++++++++++-------------- > > > 1 file changed, 14 insertions(+), 14 deletions(-) > > > > > > diff --git a/tests/util/piglit-util-cl.c b/tests/util/piglit-util-cl.c > > > index 47e0c7a..6cdd718 100644 > > > --- a/tests/util/piglit-util-cl.c > > > +++ b/tests/util/piglit-util-cl.c > > > @@ -80,7 +80,7 @@ piglit_cl_probe_floating(float value, float expect, > > > uint32_t ulp) > > > > > > diff = fabsf(value - expect); > > > > > > - if(diff > ulp || isnan(value)) { > > > + if (diff > t.f || isnan(value)) { > > > printf("Expecting %f (0x%x) with tolerance %f (%u > ulps), > > > but got %f (0x%x)\n", > > > e.f, e.u, t.f, t.u, v.f, v.u); > > > return false; > > > @@ -108,7 +108,7 @@ piglit_cl_probe_double(double value, double expect, > > > uint64_t ulp) > > > > > > diff = fabsl(value - expect); > > > > > > - if(diff > ulp || isnan(value)) { > > > + if (diff > t.f || isnan(value)) { > > > printf("Expecting %f (0x%lx) with tolerance %f (%lu > ulps), > > > but got %f (0x%lx)\n", > > > e.f, e.u, t.f, t.u, v.f, v.u); > > > return false; > > > @@ -162,7 +162,7 @@ piglit_cl_get_platform_version(cl_platform_id > platform) > > > int scanf_count; > > > int major; > > > int minor; > > > - > > > + > > > /* > > > * Returned format: > > > * > > > OpenCL<space><major_version.minor_version><space><platform-specific > > > information> > > > @@ -353,7 +353,7 @@ piglit_cl_get_info(void* fn_ptr, void* obj, cl_uint > > > param) > > > > > > if(errNo == CL_SUCCESS) { > > > param_ptr = calloc(param_size, sizeof(char)); > > > - > > > + > > > /* retrieve param */ > > > if(fn_ptr == clGetPlatformInfo) { > > > errNo = > clGetPlatformInfo(*(cl_platform_id*)obj, > > > param, > > > @@ -463,7 +463,7 @@ piglit_cl_get_program_build_info(cl_program > program, > > > cl_device_id device, > > > .program = program, > > > .device = device > > > }; > > > - > > > + > > > return piglit_cl_get_info(clGetProgramBuildInfo, &args, param); > > > } > > > > > > @@ -479,7 +479,7 @@ piglit_cl_get_kernel_work_group_info(cl_kernel > kernel, > > > cl_device_id device, > > > .kernel = kernel, > > > .device = device > > > }; > > > - > > > + > > > return piglit_cl_get_info(clGetKernelWorkGroupInfo, &args, > param); > > > } > > > > > > @@ -620,7 +620,7 @@ piglit_cl_get_device_ids(cl_platform_id > platform_id, > > > cl_device_type device_type, > > > > piglit_cl_get_error_name(errNo)); > > > return 0; > > > } > > > - > > > + > > > /* get device list */ > > > if(device_ids != NULL && num_device_ids > 0) { > > > *device_ids = malloc(num_device_ids * > > > sizeof(cl_device_id)); > > > @@ -761,7 +761,7 @@ > > > piglit_cl_build_program_with_source_extended(piglit_cl_context context, > > > piglit_cl_get_error_name(errNo)); > > > return NULL; > > > } > > > - > > > + > > > errNo = clBuildProgram(program, > > > context->num_devices, > > > context->device_ids, > > > @@ -788,7 +788,7 @@ > > > piglit_cl_build_program_with_source_extended(piglit_cl_context context, > > > char* log = > > > piglit_cl_get_program_build_info(program, > > > > > > context->device_ids[i], > > > > > > CL_PROGRAM_BUILD_LOG); > > > - > > > + > > > printf("Build log for device %s:\n -------- > \n%s\n > > > -------- \n", > > > device_name, > > > log); > > > @@ -848,11 +848,11 @@ > > > piglit_cl_build_program_with_binary_extended(piglit_cl_context context, > > > for(i = 0; i < context->num_devices; i++) { > > > char* device_name = > > > piglit_cl_get_device_info(context->device_ids[i], > > > > > > CL_DEVICE_NAME); > > > - > > > + > > > printf("Error for %s: %s\n", > > > device_name, > > > > piglit_cl_get_error_name(binary_status[i])); > > > - > > > + > > > free(device_name); > > > } > > > > > > @@ -860,7 +860,7 @@ > > > piglit_cl_build_program_with_binary_extended(piglit_cl_context context, > > > return NULL; > > > } > > > free(binary_status); > > > - > > > + > > > errNo = clBuildProgram(program, > > > context->num_devices, > > > context->device_ids, > > > @@ -884,11 +884,11 @@ > > > piglit_cl_build_program_with_binary_extended(piglit_cl_context context, > > > char* log = > > > piglit_cl_get_program_build_info(program, > > > > > > context->device_ids[i], > > > > > > CL_PROGRAM_BUILD_LOG); > > > - > > > + > > > printf("Build log for device %s:\n -------- > \n%s\n > > > -------- \n", > > > device_name, > > > log); > > > - > > > + > > > free(device_name); > > > free(log); > > > } > > > -- > > > 2.1.4 > > > > > > > > > -- > Jan Vesely <jan.vesely@rutgers.edu> >
On Sun, Jun 14, 2015 at 7:48 PM, Aaron Watry <awatry@gmail.com> wrote: > > > On Sun, Jun 14, 2015 at 4:19 PM, Jan Vesely <jan.vesely@rutgers.edu> > wrote: > >> On Sat, 2015-06-13 at 21:22 -0500, Aaron Watry wrote: >> > Meh, this still feels broken. Give me a bit longer. >> >> and it is :). I don't think this can work based on abs(expected - real), >> since ULP depends on the magnitude of the numbers. This information is >> lost after subtraction. >> > > Yeah, that's essentially the conclusion that I came to last night as > well. Currently, we're saying that 3 ULP is 3x the smallest representable > floating point number, not 3x the interval between adjacent float values > for the given magnitude. > > I was thinking that we might need to take the expected value, cast to > unsigned, add the ulp value, then convert back to float. From there, find > the difference between expected and the expected+ulp, and use that as a > tolerance. We could alternatively call nextafterf/nextafter the required > number of times from the expected value in either direction and get a > min/max allowed value and then check that the result value is in that range. > I considered casting to ints too. However, it runs into problems in some corner cases (like neg zero, crossing exponent boundaries, ...), nextafter offered an easy way out. although it's still not perfect. When the number sits on exponent boundary, going up or down gives different steps. one example is 2^24: 16777214: 16777214.000000 16777215: 16777215.000000 16777216: 16777216.000000 16777217: 16777216.000000 16777218: 16777218.000000 16777219: 16777220.000000 we could take something like unit = nextafterf(expected) - expected; and then compare against ulp * unit, that way we can even subtract 0.5 unit for python/piglit rounding. Although nextafterf(expected) - expected hits representability issues for very large numbers. however the problem whether to take nextaftef(x, 0.0f) or nextafterf(x, +/- inf) persits. I'm not sure which one we want. if python always rounded to/away from zero, we could always take the opposite, but it would cause problems for non-integer ulp (which we don't support at the moment anyway). Jan > > Yes, we still run into cases where the tests themselves are expected an > incorrectly rounded value, but at least the ULP checking code might be > functioning correctly then. > > Thoughts? > > --Aaron > > >> I had an idea some time back to implement this using nextafterf in both >> directions and checking whether the result falls in that interval. >> However, there is still a problem. Some of the expected values are >> already rounded (I'm not sure what rounding mode is used by python by >> default), but unless it always rounds in one direction, we'll still get >> slight differences based on whether the actual value was rounded up or >> down. >> >> I have attached a small test program that shows the deficiencies of >> fabsf based approaches. >> >> regards, >> Jan >> >> > >> > --Aaron >> > >> > On Sat, Jun 13, 2015 at 2:28 PM, Aaron Watry <awatry@gmail.com> wrote: >> > >> > > We need to actually check against the float value from the union, >> > > instead of just doing (diff > ulp), which seems to cast the diff to >> > > an int before checking against ulp. >> > > >> > > Signed-off-by: Aaron Watry <awatry@gmail.com> >> > > CC: Tom Stellard <thomas.stellard@amd.com> >> > > CC: Jan Vesely <jan.vesely@rutgers.edu> >> > > --- >> > > tests/util/piglit-util-cl.c | 28 ++++++++++++++-------------- >> > > 1 file changed, 14 insertions(+), 14 deletions(-) >> > > >> > > diff --git a/tests/util/piglit-util-cl.c b/tests/util/piglit-util-cl.c >> > > index 47e0c7a..6cdd718 100644 >> > > --- a/tests/util/piglit-util-cl.c >> > > +++ b/tests/util/piglit-util-cl.c >> > > @@ -80,7 +80,7 @@ piglit_cl_probe_floating(float value, float expect, >> > > uint32_t ulp) >> > > >> > > diff = fabsf(value - expect); >> > > >> > > - if(diff > ulp || isnan(value)) { >> > > + if (diff > t.f || isnan(value)) { >> > > printf("Expecting %f (0x%x) with tolerance %f (%u >> ulps), >> > > but got %f (0x%x)\n", >> > > e.f, e.u, t.f, t.u, v.f, v.u); >> > > return false; >> > > @@ -108,7 +108,7 @@ piglit_cl_probe_double(double value, double >> expect, >> > > uint64_t ulp) >> > > >> > > diff = fabsl(value - expect); >> > > >> > > - if(diff > ulp || isnan(value)) { >> > > + if (diff > t.f || isnan(value)) { >> > > printf("Expecting %f (0x%lx) with tolerance %f (%lu >> ulps), >> > > but got %f (0x%lx)\n", >> > > e.f, e.u, t.f, t.u, v.f, v.u); >> > > return false; >> > > @@ -162,7 +162,7 @@ piglit_cl_get_platform_version(cl_platform_id >> platform) >> > > int scanf_count; >> > > int major; >> > > int minor; >> > > - >> > > + >> > > /* >> > > * Returned format: >> > > * >> > > OpenCL<space><major_version.minor_version><space><platform-specific >> > > information> >> > > @@ -353,7 +353,7 @@ piglit_cl_get_info(void* fn_ptr, void* obj, >> cl_uint >> > > param) >> > > >> > > if(errNo == CL_SUCCESS) { >> > > param_ptr = calloc(param_size, sizeof(char)); >> > > - >> > > + >> > > /* retrieve param */ >> > > if(fn_ptr == clGetPlatformInfo) { >> > > errNo = >> clGetPlatformInfo(*(cl_platform_id*)obj, >> > > param, >> > > @@ -463,7 +463,7 @@ piglit_cl_get_program_build_info(cl_program >> program, >> > > cl_device_id device, >> > > .program = program, >> > > .device = device >> > > }; >> > > - >> > > + >> > > return piglit_cl_get_info(clGetProgramBuildInfo, &args, >> param); >> > > } >> > > >> > > @@ -479,7 +479,7 @@ piglit_cl_get_kernel_work_group_info(cl_kernel >> kernel, >> > > cl_device_id device, >> > > .kernel = kernel, >> > > .device = device >> > > }; >> > > - >> > > + >> > > return piglit_cl_get_info(clGetKernelWorkGroupInfo, &args, >> param); >> > > } >> > > >> > > @@ -620,7 +620,7 @@ piglit_cl_get_device_ids(cl_platform_id >> platform_id, >> > > cl_device_type device_type, >> > > >> piglit_cl_get_error_name(errNo)); >> > > return 0; >> > > } >> > > - >> > > + >> > > /* get device list */ >> > > if(device_ids != NULL && num_device_ids > 0) { >> > > *device_ids = malloc(num_device_ids * >> > > sizeof(cl_device_id)); >> > > @@ -761,7 +761,7 @@ >> > > piglit_cl_build_program_with_source_extended(piglit_cl_context >> context, >> > > piglit_cl_get_error_name(errNo)); >> > > return NULL; >> > > } >> > > - >> > > + >> > > errNo = clBuildProgram(program, >> > > context->num_devices, >> > > context->device_ids, >> > > @@ -788,7 +788,7 @@ >> > > piglit_cl_build_program_with_source_extended(piglit_cl_context >> context, >> > > char* log = >> > > piglit_cl_get_program_build_info(program, >> > > >> > > context->device_ids[i], >> > > >> > > CL_PROGRAM_BUILD_LOG); >> > > - >> > > + >> > > printf("Build log for device %s:\n -------- >> \n%s\n >> > > -------- \n", >> > > device_name, >> > > log); >> > > @@ -848,11 +848,11 @@ >> > > piglit_cl_build_program_with_binary_extended(piglit_cl_context >> context, >> > > for(i = 0; i < context->num_devices; i++) { >> > > char* device_name = >> > > piglit_cl_get_device_info(context->device_ids[i], >> > > >> > > CL_DEVICE_NAME); >> > > - >> > > + >> > > printf("Error for %s: %s\n", >> > > device_name, >> > > >> piglit_cl_get_error_name(binary_status[i])); >> > > - >> > > + >> > > free(device_name); >> > > } >> > > >> > > @@ -860,7 +860,7 @@ >> > > piglit_cl_build_program_with_binary_extended(piglit_cl_context >> context, >> > > return NULL; >> > > } >> > > free(binary_status); >> > > - >> > > + >> > > errNo = clBuildProgram(program, >> > > context->num_devices, >> > > context->device_ids, >> > > @@ -884,11 +884,11 @@ >> > > piglit_cl_build_program_with_binary_extended(piglit_cl_context >> context, >> > > char* log = >> > > piglit_cl_get_program_build_info(program, >> > > >> > > context->device_ids[i], >> > > >> > > CL_PROGRAM_BUILD_LOG); >> > > - >> > > + >> > > printf("Build log for device %s:\n -------- >> \n%s\n >> > > -------- \n", >> > > device_name, >> > > log); >> > > - >> > > + >> > > free(device_name); >> > > free(log); >> > > } >> > > -- >> > > 2.1.4 >> > > >> > > >> >> >> -- >> Jan Vesely <jan.vesely@rutgers.edu> >> > >
Just throwing a few ideas/warnings your way :) I ran into these same issues when implementing arb_shader_precision for GLSL. It's not my intention to introduce more questions than answers here. This is all simply coming from a fellow dev who's been beaten down by float's guerrilla attacks, not a float expert by any means. First, float is bees, let's get that out of the way. nextafter does sound like the clearest solution. I used a hand-generated method to convert to the binary representation for my purposes, which made the most sense to me: def _floatToBits(f): s = struct.pack('>f', f) # capital 'L' is important - sign bit is already built into 754 return struct.unpack('>L', s)[0] def _bitsToFloat(b): s = struct.pack('>l', b) return struct.unpack('>f', s)[0] However nextafter accomplishes the same thing. I did find it easiest/clearest to move to the binary representation to do all ulp-boundary math and comparisons, and only convert back to float at the end. There are many gotcha's here in addition to the ones you've already mentioned. The first would be "who's ulp?" I haven't checked the spec governing these CL tests in question, but arb_shader_precision never specified which ulp it was referring to. Is it the value of an ulp at the expected value? Or the value of an ulp at one of the inputs? Or the largest of all those? Also, it's very helpful when the spec specifies a range of values for which the operation can be held to the specified number of ulps. Secondly the rounding issue you raised is valid and difficult to get right. Operations submitted to the CPU will use the rounding mode it is set up for. I believe the default on intel chips is round-to-nearest, ties-to-even, which seems to be the case for most GPUs as well, but I'm not an expert on this subject by any means. Rounding doesn't just happen at the end, either - intermediate values in complex equations will be rounded as well - these rounding errors could multiply, or cancel, depending on the inputs. The other thing to consider is if the equation that produced the expected value used python's round(), and it was run with Python2, then round() would use round-away-from-zero; python3's round uses round-to-nearest, ties-to-even. Regarding the generation of expected values, it's important to be very cautious about equations that stack multiple floating point operations, as this is where errors have the potential to get out of hand quickly. The most difficult issue here, in my mind, is fma. Unless you're testing 64-bit floating point on the GPU, it will be doing its calculations in 32-bit space, however, if the GPU compiler employs fma, this can cause intermediate operations to be done at higher precisions. This means that the GPU may be jumping between low-precision and high-precision operations over the course of a single equation, where python may well be doing all operations at a uniform level of precision. This can result in differences in expected results that are very hard to track down. Sadly, it's nearly impossible to predict how a compiler might employ fma for any give equation. You can use python's decimal, bigfloat or numpy (ugh) to clamp down the precision at which the operation is evaluated, but again, it will be difficult-to-impossible to predict if and where the GPU compiler employed fma. Ultimately, you're attempting to get python to run an equation *in exactly the same way* that the shader code does, in order to produce an identical result. This is a difficult thing. Your ulps tolerance will likely, in some way, be accounting for these differences between the CPU and GPU generated results, in addition to its true purpose which is to identify real issues in the GPU compiler or hardware that introduce unintended floating point errors. You will, ultimately, have to run all the test vectors through your chosen solution and carefully inspect any differences in the results. You may discover that certain test vectors are simply not appropriate. In the end, good floating point tests use very carefully selected inputs. Wisdom or deception? Perhaps a bit of both. On 06/15/2015 09:29 AM, Jan Vesely wrote: > > > On Sun, Jun 14, 2015 at 7:48 PM, Aaron Watry <awatry@gmail.com > <mailto:awatry@gmail.com>> wrote: > > > > On Sun, Jun 14, 2015 at 4:19 PM, Jan Vesely <jan.vesely@rutgers.edu > <mailto:jan.vesely@rutgers.edu>> wrote: > > On Sat, 2015-06-13 at 21:22 -0500, Aaron Watry wrote: > > Meh, this still feels broken. Give me a bit longer. > > and it is :). I don't think this can work based on abs(expected > - real), > since ULP depends on the magnitude of the numbers. This > information is > lost after subtraction. > > > Yeah, that's essentially the conclusion that I came to last night as > well. Currently, we're saying that 3 ULP is 3x the smallest > representable floating point number, not 3x the interval between > adjacent float values for the given magnitude. > > I was thinking that we might need to take the expected value, cast > to unsigned, add the ulp value, then convert back to float. From > there, find the difference between expected and the expected+ulp, > and use that as a tolerance. We could alternatively call > nextafterf/nextafter the required number of times from the expected > value in either direction and get a min/max allowed value and then > check that the result value is in that range. > > > I considered casting to ints too. However, it runs into problems in some > corner cases (like neg zero, crossing exponent boundaries, ...), > nextafter offered an easy way out. although it's still not perfect. When > the number sits on exponent boundary, going up or down gives different > steps. one example is 2^24: > 16777214: 16777214.000000 > 16777215: 16777215.000000 > 16777216: 16777216.000000 > 16777217: 16777216.000000 > 16777218: 16777218.000000 > 16777219: 16777220.000000 > > we could take something like unit = nextafterf(expected) - expected; > and then compare against ulp * unit, that way we can even subtract 0.5 > unit for python/piglit rounding. > Although nextafterf(expected) - expected hits representability issues > for very large numbers. > > however the problem whether to take nextaftef(x, 0.0f) or nextafterf(x, > +/- inf) persits. > I'm not sure which one we want. if python always rounded to/away from zero, > we could always take the opposite, but it would cause problems for > non-integer ulp (which we don't support at the moment anyway). > > > Jan > > > Yes, we still run into cases where the tests themselves are expected > an incorrectly rounded value, but at least the ULP checking code > might be functioning correctly then. > > Thoughts? > > --Aaron > > > I had an idea some time back to implement this using nextafterf > in both > directions and checking whether the result falls in that interval. > However, there is still a problem. Some of the expected values are > already rounded (I'm not sure what rounding mode is used by > python by > default), but unless it always rounds in one direction, we'll > still get > slight differences based on whether the actual value was rounded > up or > down. > > I have attached a small test program that shows the deficiencies of > fabsf based approaches. > > regards, > Jan > > > > > --Aaron > > > > On Sat, Jun 13, 2015 at 2:28 PM, Aaron Watry > <awatry@gmail.com <mailto:awatry@gmail.com>> wrote: > > > > > We need to actually check against the float value from the > union, > > > instead of just doing (diff > ulp), which seems to cast the > diff to > > > an int before checking against ulp. > > > > > > Signed-off-by: Aaron Watry <awatry@gmail.com > <mailto:awatry@gmail.com>> > > > CC: Tom Stellard <thomas.stellard@amd.com > <mailto:thomas.stellard@amd.com>> > > > CC: Jan Vesely <jan.vesely@rutgers.edu > <mailto:jan.vesely@rutgers.edu>> > > > --- > > > tests/util/piglit-util-cl.c | 28 ++++++++++++++-------------- > > > 1 file changed, 14 insertions(+), 14 deletions(-) > > > > > > diff --git a/tests/util/piglit-util-cl.c > b/tests/util/piglit-util-cl.c > > > index 47e0c7a..6cdd718 100644 > > > --- a/tests/util/piglit-util-cl.c > > > +++ b/tests/util/piglit-util-cl.c > > > @@ -80,7 +80,7 @@ piglit_cl_probe_floating(float value, > float expect, > > > uint32_t ulp) > > > > > > diff = fabsf(value - expect); > > > > > > - if(diff > ulp || isnan(value)) { > > > + if (diff > t.f || isnan(value)) { > > > printf("Expecting %f (0x%x) with tolerance > %f (%u ulps), > > > but got %f (0x%x)\n", > > > e.f, e.u, t.f, t.u, v.f, v.u); > > > return false; > > > @@ -108,7 +108,7 @@ piglit_cl_probe_double(double value, > double expect, > > > uint64_t ulp) > > > > > > diff = fabsl(value - expect); > > > > > > - if(diff > ulp || isnan(value)) { > > > + if (diff > t.f || isnan(value)) { > > > printf("Expecting %f (0x%lx) with tolerance > %f (%lu ulps), > > > but got %f (0x%lx)\n", > > > e.f, e.u, t.f, t.u, v.f, v.u); > > > return false; > > > @@ -162,7 +162,7 @@ > piglit_cl_get_platform_version(cl_platform_id platform) > > > int scanf_count; > > > int major; > > > int minor; > > > - > > > + > > > /* > > > * Returned format: > > > * > > > > OpenCL<space><major_version.minor_version><space><platform-specific > > > information> > > > @@ -353,7 +353,7 @@ piglit_cl_get_info(void* fn_ptr, void* > obj, cl_uint > > > param) > > > > > > if(errNo == CL_SUCCESS) { > > > param_ptr = calloc(param_size, sizeof(char)); > > > - > > > + > > > /* retrieve param */ > > > if(fn_ptr == clGetPlatformInfo) { > > > errNo = > clGetPlatformInfo(*(cl_platform_id*)obj, > > > param, > > > @@ -463,7 +463,7 @@ > piglit_cl_get_program_build_info(cl_program program, > > > cl_device_id device, > > > .program = program, > > > .device = device > > > }; > > > - > > > + > > > return piglit_cl_get_info(clGetProgramBuildInfo, > &args, param); > > > } > > > > > > @@ -479,7 +479,7 @@ > piglit_cl_get_kernel_work_group_info(cl_kernel kernel, > > > cl_device_id device, > > > .kernel = kernel, > > > .device = device > > > }; > > > - > > > + > > > return piglit_cl_get_info(clGetKernelWorkGroupInfo, > &args, param); > > > } > > > > > > @@ -620,7 +620,7 @@ piglit_cl_get_device_ids(cl_platform_id > platform_id, > > > cl_device_type device_type, > > > > piglit_cl_get_error_name(errNo)); > > > return 0; > > > } > > > - > > > + > > > /* get device list */ > > > if(device_ids != NULL && > num_device_ids > 0) { > > > *device_ids = > malloc(num_device_ids * > > > sizeof(cl_device_id)); > > > @@ -761,7 +761,7 @@ > > > > piglit_cl_build_program_with_source_extended(piglit_cl_context > context, > > > piglit_cl_get_error_name(errNo)); > > > return NULL; > > > } > > > - > > > + > > > errNo = clBuildProgram(program, > > > context->num_devices, > > > context->device_ids, > > > @@ -788,7 +788,7 @@ > > > > piglit_cl_build_program_with_source_extended(piglit_cl_context > context, > > > char* log = > > > piglit_cl_get_program_build_info(program, > > > > > > context->device_ids[i], > > > > > > CL_PROGRAM_BUILD_LOG); > > > - > > > + > > > printf("Build log for device %s:\n > -------- \n%s\n > > > -------- \n", > > > device_name, > > > log); > > > @@ -848,11 +848,11 @@ > > > > piglit_cl_build_program_with_binary_extended(piglit_cl_context > context, > > > for(i = 0; i < context->num_devices; i++) { > > > char* device_name = > > > piglit_cl_get_device_info(context->device_ids[i], > > > > > > CL_DEVICE_NAME); > > > - > > > + > > > printf("Error for %s: %s\n", > > > device_name, > > > > piglit_cl_get_error_name(binary_status[i])); > > > - > > > + > > > free(device_name); > > > } > > > > > > @@ -860,7 +860,7 @@ > > > > piglit_cl_build_program_with_binary_extended(piglit_cl_context > context, > > > return NULL; > > > } > > > free(binary_status); > > > - > > > + > > > errNo = clBuildProgram(program, > > > context->num_devices, > > > context->device_ids, > > > @@ -884,11 +884,11 @@ > > > > piglit_cl_build_program_with_binary_extended(piglit_cl_context > context, > > > char* log = > > > piglit_cl_get_program_build_info(program, > > > > > > context->device_ids[i], > > > > > > CL_PROGRAM_BUILD_LOG); > > > - > > > + > > > printf("Build log for device %s:\n > -------- \n%s\n > > > -------- \n", > > > device_name, > > > log); > > > - > > > + > > > free(device_name); > > > free(log); > > > } > > > -- > > > 2.1.4 > > > > > > > > > -- > Jan Vesely <jan.vesely@rutgers.edu <mailto:jan.vesely@rutgers.edu>> > > > > > > _______________________________________________ > Piglit mailing list > Piglit@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/piglit >
Hi Jan/Tom, Sorry to resurrect an ancient thread, but I was poking at the piglit CL ULP issue last night, and thought I'd try to get us closer to a solution after dropping the matter for too long. I've modified your test program with what I was thinking of trying, and I wouldn't mind your feedback. I realize that we still have issues with discrepancies between python-generated expected results based on cpu/rounding mode and the , but I'd at least like for us to be able to nail down the C portion before we start redefining all of our expected test results. In the long term, we probably want to hand-select our inputs/outputs instead of trusting python, but I don't necessarily think that we should block fixing our ULP calculations on getting that done. For now, I'm still ignoring the half-ULP possibility, and just generating a minimum/maximum allowed value based on running nextafterf(expected, POS/NEG INFINITY) in a loop for ULP iterations. Does that sound like a tenable solution? --Aaron On Sun, Jun 14, 2015 at 4:19 PM, Jan Vesely <jan.vesely@rutgers.edu> wrote: > On Sat, 2015-06-13 at 21:22 -0500, Aaron Watry wrote: > > Meh, this still feels broken. Give me a bit longer. > > and it is :). I don't think this can work based on abs(expected - real), > since ULP depends on the magnitude of the numbers. This information is > lost after subtraction. > > I had an idea some time back to implement this using nextafterf in both > directions and checking whether the result falls in that interval. > However, there is still a problem. Some of the expected values are > already rounded (I'm not sure what rounding mode is used by python by > default), but unless it always rounds in one direction, we'll still get > slight differences based on whether the actual value was rounded up or > down. > > I have attached a small test program that shows the deficiencies of > fabsf based approaches. > > regards, > Jan > > > > > --Aaron > > > > On Sat, Jun 13, 2015 at 2:28 PM, Aaron Watry <awatry@gmail.com> wrote: > > > > > We need to actually check against the float value from the union, > > > instead of just doing (diff > ulp), which seems to cast the diff to > > > an int before checking against ulp. > > > > > > Signed-off-by: Aaron Watry <awatry@gmail.com> > > > CC: Tom Stellard <thomas.stellard@amd.com> > > > CC: Jan Vesely <jan.vesely@rutgers.edu> > > > --- > > > tests/util/piglit-util-cl.c | 28 ++++++++++++++-------------- > > > 1 file changed, 14 insertions(+), 14 deletions(-) > > > > > > diff --git a/tests/util/piglit-util-cl.c b/tests/util/piglit-util-cl.c > > > index 47e0c7a..6cdd718 100644 > > > --- a/tests/util/piglit-util-cl.c > > > +++ b/tests/util/piglit-util-cl.c > > > @@ -80,7 +80,7 @@ piglit_cl_probe_floating(float value, float expect, > > > uint32_t ulp) > > > > > > diff = fabsf(value - expect); > > > > > > - if(diff > ulp || isnan(value)) { > > > + if (diff > t.f || isnan(value)) { > > > printf("Expecting %f (0x%x) with tolerance %f (%u > ulps), > > > but got %f (0x%x)\n", > > > e.f, e.u, t.f, t.u, v.f, v.u); > > > return false; > > > @@ -108,7 +108,7 @@ piglit_cl_probe_double(double value, double expect, > > > uint64_t ulp) > > > > > > diff = fabsl(value - expect); > > > > > > - if(diff > ulp || isnan(value)) { > > > + if (diff > t.f || isnan(value)) { > > > printf("Expecting %f (0x%lx) with tolerance %f (%lu > ulps), > > > but got %f (0x%lx)\n", > > > e.f, e.u, t.f, t.u, v.f, v.u); > > > return false; > > > @@ -162,7 +162,7 @@ piglit_cl_get_platform_version(cl_platform_id > platform) > > > int scanf_count; > > > int major; > > > int minor; > > > - > > > + > > > /* > > > * Returned format: > > > * > > > OpenCL<space><major_version.minor_version><space><platform-specific > > > information> > > > @@ -353,7 +353,7 @@ piglit_cl_get_info(void* fn_ptr, void* obj, cl_uint > > > param) > > > > > > if(errNo == CL_SUCCESS) { > > > param_ptr = calloc(param_size, sizeof(char)); > > > - > > > + > > > /* retrieve param */ > > > if(fn_ptr == clGetPlatformInfo) { > > > errNo = > clGetPlatformInfo(*(cl_platform_id*)obj, > > > param, > > > @@ -463,7 +463,7 @@ piglit_cl_get_program_build_info(cl_program > program, > > > cl_device_id device, > > > .program = program, > > > .device = device > > > }; > > > - > > > + > > > return piglit_cl_get_info(clGetProgramBuildInfo, &args, param); > > > } > > > > > > @@ -479,7 +479,7 @@ piglit_cl_get_kernel_work_group_info(cl_kernel > kernel, > > > cl_device_id device, > > > .kernel = kernel, > > > .device = device > > > }; > > > - > > > + > > > return piglit_cl_get_info(clGetKernelWorkGroupInfo, &args, > param); > > > } > > > > > > @@ -620,7 +620,7 @@ piglit_cl_get_device_ids(cl_platform_id > platform_id, > > > cl_device_type device_type, > > > > piglit_cl_get_error_name(errNo)); > > > return 0; > > > } > > > - > > > + > > > /* get device list */ > > > if(device_ids != NULL && num_device_ids > 0) { > > > *device_ids = malloc(num_device_ids * > > > sizeof(cl_device_id)); > > > @@ -761,7 +761,7 @@ > > > piglit_cl_build_program_with_source_extended(piglit_cl_context context, > > > piglit_cl_get_error_name(errNo)); > > > return NULL; > > > } > > > - > > > + > > > errNo = clBuildProgram(program, > > > context->num_devices, > > > context->device_ids, > > > @@ -788,7 +788,7 @@ > > > piglit_cl_build_program_with_source_extended(piglit_cl_context context, > > > char* log = > > > piglit_cl_get_program_build_info(program, > > > > > > context->device_ids[i], > > > > > > CL_PROGRAM_BUILD_LOG); > > > - > > > + > > > printf("Build log for device %s:\n -------- > \n%s\n > > > -------- \n", > > > device_name, > > > log); > > > @@ -848,11 +848,11 @@ > > > piglit_cl_build_program_with_binary_extended(piglit_cl_context context, > > > for(i = 0; i < context->num_devices; i++) { > > > char* device_name = > > > piglit_cl_get_device_info(context->device_ids[i], > > > > > > CL_DEVICE_NAME); > > > - > > > + > > > printf("Error for %s: %s\n", > > > device_name, > > > > piglit_cl_get_error_name(binary_status[i])); > > > - > > > + > > > free(device_name); > > > } > > > > > > @@ -860,7 +860,7 @@ > > > piglit_cl_build_program_with_binary_extended(piglit_cl_context context, > > > return NULL; > > > } > > > free(binary_status); > > > - > > > + > > > errNo = clBuildProgram(program, > > > context->num_devices, > > > context->device_ids, > > > @@ -884,11 +884,11 @@ > > > piglit_cl_build_program_with_binary_extended(piglit_cl_context context, > > > char* log = > > > piglit_cl_get_program_build_info(program, > > > > > > context->device_ids[i], > > > > > > CL_PROGRAM_BUILD_LOG); > > > - > > > + > > > printf("Build log for device %s:\n -------- > \n%s\n > > > -------- \n", > > > device_name, > > > log); > > > - > > > + > > > free(device_name); > > > free(log); > > > } > > > -- > > > 2.1.4 > > > > > > > > > -- > Jan Vesely <jan.vesely@rutgers.edu> >
On Tue, Dec 29, 2015 at 1:01 PM, Aaron Watry <awatry@gmail.com> wrote: > Hi Jan/Tom, > > Sorry to resurrect an ancient thread, but I was poking at the piglit CL ULP > issue last night, and thought I'd try to get us closer to a solution after > dropping the matter for too long. > > I've modified your test program with what I was thinking of trying, and I > wouldn't mind your feedback. > > I realize that we still have issues with discrepancies between > python-generated expected results based on cpu/rounding mode and the , but > I'd at least like for us to be able to nail down the C portion before we > start redefining all of our expected test results. In the long term, we > probably want to hand-select our inputs/outputs instead of trusting python, > but I don't necessarily think that we should block fixing our ULP > calculations on getting that done. > > For now, I'm still ignoring the half-ULP possibility, and just generating a > minimum/maximum allowed value based on running nextafterf(expected, POS/NEG > INFINITY) in a loop for ULP iterations. Does that sound like a tenable > solution? I'm not 100% sure what you're trying to do, but Micah Fedke (and to a lesser extent I) spent a ton of time trying to make GL_ARB_shader_precision piglits in an automated manner (the ext specifies allowable error for a bunch of functions). We ended up with a solution which can test some very basic things on its own (single operations), but any complex function basically can only be tested with manually-selected values. Take a look at generated_tests/gen_shader_precision_tests.py . The basic issue that you have to contend with is that if you ever end up with a mul + add anywhere in the intermediate calculation of anything (e.g. dot product), it might get collapsed into a fma(), which will mess things up greatly. For example imagine you have the following code: x = uniform value which == a * b; y = a * b - x One way y == 0, another way y == some small but not insignificant number due to the added precision in the a*b calculation. No amount of ULP cleverness will save you here. If this has nothing to do with what you're trying to achieve, feel free to ignore :) -ilia
On Tue, Dec 29, 2015 at 12:16 PM, Ilia Mirkin <imirkin@alum.mit.edu> wrote: > On Tue, Dec 29, 2015 at 1:01 PM, Aaron Watry <awatry@gmail.com> wrote: > > Hi Jan/Tom, > > > > Sorry to resurrect an ancient thread, but I was poking at the piglit CL > ULP > > issue last night, and thought I'd try to get us closer to a solution > after > > dropping the matter for too long. > > > > I've modified your test program with what I was thinking of trying, and I > > wouldn't mind your feedback. > > > > I realize that we still have issues with discrepancies between > > python-generated expected results based on cpu/rounding mode and the , > but > > I'd at least like for us to be able to nail down the C portion before we > > start redefining all of our expected test results. In the long term, we > > probably want to hand-select our inputs/outputs instead of trusting > python, > > but I don't necessarily think that we should block fixing our ULP > > calculations on getting that done. > > > > For now, I'm still ignoring the half-ULP possibility, and just > generating a > > minimum/maximum allowed value based on running nextafterf(expected, > POS/NEG > > INFINITY) in a loop for ULP iterations. Does that sound like a tenable > > solution? > > I'm not 100% sure what you're trying to do, but Micah Fedke (and to a > lesser extent I) spent a ton of time trying to make > GL_ARB_shader_precision piglits in an automated manner (the ext > specifies allowable error for a bunch of functions). We ended up with > a solution which can test some very basic things on its own (single > operations), but any complex function basically can only be tested > with manually-selected values. > > Take a look at generated_tests/gen_shader_precision_tests.py . > > The basic issue that you have to contend with is that if you ever end > up with a mul + add anywhere in the intermediate calculation of > anything (e.g. dot product), it might get collapsed into a fma(), > which will mess things up greatly. For example imagine you have the > following code: > > x = uniform value which == a * b; > y = a * b - x > > One way y == 0, another way y == some small but not insignificant > number due to the added precision in the a*b calculation. No amount of > ULP cleverness will save you here. > > If this has nothing to do with what you're trying to achieve, feel > free to ignore :) > > The issue that we have in CL piglits right now is that the program-tester is treating the ULP value for a given test as an absolute tolerance, not as floating units of least precision. Example (generated_tests/cl/builtin/math/builtin-float-cos-1.0.generated.cl ): [test] name: cos float1 kernel_name: test_1_cos_float global_size: 12 0 0 arg_out: 0 buffer float[12] 1.0 0.0 -1.0 0.0 1.0 0.432574513059 0.753902254343 -0.145500033809 0.943808393901 0.626322983292 -0.925879022855 nan tolerance 4 ulp arg_in: 1 buffer float[12] 0.0 1.57079632679 3.14159265359 4.71238898038 6.28318530718 1.12345 7 8 1048576.0 16777216.0 1.32922799578e+36 nan Let's take the second test: cos(1.57079632679) [essentially cos(pi/2)] should be roughly 0.0. The test is specified with a tolerance of 4 ULP. The current piglit CL code is accepting any value in the range [-4.0, ..., 4.0], because it is treating the ULP as an absolute tolerance, not units of least precision. Regardless of whether 0.0 is the absolutely correct answer for cos(1.57079632679), we can hopefully all agree that -4.0 is NOT a valid answer for cosine of anything The functions that I'm trying to fix here (piglit-util-cl.c: piglit_cl_probe_[floating|double]), take: 1) The actual result from the kernel/shader. 2) The expected result defined in the test. 3) A ULP value to use to calculate the acceptable tolerances. But it does it wrong. It was copy/pasted from the integer checking code which takes a tolerance... --Aaron > -ilia >
On Tue, Dec 29, 2015 at 3:18 PM, Aaron Watry <awatry@gmail.com> wrote: > Regardless of whether 0.0 is the absolutely correct answer for > cos(1.57079632679), we can hopefully all agree that -4.0 is NOT a valid > answer for cosine of anything Right so that's clearly wrong :) I was largely warning you about some of the issues we ran into trying to compute ULPs, and especially combining them with each other. The GL_ARB_shader_precision text is quite precise and similarly difficult to test -- e.g. fma() is allowed to be fused or non-fused, at the implementation's option. But that causes all sorts of results to be way different. Perhaps you don't have these issues in the OpenCL specs. -ilia
On Tue, Dec 29, 2015 at 2:28 PM, Ilia Mirkin <imirkin@alum.mit.edu> wrote: > On Tue, Dec 29, 2015 at 3:18 PM, Aaron Watry <awatry@gmail.com> wrote: > > Regardless of whether 0.0 is the absolutely correct answer for > > cos(1.57079632679), we can hopefully all agree that -4.0 is NOT a valid > > answer for cosine of anything > > Right so that's clearly wrong :) I was largely warning you about some > of the issues we ran into trying to compute ULPs, and especially > combining them with each other. The GL_ARB_shader_precision text is > quite precise and similarly difficult to test -- e.g. fma() is allowed > to be fused or non-fused, at the implementation's option. But that > causes all sorts of results to be way different. > > Perhaps you don't have these issues in the OpenCL specs. > > Answers, hopefully? With regards to FMA(a,b,c): CL 1.2, Section 6.12.2 says: "Returns the correctly rounded floating-point representation of the sum of c with the infinitely precise product of a and b. Rounding of intermediate products shall not occur. Edge case behavior is per the IEEE 754-2008 standard." Rounding mode is also called out in section 6.12.2: "The built-in math functions are not affected by the prevailing rounding mode in the calling environment, and always return the same value as they would if called with the round to nearest even rounding mode." CL 1.2, section 7.4: "In this section we discuss the maximum relative error defined as ulp (units in the last place). Addition, subtraction, multiplication, fused multiply-add and conversion between integer and a single precision floating-point format are IEEE 754 compliant and are therefore correctly rounded. Conversion between floating-point formats and explicit conversions specified in section 6.2.3 must be correctly rounded." <SNIP > "The reference value used to compute the ULP value of an arithmetic operation is the infinitely precise result." So, we will still have to deal with the discrepancy between the infinitely precise result and the rounded "expected" result when calculating our ULP-based tolerances, but as already covered in the previous email, the absolute tolerance was definitely wrong. CL has a set of native_* functions that allows the implementation to choose how to calculate the result. Example: native_cos(x), which has implementation-defined precision, but allows the vendor to optimize as they see fit while possibly sacrificing accuracy. Most of the time, I've seen the native_* functions used to call the hardware implementation of those instructions, while the non-native ones are implemented in CL C to achieve the required precision. --Aaron > -ilia >
Hi Aaron, On Tue, 2015-12-29 at 12:01 -0600, Aaron Watry wrote: > Hi Jan/Tom, > > Sorry to resurrect an ancient thread, but I was poking at the piglit > CL ULP > issue last night, and thought I'd try to get us closer to a solution > after > dropping the matter for too long. > > I've modified your test program with what I was thinking of trying, > and I > wouldn't mind your feedback. thanks for picking it up. > > I realize that we still have issues with discrepancies between > python-generated expected results based on cpu/rounding mode and the > , but > I'd at least like for us to be able to nail down the C portion before > we > start redefining all of our expected test results. In the long term, > we > probably want to hand-select our inputs/outputs instead of trusting > python, > but I don't necessarily think that we should block fixing our ULP > calculations on getting that done. I think the python issue is separate, although related. We should assume that whatever provides the reference values is reliable. > > For now, I'm still ignoring the half-ULP possibility, and just > generating a > minimum/maximum allowed value based on running nextafterf(expected, > POS/NEG > INFINITY) in a loop for ULP iterations. Does that sound like a > tenable > solution? I originally considered the same approach, and it has probably the best effort/correctness ratio. I think we should take something similar without waiting for a perfect solution. for more details read below. Here are couple of problems I see: a) the approach assumes that the nextafter distances are always the same, which is not always the case. b) the specs say "The reference value used to compute the ULP value of an arithmetic operation is the infinitely precise result." which in my understanding means that we compare GPU (FP representable) result against infinitely precise mathematical value. the provided value (python reference - PRef) is representable and rounded. thus the ULP reference is anywhere in <PRef - Er_b, PRef + Er_a>, where Er_{a,b} represents rounding interval values for above/below, and ULP_{a,b} the corresponding ULP values. The nextafter approach really checks against N * ULP_a + Er_b, or N * ULP_b + Er_a. Even if ULP_a == ULP_b (in all steps) the value PRef - N * ULP is correct if the infinitely precise result is in <PRef - Er_b, PRef> and incorrect of it is in <PRef, PRef + Er_a>, and we have no way to distinguish these cases. Thus, if you don't use strict inequalities you already test (N + 0.5) ULP (since Er_{a,b} <= 0.5 ULP, for round to nearest modes). Given that we don't have any information about the infinitely precise result we don't even know what ULP value to use for the test if PRef happens to be a boundary value. example: 2^24 is a boundary value for SP float; the neighbouring representable values are (in order): ..., 2^24 -2, 2^24 -1, 2^24, 2^24 +2, 2^24 +4, ... Thus if python provides us with PRef == 2^24 using RTNE(*) rounding, the correct value (Ref) can be anything in <2^24 - 0.5, 2^25>. If the required accuracy is 2 ULP and Ref == 2^24 - 0.3 (ULP = 1.0), then the correct test is: GPU_result >= 2^24 - 2 && GPU_result < 2^24 + 2. ( |GPU_Result - Ref| <= 2.0) if Ref == 2^24 + 0.5 (ULP = 2.0) then the correct test is: GPU_result > 2^24 - 4 && GPU_result <= 2^24 + 4. ( |GPU_Result - Ref| <= 4.0) Note the strictness of inequalities in both cases. changing precision to 2.5 ULP would just change the inequalities to non-strict regards, Jan (*) even is a bit misnomer since all SP floats >= 2.24 are even, I assume it means that mantissa is even > > --Aaron > > On Sun, Jun 14, 2015 at 4:19 PM, Jan Vesely <jan.vesely@rutgers.edu> > wrote: > > > On Sat, 2015-06-13 at 21:22 -0500, Aaron Watry wrote: > > > Meh, this still feels broken. Give me a bit longer. > > > > and it is :). I don't think this can work based on abs(expected - > > real), > > since ULP depends on the magnitude of the numbers. This information > > is > > lost after subtraction. > > > > I had an idea some time back to implement this using nextafterf in > > both > > directions and checking whether the result falls in that interval. > > However, there is still a problem. Some of the expected values are > > already rounded (I'm not sure what rounding mode is used by python > > by > > default), but unless it always rounds in one direction, we'll still > > get > > slight differences based on whether the actual value was rounded up > > or > > down. > > > > I have attached a small test program that shows the deficiencies of > > fabsf based approaches. > > > > regards, > > Jan > > > > > > > > --Aaron > > > > > > On Sat, Jun 13, 2015 at 2:28 PM, Aaron Watry <awatry@gmail.com> > > > wrote: > > > > > > > We need to actually check against the float value from the > > > > union, > > > > instead of just doing (diff > ulp), which seems to cast the > > > > diff to > > > > an int before checking against ulp. > > > > > > > > Signed-off-by: Aaron Watry <awatry@gmail.com> > > > > CC: Tom Stellard <thomas.stellard@amd.com> > > > > CC: Jan Vesely <jan.vesely@rutgers.edu> > > > > --- > > > > tests/util/piglit-util-cl.c | 28 ++++++++++++++-------------- > > > > 1 file changed, 14 insertions(+), 14 deletions(-) > > > > > > > > diff --git a/tests/util/piglit-util-cl.c b/tests/util/piglit- > > > > util-cl.c > > > > index 47e0c7a..6cdd718 100644 > > > > --- a/tests/util/piglit-util-cl.c > > > > +++ b/tests/util/piglit-util-cl.c > > > > @@ -80,7 +80,7 @@ piglit_cl_probe_floating(float value, float > > > > expect, > > > > uint32_t ulp) > > > > > > > > diff = fabsf(value - expect); > > > > > > > > - if(diff > ulp || isnan(value)) { > > > > + if (diff > t.f || isnan(value)) { > > > > printf("Expecting %f (0x%x) with tolerance %f > > > > (%u > > ulps), > > > > but got %f (0x%x)\n", > > > > e.f, e.u, t.f, t.u, v.f, v.u); > > > > return false; > > > > @@ -108,7 +108,7 @@ piglit_cl_probe_double(double value, double > > > > expect, > > > > uint64_t ulp) > > > > > > > > diff = fabsl(value - expect); > > > > > > > > - if(diff > ulp || isnan(value)) { > > > > + if (diff > t.f || isnan(value)) { > > > > printf("Expecting %f (0x%lx) with tolerance %f > > > > (%lu > > ulps), > > > > but got %f (0x%lx)\n", > > > > e.f, e.u, t.f, t.u, v.f, v.u); > > > > return false; > > > > @@ -162,7 +162,7 @@ > > > > piglit_cl_get_platform_version(cl_platform_id > > platform) > > > > int scanf_count; > > > > int major; > > > > int minor; > > > > - > > > > + > > > > /* > > > > * Returned format: > > > > * > > > > OpenCL<space><major_version.minor_version><space><platform- > > > > specific > > > > information> > > > > @@ -353,7 +353,7 @@ piglit_cl_get_info(void* fn_ptr, void* obj, > > > > cl_uint > > > > param) > > > > > > > > if(errNo == CL_SUCCESS) { > > > > param_ptr = calloc(param_size, sizeof(char)); > > > > - > > > > + > > > > /* retrieve param */ > > > > if(fn_ptr == clGetPlatformInfo) { > > > > errNo = > > clGetPlatformInfo(*(cl_platform_id*)obj, > > > > param, > > > > @@ -463,7 +463,7 @@ piglit_cl_get_program_build_info(cl_program > > program, > > > > cl_device_id device, > > > > .program = program, > > > > .device = device > > > > }; > > > > - > > > > + > > > > return piglit_cl_get_info(clGetProgramBuildInfo, &args, > > > > param); > > > > } > > > > > > > > @@ -479,7 +479,7 @@ > > > > piglit_cl_get_kernel_work_group_info(cl_kernel > > kernel, > > > > cl_device_id device, > > > > .kernel = kernel, > > > > .device = device > > > > }; > > > > - > > > > + > > > > return piglit_cl_get_info(clGetKernelWorkGroupInfo, > > > > &args, > > param); > > > > } > > > > > > > > @@ -620,7 +620,7 @@ piglit_cl_get_device_ids(cl_platform_id > > platform_id, > > > > cl_device_type device_type, > > > > > > piglit_cl_get_error_name(errNo)); > > > > return 0; > > > > } > > > > - > > > > + > > > > /* get device list */ > > > > if(device_ids != NULL && num_device_ids > > > > > 0) { > > > > *device_ids = > > > > malloc(num_device_ids * > > > > sizeof(cl_device_id)); > > > > @@ -761,7 +761,7 @@ > > > > piglit_cl_build_program_with_source_extended(piglit_cl_context > > > > context, > > > > piglit_cl_get_error_name(errNo)); > > > > return NULL; > > > > } > > > > - > > > > + > > > > errNo = clBuildProgram(program, > > > > context->num_devices, > > > > context->device_ids, > > > > @@ -788,7 +788,7 @@ > > > > piglit_cl_build_program_with_source_extended(piglit_cl_context > > > > context, > > > > char* log = > > > > piglit_cl_get_program_build_info(program, > > > > > > > > context->device_ids[i], > > > > > > > > CL_PROGRAM_BUILD_LOG); > > > > - > > > > + > > > > printf("Build log for device %s:\n ---- > > > > ---- > > \n%s\n > > > > -------- \n", > > > > device_name, > > > > log); > > > > @@ -848,11 +848,11 @@ > > > > piglit_cl_build_program_with_binary_extended(piglit_cl_context > > > > context, > > > > for(i = 0; i < context->num_devices; i++) { > > > > char* device_name = > > > > piglit_cl_get_device_info(context->device_ids[i], > > > > > > > > CL_DEVICE_NAME); > > > > - > > > > + > > > > printf("Error for %s: %s\n", > > > > device_name, > > > > > > piglit_cl_get_error_name(binary_status[i])); > > > > - > > > > + > > > > free(device_name); > > > > } > > > > > > > > @@ -860,7 +860,7 @@ > > > > piglit_cl_build_program_with_binary_extended(piglit_cl_context > > > > context, > > > > return NULL; > > > > } > > > > free(binary_status); > > > > - > > > > + > > > > errNo = clBuildProgram(program, > > > > context->num_devices, > > > > context->device_ids, > > > > @@ -884,11 +884,11 @@ > > > > piglit_cl_build_program_with_binary_extended(piglit_cl_context > > > > context, > > > > char* log = > > > > piglit_cl_get_program_build_info(program, > > > > > > > > context->device_ids[i], > > > > > > > > CL_PROGRAM_BUILD_LOG); > > > > - > > > > + > > > > printf("Build log for device %s:\n ---- > > > > ---- > > \n%s\n > > > > -------- \n", > > > > device_name, > > > > log); > > > > - > > > > + > > > > free(device_name); > > > > free(log); > > > > } > > > > -- > > > > 2.1.4 > > > > > > > > > > > > > > -- > > Jan Vesely <jan.vesely@rutgers.edu> > >
We need to actually check against the float value from the union, instead of just doing (diff > ulp), which seems to cast the diff to an int before checking against ulp. Signed-off-by: Aaron Watry <awatry@gmail.com> CC: Tom Stellard <thomas.stellard@amd.com> CC: Jan Vesely <jan.vesely@rutgers.edu> --- tests/util/piglit-util-cl.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-)