backend: refine math log function

Submitted by rander on June 19, 2017, 5:20 a.m.

Details

Message ID 1497849647-1500-1-git-send-email-rander.wang@intel.com
State New
Series "backend: refine math log function"
Headers show

Commit Message

rander June 19, 2017, 5:20 a.m.
Remove some unnecessary code, giving a 20% improvement
	in the worst case. If x is a NaN, there used to be if-return
	checks to return NAN. Now (x - x) is added to the result instead,
	which yields the same NaN.

	Passes the conformance tests and utests.

Signed-off-by: rander.wang <rander.wang@intel.com>
---
 backend/src/libocl/tmpl/ocl_math_common.tmpl.cl | 50 +++++--------------------
 1 file changed, 10 insertions(+), 40 deletions(-)

Patch hide | download patch | download mbox

diff --git a/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl b/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl
index b4764ee..2c0a702 100644
--- a/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl
+++ b/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl
@@ -201,38 +201,19 @@  OVERLOADABLE float inline __gen_ocl_internal_log_valid(float x) {
   k += (i>>23);
   f = x - 1.0f;
   fsq = f * f;
-
-  if((0x007fffff & (15 + ix)) < 16) { /* |f| < 2**-20 */
-      R = fsq * (0.5f - 0.33333333333333333f * f);
-      return k * ln2_hi + k * ln2_lo + f - R;
-  }
-
-  s = f / (2.0f + f);
+  s = mad(-2.0f, 1.0f / (2.0f + f), 1.0f);
   z = s * s;
-  i = ix - (0x6147a << 3);
   w = z * z;
-  j = (0x6b851 << 3) - ix;
-  t1= w * mad(w, Lg4, Lg2);
-  t2= z * mad(w, Lg3, Lg1);
-  i |= j;
-  R = t2 + t1;
-  partial = (i > 0) ? -mad(s, 0.5f * fsq, -0.5f * fsq) : (s * f);
-
-  return mad(s, R, f) - partial + k * ln2_hi + k * ln2_lo;;
+  t1 = w * mad(w, Lg4, Lg2);
+  R = mad(z, mad(w, Lg3, Lg1), t1);
+  w = 0.5f * fsq;
+  partial = -mad(s, w, -w);
+  return mad(k, ln2_lo, mad(k, ln2_hi, mad(s, R, f) - partial));
 }
 
 OVERLOADABLE float __gen_ocl_internal_log(float x)
 {
-  union { unsigned int i; float f; } u;
-  u.f = x;
-  int ix = u.i;
-
-  if (ix < 0 )
-	return NAN;  /* log(-#) = NaN */
-  if (ix >= 0x7f800000)
-    return NAN;
-
-  return __gen_ocl_internal_log_valid(x);
+  return __gen_ocl_internal_log_valid(x) + (x - x);
 }
 
 OVERLOADABLE float __gen_ocl_internal_log10(float x)
@@ -244,12 +225,10 @@  OVERLOADABLE float __gen_ocl_internal_log10(float x)
   log10_2lo  =  7.9034151668e-07; /* 0x355427db */
 
   float y, z;
-  int i, k, hx;
+  int i, k;
+  unsigned int hx;
 
   u.f = x; hx = u.i;
-
-  if (hx<0)
-    return NAN; /* log(-#) = NaN */
   if (hx >= 0x7f800000)
     return NAN;
 
@@ -267,17 +246,8 @@  OVERLOADABLE float __gen_ocl_internal_log2(float x)
 {
   const float zero   =  0.0,
   invln2 = 0x1.715476p+0f;
-  int ix;
-
-  union { float f; int i; } u;
-  u.f = x; ix = u.i;
-
-  if (ix < 0)
-	return NAN;    /** log(-#) = NaN */
-  if (ix >= 0x7f800000)
-	return NAN;
 
-  return invln2 * __gen_ocl_internal_log_valid(x);
+  return invln2 * __gen_ocl_internal_log_valid(x) + (x - x);
 }
 
 

Comments

Yang, Rong R July 4, 2017, 12:34 a.m.
LGTM, pushed, thanks.

> -----Original Message-----

> From: Beignet [mailto:beignet-bounces@lists.freedesktop.org] On Behalf Of

> rander.wang

> Sent: Monday, June 19, 2017 13:21

> To: beignet@freedesktop.org

> Cc: Wang, Rander <rander.wang@intel.com>

> Subject: [Beignet] [PATCH] backend: refine math log function

> 

> 	remove a few unnecessary codes , and get 20% improvement

> 	at worse case. If X is a NAN, there are some if-return

> 	codes to return NAN. Now change it to add(x - x) which

> 	get the same NAN

> 

> 	pass the conformance tests and utests

> 

> Signed-off-by: rander.wang <rander.wang@intel.com>

> ---

>  backend/src/libocl/tmpl/ocl_math_common.tmpl.cl | 50 +++++----------------

> ----

>  1 file changed, 10 insertions(+), 40 deletions(-)

> 

> diff --git a/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl

> b/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl

> index b4764ee..2c0a702 100644

> --- a/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl

> +++ b/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl

> @@ -201,38 +201,19 @@ OVERLOADABLE float inline

> __gen_ocl_internal_log_valid(float x) {

>    k += (i>>23);

>    f = x - 1.0f;

>    fsq = f * f;

> -

> -  if((0x007fffff & (15 + ix)) < 16) { /* |f| < 2**-20 */

> -      R = fsq * (0.5f - 0.33333333333333333f * f);

> -      return k * ln2_hi + k * ln2_lo + f - R;

> -  }

> -

> -  s = f / (2.0f + f);

> +  s = mad(-2.0f, 1.0f / (2.0f + f), 1.0f);

>    z = s * s;

> -  i = ix - (0x6147a << 3);

>    w = z * z;

> -  j = (0x6b851 << 3) - ix;

> -  t1= w * mad(w, Lg4, Lg2);

> -  t2= z * mad(w, Lg3, Lg1);

> -  i |= j;

> -  R = t2 + t1;

> -  partial = (i > 0) ? -mad(s, 0.5f * fsq, -0.5f * fsq) : (s * f);

> -

> -  return mad(s, R, f) - partial + k * ln2_hi + k * ln2_lo;;

> +  t1 = w * mad(w, Lg4, Lg2);

> +  R = mad(z, mad(w, Lg3, Lg1), t1);

> +  w = 0.5f * fsq;

> +  partial = -mad(s, w, -w);

> +  return mad(k, ln2_lo, mad(k, ln2_hi, mad(s, R, f) - partial));

>  }

> 

>  OVERLOADABLE float __gen_ocl_internal_log(float x)  {

> -  union { unsigned int i; float f; } u;

> -  u.f = x;

> -  int ix = u.i;

> -

> -  if (ix < 0 )

> -	return NAN;  /* log(-#) = NaN */

> -  if (ix >= 0x7f800000)

> -    return NAN;

> -

> -  return __gen_ocl_internal_log_valid(x);

> +  return __gen_ocl_internal_log_valid(x) + (x - x);

>  }

> 

>  OVERLOADABLE float __gen_ocl_internal_log10(float x) @@ -244,12 +225,10

> @@ OVERLOADABLE float __gen_ocl_internal_log10(float x)

>    log10_2lo  =  7.9034151668e-07; /* 0x355427db */

> 

>    float y, z;

> -  int i, k, hx;

> +  int i, k;

> +  unsigned int hx;

> 

>    u.f = x; hx = u.i;

> -

> -  if (hx<0)

> -    return NAN; /* log(-#) = NaN */

>    if (hx >= 0x7f800000)

>      return NAN;

> 

> @@ -267,17 +246,8 @@ OVERLOADABLE float __gen_ocl_internal_log2(float

> x)  {

>    const float zero   =  0.0,

>    invln2 = 0x1.715476p+0f;

> -  int ix;

> -

> -  union { float f; int i; } u;

> -  u.f = x; ix = u.i;

> -

> -  if (ix < 0)

> -	return NAN;    /** log(-#) = NaN */

> -  if (ix >= 0x7f800000)

> -	return NAN;

> 

> -  return invln2 * __gen_ocl_internal_log_valid(x);

> +  return invln2 * __gen_ocl_internal_log_valid(x) + (x - x);

>  }

> 

> 

> --

> 2.7.4

> 

> _______________________________________________

> Beignet mailing list

> Beignet@lists.freedesktop.org

> https://lists.freedesktop.org/mailman/listinfo/beignet