[1/2] Add the geometric builtin functions for half.

Submitted by junyan.he@inbox.com on July 6, 2015, 10:26 a.m.

Details

Message ID 1436178374-7068-1-git-send-email-junyan.he@inbox.com
State New
Headers show

Not browsing as part of any series.

Commit Message

junyan.he@inbox.com July 6, 2015, 10:26 a.m.
From: Junyan He <junyan.he@linux.intel.com>

Signed-off-by: Junyan He <junyan.he@linux.intel.com>
---
 backend/src/libocl/include/ocl_geometric.h | 34 +++++++++++++
 backend/src/libocl/src/ocl_geometric.cl    | 81 ++++++++++++++++++++++++++++++
 backend/src/llvm/llvm_gen_backend.cpp      |  7 ++-
 3 files changed, 121 insertions(+), 1 deletion(-)

Patch hide | download patch | download mbox

diff --git a/backend/src/libocl/include/ocl_geometric.h b/backend/src/libocl/include/ocl_geometric.h
index 86d543b..b99e44b 100644
--- a/backend/src/libocl/include/ocl_geometric.h
+++ b/backend/src/libocl/include/ocl_geometric.h
@@ -53,4 +53,38 @@  OVERLOADABLE float4 fast_normalize(float4 x);
 OVERLOADABLE float3 cross(float3 v0, float3 v1);
 OVERLOADABLE float4 cross(float4 v0, float4 v1);
 
+// Half-precision overloads: dot, length, distance and normalize for scalar and 2/3/4-component vectors.
+OVERLOADABLE half dot(half p0, half p1);
+OVERLOADABLE half dot(half2 p0, half2 p1);
+OVERLOADABLE half dot(half3 p0, half3 p1);
+OVERLOADABLE half dot(half4 p0, half4 p1);
+OVERLOADABLE half length(half x);
+OVERLOADABLE half length(half2 x);
+OVERLOADABLE half length(half3 x);
+OVERLOADABLE half length(half4 x);
+OVERLOADABLE half distance(half x, half y);
+OVERLOADABLE half distance(half2 x, half2 y);
+OVERLOADABLE half distance(half3 x, half3 y);
+OVERLOADABLE half distance(half4 x, half4 y);
+OVERLOADABLE half normalize(half x);
+OVERLOADABLE half2 normalize(half2 x);
+OVERLOADABLE half3 normalize(half3 x);
+OVERLOADABLE half4 normalize(half4 x);
+// fast_* counterparts of length, distance and normalize for the same half types.
+OVERLOADABLE half fast_length(half x);
+OVERLOADABLE half fast_length(half2 x);
+OVERLOADABLE half fast_length(half3 x);
+OVERLOADABLE half fast_length(half4 x);
+OVERLOADABLE half fast_distance(half x, half y);
+OVERLOADABLE half fast_distance(half2 x, half2 y);
+OVERLOADABLE half fast_distance(half3 x, half3 y);
+OVERLOADABLE half fast_distance(half4 x, half4 y);
+OVERLOADABLE half fast_normalize(half x);
+OVERLOADABLE half2 fast_normalize(half2 x);
+OVERLOADABLE half3 fast_normalize(half3 x);
+OVERLOADABLE half4 fast_normalize(half4 x);
+// cross is declared for 3- and 4-component half vectors only.
+OVERLOADABLE half3 cross(half3 v0, half3 v1);
+OVERLOADABLE half4 cross(half4 v0, half4 v1);
+
 #endif
diff --git a/backend/src/libocl/src/ocl_geometric.cl b/backend/src/libocl/src/ocl_geometric.cl
index 886e88c..f864463 100644
--- a/backend/src/libocl/src/ocl_geometric.cl
+++ b/backend/src/libocl/src/ocl_geometric.cl
@@ -99,3 +99,84 @@  OVERLOADABLE float3 cross(float3 v0, float3 v1) {
 OVERLOADABLE float4 cross(float4 v0, float4 v1) {
    return (float4)(v0.yzx*v1.zxy-v0.zxy*v1.yzx, 0.f);
 }
+
+
+// Half: fp16 versions of the geometric builtins. __gen_ocl_habs is bound through its asm label to "llvm.fabs.f16".
+CONST half __gen_ocl_habs(half x) __asm("llvm.fabs" ".f16");
+
+OVERLOADABLE half dot(half p0, half p1) {  // scalar case: the dot product is a plain multiply
+  return p0 * p1;
+}
+OVERLOADABLE half dot(half2 p0, half2 p1) {  // vector cases: sum of the component-wise products, added left to right
+  return p0.x * p1.x + p0.y * p1.y;
+}
+OVERLOADABLE half dot(half3 p0, half3 p1) {  // three-component sum
+  return p0.x * p1.x + p0.y * p1.y + p0.z * p1.z;
+}
+OVERLOADABLE half dot(half4 p0, half4 p1) {  // four-component sum
+  return p0.x * p1.x + p0.y * p1.y + p0.z * p1.z + p0.w * p1.w;
+}
+OVERLOADABLE half length(half x) { return __gen_ocl_habs(x); }  // length of a scalar is its absolute value
+// BODY expects m = largest |component| of x. It divides x by m (by 1 when m is 0 or inf) and scales sqrt(dot(x,x)) back by m; presumably this keeps the squared terms within half range.
+#define BODY \
+  m = m== (half)0.0 ? (half)1.0 : m; \
+  m = isinf(m) ? (half)1.0 : m; \
+  x = x/m; \
+  return m * sqrt(dot(x,x));
+OVERLOADABLE half length(half2 x) {  // m is the larger of |x.s0| and |x.s1|
+  half m = max(__gen_ocl_habs(x.s0), __gen_ocl_habs(x.s1));
+  BODY;
+}
+OVERLOADABLE half length(half3 x) {  // m is the largest absolute value of the three components
+  half m = max(__gen_ocl_habs(x.s0), max(__gen_ocl_habs(x.s1), __gen_ocl_habs(x.s2)));
+  BODY;
+}
+OVERLOADABLE half length(half4 x) {  // m is the largest absolute value of the four components
+  half m = max(__gen_ocl_habs(x.s0), max(__gen_ocl_habs(x.s1), max(__gen_ocl_habs(x.s2), __gen_ocl_habs(x.s3))));
+  BODY;
+}
+#undef BODY
+OVERLOADABLE half distance(half x, half y) { const half delta = x - y; return length(delta); }  // distance is the length of the difference x - y
+OVERLOADABLE half distance(half2 x, half2 y) { const half2 delta = x - y; return length(delta); }
+OVERLOADABLE half distance(half3 x, half3 y) { const half3 delta = x - y; return length(delta); }
+OVERLOADABLE half distance(half4 x, half4 y) { const half4 delta = x - y; return length(delta); }
+OVERLOADABLE half normalize(half x) {  // x divided by its length; a zero length is replaced by 1 so the input is returned as-is
+  const half len = length(x);
+  const half divisor = (len == (half)0.0) ? (half)1.0 : len;
+  return x / divisor;
+}
+OVERLOADABLE half2 normalize(half2 x) {  // same scheme for 2 components
+  const half len = length(x);
+  const half divisor = (len == (half)0.0) ? (half)1.0 : len;
+  return x / divisor;
+}
+OVERLOADABLE half3 normalize(half3 x) {  // same scheme for 3 components
+  const half len = length(x);
+  const half divisor = (len == (half)0.0) ? (half)1.0 : len;
+  return x / divisor;
+}
+OVERLOADABLE half4 normalize(half4 x) {  // same scheme for 4 components
+  const half len = length(x);
+  const half divisor = (len == (half)0.0) ? (half)1.0 : len;
+  return x / divisor;
+}
+
+OVERLOADABLE half fast_length(half x) { const half magnitude = __gen_ocl_habs(x); return magnitude; }  // scalar: absolute value
+OVERLOADABLE half fast_length(half2 x) { const half sumSq = dot(x, x); return sqrt(sumSq); }  // vectors: sqrt(dot(x,x)) with no rescaling step
+OVERLOADABLE half fast_length(half3 x) { const half sumSq = dot(x, x); return sqrt(sumSq); }
+OVERLOADABLE half fast_length(half4 x) { const half sumSq = dot(x, x); return sqrt(sumSq); }
+OVERLOADABLE half fast_distance(half x, half y) { return fast_length(x-y); }  // fast_distance(p0, p1) is fast_length(p0 - p1), not the rescaling length()
+OVERLOADABLE half fast_distance(half2 x, half2 y) { return fast_length(x-y); }
+OVERLOADABLE half fast_distance(half3 x, half3 y) { return fast_length(x-y); }
+OVERLOADABLE half fast_distance(half4 x, half4 y) { return fast_length(x-y); }
+OVERLOADABLE half fast_normalize(half x) { if (x > 0) return (half)1.0; if (x < 0) return (half)(-1.0); return (half)0.0; }  // scalar: 1 for positive, -1 for negative, 0 otherwise
+OVERLOADABLE half2 fast_normalize(half2 x) { const half invLen = rsqrt(dot(x, x)); return x * invLen; }  // vectors: x scaled by rsqrt(dot(x,x))
+OVERLOADABLE half3 fast_normalize(half3 x) { const half invLen = rsqrt(dot(x, x)); return x * invLen; }
+OVERLOADABLE half4 fast_normalize(half4 x) { const half invLen = rsqrt(dot(x, x)); return x * invLen; }
+
+OVERLOADABLE half3 cross(half3 v0, half3 v1) {  // cross product written with rotated swizzles (yzx / zxy)
+   return v0.yzx*v1.zxy-v0.zxy*v1.yzx;
+}
+OVERLOADABLE half4 cross(half4 v0, half4 v1) {  // same 3-component expression on xyz; the w component of the result is set to 0
+   return (half4)(v0.yzx*v1.zxy-v0.zxy*v1.yzx, (half)0.0);
+}
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
index 37c9e7b..6443e38 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -3653,7 +3653,12 @@  namespace gbe
           break;
           case Intrinsic::sqrt: this->emitUnaryCallInst(I,CS,ir::OP_SQR); break;
           case Intrinsic::ceil: this->emitUnaryCallInst(I,CS,ir::OP_RNDU); break;
-          case Intrinsic::fabs: this->emitUnaryCallInst(I,CS,ir::OP_ABS); break;
+          case Intrinsic::fabs:
+          {
+            ir::Type srcType = getType(ctx, I.getType());
+            this->emitUnaryCallInst(I,CS,ir::OP_ABS, srcType);
+            break;
+          }
           case Intrinsic::trunc: this->emitUnaryCallInst(I,CS,ir::OP_RNDZ); break;
           case Intrinsic::rint: this->emitUnaryCallInst(I,CS,ir::OP_RNDE); break;
           case Intrinsic::floor: this->emitUnaryCallInst(I,CS,ir::OP_RNDD); break;