[06/19] Backend: Add CalcTimestamp and StoreProfiling.

Submitted by junyan.he@inbox.com on Sept. 9, 2015, midnight

Details

Message ID 1441756870-32320-7-git-send-email-junyan.he@inbox.com
State New
Headers show

Not browsing as part of any series.

Commit Message

junyan.he@inbox.com Sept. 9, 2015, midnight
From: Junyan He <junyan.he@linux.intel.com>

When profiling is enabled, the profiling inserter function will
insert a calc_timestamp call at each point we are interested
in. At the end of the kernel, just before the return, we
insert a store_profiling function call. That call holds a
reference to the global variable profiling_buf, which keeps it
from being removed when the optimization passes run.

Signed-off-by: Junyan He <junyan.he@linux.intel.com>
---
 backend/src/llvm/llvm_gen_backend.cpp      |   43 ++++++++++++++++++++++++++++
 backend/src/llvm/llvm_gen_ocl_function.hxx |    5 ++++
 2 files changed, 48 insertions(+)

Patch hide | download patch | download mbox

diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
index 4905415..5c2b590 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -1112,6 +1112,9 @@  namespace gbe
     } else if (origin->getName().equals(StringRef("__gen_ocl_printf_index_buf"))) {
       new_bti = btiBase;
       incBtiBase();
+    } else if (origin->getName().equals(StringRef("__gen_ocl_timestamp_buf"))) {
+      new_bti = btiBase;
+      incBtiBase();
     }
     else if (isa<GlobalVariable>(origin)
         && dyn_cast<GlobalVariable>(origin)->isConstant()) {
@@ -2485,6 +2488,9 @@  namespace gbe
         } else if(v.getName().equals(StringRef("__gen_ocl_printf_index_buf"))) {
           ctx.getFunction().getPrintfSet()->setIndexBufBTI(BtiMap.find(const_cast<GlobalVariable*>(&v))->second);
           regTranslator.newScalarProxy(ir::ocl::printfiptr, const_cast<GlobalVariable*>(&v));
+        } else if(v.getName().equals(StringRef("__gen_ocl_profiling_buf"))) {
+          ctx.getUnit().getProfilingInfo()->setBTI(BtiMap.find(const_cast<GlobalVariable*>(&v))->second);
+          regTranslator.newScalarProxy(ir::ocl::profilingbptr, const_cast<GlobalVariable*>(&v));
         } else if(v.getName().str().substr(0, 4) == ".str") {
           /* When there are multi printf statements in multi kernel fucntions within the same
              translate unit, if they have the same sting parameter, such as
@@ -3494,6 +3500,8 @@  namespace gbe
         this->newRegister(&I);
         break;
       case GEN_OCL_PRINTF:
+      case GEN_OCL_CALC_TIMESTAMP:
+      case GEN_OCL_STORE_PROFILING:
         break;
       case GEN_OCL_NOT_FOUND:
       default:
@@ -4179,6 +4187,41 @@  namespace gbe
             assert(fmt);
             break;
           }
+          case GEN_OCL_CALC_TIMESTAMP:
+          {
+            GBE_ASSERT(AI != AE);
+            ConstantInt *CI = dyn_cast<ConstantInt>(*AI);
+            GBE_ASSERT(CI);
+            uint32_t pointNum = CI->getZExtValue();
+            AI++;
+            GBE_ASSERT(AI != AE);
+            CI = dyn_cast<ConstantInt>(*AI);
+            GBE_ASSERT(CI);
+            uint32_t tsType = CI->getZExtValue();
+            ctx.CALC_TIMESTAMP(pointNum, tsType);
+            break;
+          }
+          case GEN_OCL_STORE_PROFILING:
+          {
+            /* The profiling log always begins at offset 0, so we
+               never need the buffer ptr value or ptrBase, and no
+               SUB is needed to calculate the real address either.
+               We just pass down the BTI value to the instruction. */
+            GBE_ASSERT(AI != AE);
+            Value* llvmPtr = *AI;
+            Value *bti = getBtiRegister(llvmPtr);
+            GBE_ASSERT(isa<ConstantInt>(bti)); //Should never be mixed pointer.
+            uint32_t index = cast<ConstantInt>(bti)->getZExtValue();
+            GBE_ASSERT(btiToGen(index) == ir::MEM_GLOBAL);
+            ++AI;
+            GBE_ASSERT(AI != AE);
+            ConstantInt *CI = dyn_cast<ConstantInt>(*AI);
+            GBE_ASSERT(CI);
+            uint32_t ptype = CI->getZExtValue();
+            ctx.getUnit().getProfilingInfo()->setProfilingType(ptype);
+            //ctx.STORE_PROFILING(index, ptype);
+            break;
+          }
           case GEN_OCL_SIMD_SIZE:
           {
             const ir::Register dst = this->getRegister(&I);
diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx
index cabb225..0a6e1da 100644
--- a/backend/src/llvm/llvm_gen_ocl_function.hxx
+++ b/backend/src/llvm/llvm_gen_ocl_function.hxx
@@ -170,3 +170,8 @@  DECL_LLVM_GEN_FUNCTION(REGION, __gen_ocl_region)
 
 // printf function
 DECL_LLVM_GEN_FUNCTION(PRINTF, __gen_ocl_printf)
+
+// store timestamp function
+DECL_LLVM_GEN_FUNCTION(CALC_TIMESTAMP, __gen_ocl_calc_timestamp)
+// store profiling info to the mem.
+DECL_LLVM_GEN_FUNCTION(STORE_PROFILING, __gen_ocl_store_profiling)