[V3] backend: refine load store optimization

Submitted by rander on July 7, 2017, 2:53 a.m.

Details

Message ID 1499396002-3219-1-git-send-email-rander.wang@intel.com
State New
Headers show
Series "backend: refine load store optimization" ( rev: 3 ) in Beignet

Browsing this patch as part of:
"backend: refine load store optimization" rev 3 in Beignet
<< prev patch [1/1] next patch >>

Commit Message

rander July 7, 2017, 2:53 a.m.
This fixes the basic test in the conformance tests, which failed for vec8
of char because of an overflow. It also fixes many OpenCV test items that
failed because of an offset error.

	(1) Change the size of searchInsnArray to 32, which is the max size
	    for char, and add an overflow check in case there are too many
	    instructions.
	(2) Make sure the start instruction is the first instruction of the
	    searched array, because if it is not the first, the offset may be
	    invalid, and it is complex to modify the offset without error.

	V2: Refine the search index, using J instead of I.
	V3: Remove (2); the offset is now added to the pointer of the start
	    instruction. Passes the OpenCV tests, the conformance basic and
	    compiler tests, and the utests.

Signed-off-by: rander.wang <rander.wang@intel.com>
---
 backend/src/llvm/llvm_loadstore_optimization.cpp | 103 ++++++++++++++++-------
 1 file changed, 74 insertions(+), 29 deletions(-)

Patch hide | download patch | download mbox

diff --git a/backend/src/llvm/llvm_loadstore_optimization.cpp b/backend/src/llvm/llvm_loadstore_optimization.cpp
index c91c1a0..081f16e 100644
--- a/backend/src/llvm/llvm_loadstore_optimization.cpp
+++ b/backend/src/llvm/llvm_loadstore_optimization.cpp
@@ -68,13 +68,14 @@  namespace gbe {
     bool     optimizeLoadStore(BasicBlock &BB);
 
     bool     isLoadStoreCompatible(Value *A, Value *B, int *dist, int* elementSize, int maxVecSize);
-    void     mergeLoad(BasicBlock &BB, SmallVector<Instruction*, 16> &merged);
-    void     mergeStore(BasicBlock &BB, SmallVector<Instruction*, 16> &merged);
+    void     mergeLoad(BasicBlock &BB, SmallVector<Instruction*, 16> &merged, Instruction *start,int offset);
+    void     mergeStore(BasicBlock &BB, SmallVector<Instruction*, 16> &merged, Instruction *start,int offset);
     bool     findConsecutiveAccess(BasicBlock &BB,
                                   SmallVector<Instruction*, 16> &merged,
                                   const BasicBlock::iterator &start,
                                   unsigned maxVecSize,
-                                  bool isLoad);
+                                  bool isLoad,
+                                  int *addrOffset);
 #if LLVM_VERSION_MAJOR * 10 + LLVM_VERSION_MINOR >= 40
     virtual StringRef getPassName() const
 #else
@@ -143,7 +144,10 @@  namespace gbe {
     return (abs(-offset) < sz*maxVecSize);
   }
 
-  void GenLoadStoreOptimization::mergeLoad(BasicBlock &BB, SmallVector<Instruction*, 16> &merged) {
+  void GenLoadStoreOptimization::mergeLoad(BasicBlock &BB,
+                                            SmallVector<Instruction*, 16> &merged,
+                                            Instruction *start,
+                                            int offset) {
     IRBuilder<> Builder(&BB);
 
     unsigned size = merged.size();
@@ -151,14 +155,24 @@  namespace gbe {
     for(unsigned i = 0; i < size; i++) {
       values.push_back(merged[i]);
     }
-    LoadInst *ld = cast<LoadInst>(merged[0]);
+    LoadInst *ld = cast<LoadInst>(start);
     unsigned align = ld->getAlignment();
     unsigned addrSpace = ld->getPointerAddressSpace();
     // insert before first load
     Builder.SetInsertPoint(ld);
+
+    //modify offset
+    Value *newPtr = ld->getPointerOperand();
+    if(offset != 0)
+    {
+      Value *StartAddr = Builder.CreatePtrToInt(ld->getPointerOperand(), Builder.getInt32Ty());
+      Value *offsetVal = ConstantInt::get(Builder.getInt32Ty(), offset);
+      Value *newAddr = Builder.CreateAdd(StartAddr, offsetVal);
+      newPtr = Builder.CreateIntToPtr(newAddr, ld->getPointerOperand()->getType());
+    }
+
     VectorType *vecTy = VectorType::get(ld->getType(), size);
-    Value *vecPtr = Builder.CreateBitCast(ld->getPointerOperand(),
-                                        PointerType::get(vecTy, addrSpace));
+    Value *vecPtr = Builder.CreateBitCast(newPtr, PointerType::get(vecTy, addrSpace));
     LoadInst *vecValue = Builder.CreateLoad(vecPtr);
     vecValue->setAlignment(align);
 
@@ -196,8 +210,8 @@  namespace gbe {
                             SmallVector<Instruction*, 16> &merged,
                             const BasicBlock::iterator &start,
                             unsigned maxVecSize,
-                            bool isLoad) {
-
+                            bool isLoad,
+                            int *addrOffset) {
     if(!isSimpleLoadStore(&*start)) return false;
 
     unsigned targetAddrSpace = getAddressSpace(&*start);
@@ -212,23 +226,26 @@  namespace gbe {
     bool ready = false;
     int elementSize;
 
-    SmallVector<mergedInfo *, 16> searchInsnArray;
-    mergedInfo meInfoArray[16];
+    SmallVector<mergedInfo *, 32> searchInsnArray;
+    mergedInfo meInfoArray[32];
     int indx = 0;
     meInfoArray[indx++].init(&*start, 0);
-
     searchInsnArray.push_back(&meInfoArray[0]);
-    BasicBlock::iterator I = start;
-    ++I;
 
-    for(unsigned ss = 0; I!= E && ss <= maxLimit; ++ss, ++I) {
-      if((isLoad && isa<LoadInst>(*I)) || (!isLoad && isa<StoreInst>(*J))) {
+    bool realReorder = false;
+    for(unsigned ss = 0; J!= E && ss <= maxLimit; ++ss, ++J) {
+      if((isLoad && isa<LoadInst>(*J)) || (!isLoad && isa<StoreInst>(*J))) {
           int distance;
-          if(isLoadStoreCompatible(searchInsnArray[0]->mInsn, &*I, &distance, &elementSize, maxVecSize))
+          if(isLoadStoreCompatible(searchInsnArray[0]->mInsn, &*J, &distance, &elementSize, maxVecSize))
           {
-            meInfoArray[indx].init(&*I, distance);
+            meInfoArray[indx].init(&*J, distance);
             searchInsnArray.push_back(&meInfoArray[indx]);
             indx++;
+            if(indx >= 32)
+              break;
+
+            if(reordered)
+              realReorder = true;
           }
       } else if((isLoad && isa<StoreInst>(*J))) {
         // simple stop to keep read/write order
@@ -248,10 +265,11 @@  namespace gbe {
           break;
         }
       }
-
-      if(merged.size() >= maxVecSize) break;
     }
 
+    if(!realReorder)
+      reordered = false;
+
     if(indx > 1)
     {
       //try to sort the load/store by the offset from the start
@@ -275,8 +293,16 @@  namespace gbe {
 
         if(j > 0 && ready)
         {
-          for(unsigned k = 0; k < j+1; k++)
+          unsigned endIndx = j + 1;
+          *addrOffset = searchInsnArray[i]->mOffset;
+          endIndx = (endIndx >= 16) ? 16 : (endIndx >= 8 ? 8 : (endIndx >= 4 ? 4 : endIndx));
+
+          for(unsigned k = 0; k < endIndx; k++)
+          {
             merged.push_back(searchInsnArray[i+k]->mInsn);
+            if (k >= maxVecSize)
+              break;
+          }
 
           break;
         }
@@ -286,7 +312,10 @@  namespace gbe {
     return reordered;
   }
 
-  void GenLoadStoreOptimization::mergeStore(BasicBlock &BB, SmallVector<Instruction*, 16> &merged) {
+  void GenLoadStoreOptimization::mergeStore(BasicBlock &BB,
+                                            SmallVector<Instruction*, 16> &merged,
+                                            Instruction *start,
+                                            int offset) {
     IRBuilder<> Builder(&BB);
 
     unsigned size = merged.size();
@@ -294,7 +323,7 @@  namespace gbe {
     for(unsigned i = 0; i < size; i++) {
       values.push_back(cast<StoreInst>(merged[i])->getValueOperand());
     }
-    StoreInst *st = cast<StoreInst>(merged[0]);
+    StoreInst *st = cast<StoreInst>(start);
     if(!st)
       return;
 
@@ -314,7 +343,18 @@  namespace gbe {
     Value * stPointer = st->getPointerOperand();
     if(!stPointer)
       return;
-    Value *newPtr = Builder.CreateBitCast(stPointer, PointerType::get(vecTy, addrSpace));
+
+    //modify offset
+    Value *newSPtr = stPointer;
+    if(offset != 0)
+    {
+      Value *StartAddr = Builder.CreatePtrToInt(stPointer, Builder.getInt32Ty());
+      Value *offsetVal = ConstantInt::get(Builder.getInt32Ty(), offset);
+      Value *newAddr = Builder.CreateAdd(StartAddr, offsetVal);
+      newSPtr = Builder.CreateIntToPtr(newAddr, stPointer->getType());
+    }
+
+    Value *newPtr = Builder.CreateBitCast(newSPtr, PointerType::get(vecTy, addrSpace));
     StoreInst *newST = Builder.CreateStore(parent, newPtr);
     newST->setAlignment(align);
   }
@@ -330,10 +370,13 @@  namespace gbe {
     unsigned size = toBeDeleted.size();
     if (reorder) {
       unsigned i = 0;
-      while (i < size && toBeDeleted[i] == &*safe) {
-        ++i;
+      while (i < size) {
+      if(toBeDeleted[i] == &*safe) {
         ++safe;
       }
+
+        ++i;
+      }
     } else {
       safe = BasicBlock::iterator(toBeDeleted[size - 1]);
       ++safe;
@@ -355,12 +398,14 @@  namespace gbe {
              ((ty->isIntegerTy(8) || ty->isIntegerTy(16)) && isLoad)))
           continue;
 
+        int addrOffset = 0;
         unsigned maxVecSize = (ty->isFloatTy() || ty->isIntegerTy(32)) ? 4 :
                               (ty->isIntegerTy(16) ? 8 : 16);
-        bool reorder = findConsecutiveAccess(BB, merged, BBI, maxVecSize, isLoad);
+        bool reorder = findConsecutiveAccess(BB, merged, BBI, maxVecSize, isLoad, &addrOffset);
         uint32_t size = merged.size();
         uint32_t pos = 0;
         bool doDeleting = size > 1;
+        BasicBlock::iterator startLS = BBI;
         if (doDeleting) {
           // choose next undeleted instruction
           BBI = findSafeInstruction(merged, BBI, reorder);
@@ -372,9 +417,9 @@  namespace gbe {
                              (size >= 4 ? 4 : size));
           SmallVector<Instruction*, 16> mergedVec(merged.begin() + pos, merged.begin() + pos + vecSize);
           if(isLoad)
-            mergeLoad(BB, mergedVec);
+            mergeLoad(BB, mergedVec, &*startLS, addrOffset);
           else
-            mergeStore(BB, mergedVec);
+            mergeStore(BB, mergedVec, &*startLS, addrOffset);
           // remove merged insn
           for(uint32_t i = 0; i < mergedVec.size(); i++)
             mergedVec[i]->eraseFromParent();