3 op math functions dst need 16 byte align when allocate register.

Submitted by Luo, Xionghu on May 19, 2016, 11:44 a.m.

Details

Message ID 1463658245-12910-1-git-send-email-xionghu.luo@intel.com
State New
Headers show
Series "3 op math functions dst need 16 byte align when allocate register." ( rev: 1 ) in Beignet

Not browsing as part of any series.

Commit Message

Luo, Xionghu May 19, 2016, 11:44 a.m.
From: Luo Xionghu <xionghu.luo@intel.com>

A GPU hang occurs when a uniform mad instruction's destination
register is not 16-byte aligned. Check for this and adjust the mad
dst to 16-byte alignment before register allocation.
This patch can fix
"STRICT=0, opencv_test_video/OCL_Video/PyrLKOpticalFlow".

Signed-off-by: Luo Xionghu <xionghu.luo@intel.com>
---
 backend/src/backend/gen_reg_allocation.cpp | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

Patch hide | download patch | download mbox

diff --git a/backend/src/backend/gen_reg_allocation.cpp b/backend/src/backend/gen_reg_allocation.cpp
index da3dac0..1aaac58 100644
--- a/backend/src/backend/gen_reg_allocation.cpp
+++ b/backend/src/backend/gen_reg_allocation.cpp
@@ -49,10 +49,11 @@  namespace gbe
    */
   struct GenRegInterval {
     INLINE GenRegInterval(ir::Register reg) :
-      reg(reg), minID(INT_MAX), maxID(-INT_MAX), conflictReg(0) {}
+      reg(reg), minID(INT_MAX), maxID(-INT_MAX), conflictReg(0), b3OpAlign(0) {}
     ir::Register reg;     //!< (virtual) register of the interval
     int32_t minID, maxID; //!< Starting and ending points
     ir::Register conflictReg; // < has banck conflict with this register
+    bool b3OpAlign;
   };
 
   typedef struct GenRegIntervalKey {
@@ -1050,6 +1051,9 @@  namespace gbe
         }
       }
     }
+    if (interval.b3OpAlign != 0) {
+      alignment = (alignment + 15) & ~15;
+    }
     while ((grfOffset = ctx.allocate(size, alignment, direction)) == -1) {
       const bool success = this->expireGRF(interval);
       if (success == false) {
@@ -1138,6 +1142,9 @@  namespace gbe
               reg == ir::ocl::groupid1 ||
               reg == ir::ocl::groupid2)
             continue;
+          if (is3SrcOp) {
+              this->intervals[reg].b3OpAlign = 1;
+          }
           this->intervals[reg].minID = std::min(this->intervals[reg].minID, insnID);
           this->intervals[reg].maxID = std::max(this->intervals[reg].maxID, insnID);
         }