[V2] backend: refine load/store merging algorithm

Submitted by rander on June 16, 2017, 1:50 a.m.

Details

Message ID 1497577827-14512-1-git-send-email-rander.wang@intel.com
State New
Series "backend: refine load/store merging algorithm"
Headers show

Commit Message

rander June 16, 2017, 1:50 a.m.
Now it works for the sequence load(0), load(1), load(2),
	but it cannot work for load(2), load(0), load(1), because
        it compared the last merged load with only the new one,
	not with all of the loads.

	For the sequence load(0), load(1), load(2): load(0) is the
        start; it finds that load(1) is a successor with no gap, so
        it puts load(1) into a merge FIFO. Then the start moves to the top
        of the FIFO, load(1), and is compared with load(2), so load(2) can also be merged.

	For load(2), load(0), load(1): load(2) cannot be merged with
        load(0) because of the gap between them. So it skips load(0) and moves to the
        next one, load(1). This load(1) can be merged, but it never goes back to merge
        load(0).

        Now change the algorithm:
	(1) find all loads that may be merged around the start, judged by their
	    distance to the start. The distance depends on the data type; for 32-bit
            data the distance is 4. Put them in a list.

        (2) sort the list by the distance from the start.

	(3) search for the continuous sequence that includes the start, and merge it.

	V2: (1) refine the sort and compare algorithm: first find all the IO
	       within a small offset of the start, then call std::sort
            (2) check the number of candidate IOs, which is favorable to performance
	       since in most cases there is no chance to merge IO

Signed-off-by: rander.wang <rander.wang@intel.com>
---
 backend/src/llvm/llvm_loadstore_optimization.cpp | 87 +++++++++++++++++++++---
 1 file changed, 78 insertions(+), 9 deletions(-)

Patch hide | download patch | download mbox

diff --git a/backend/src/llvm/llvm_loadstore_optimization.cpp b/backend/src/llvm/llvm_loadstore_optimization.cpp
index 5aa38be..c91c1a0 100644
--- a/backend/src/llvm/llvm_loadstore_optimization.cpp
+++ b/backend/src/llvm/llvm_loadstore_optimization.cpp
@@ -67,7 +67,7 @@  namespace gbe {
     bool     isSimpleLoadStore(Value *I);
     bool     optimizeLoadStore(BasicBlock &BB);
 
-    bool     isLoadStoreCompatible(Value *A, Value *B);
+    bool     isLoadStoreCompatible(Value *A, Value *B, int *dist, int* elementSize, int maxVecSize);
     void     mergeLoad(BasicBlock &BB, SmallVector<Instruction*, 16> &merged);
     void     mergeStore(BasicBlock &BB, SmallVector<Instruction*, 16> &merged);
     bool     findConsecutiveAccess(BasicBlock &BB,
@@ -109,7 +109,7 @@  namespace gbe {
     return NULL;
   }
 
-  bool GenLoadStoreOptimization::isLoadStoreCompatible(Value *A, Value *B) {
+  bool GenLoadStoreOptimization::isLoadStoreCompatible(Value *A, Value *B, int *dist, int* elementSize, int maxVecSize) {
     Value *ptrA = getPointerOperand(A);
     Value *ptrB = getPointerOperand(B);
     unsigned ASA = getAddressSpace(A);
@@ -136,7 +136,11 @@  namespace gbe {
     // The Instructions are connsecutive if the size of the first load/store is
     // the same as the offset.
     int64_t sz = TD->getTypeStoreSize(Ty);
-    return ((-offset) == sz);
+    *dist = -offset;
+    *elementSize = sz;
+
+    //a insn with small distance from the search load/store is a candidate one
+    return (abs(-offset) < sz*maxVecSize);
   }
 
   void GenLoadStoreOptimization::mergeLoad(BasicBlock &BB, SmallVector<Instruction*, 16> &merged) {
@@ -163,6 +167,25 @@  namespace gbe {
       values[i]->replaceAllUsesWith(S);
     }
   }
+
+  class mergedInfo{
+    public:
+    Instruction* mInsn;
+    int mOffset;
+
+    void init(Instruction* insn, int offset)
+    {
+      mInsn = insn;
+      mOffset = offset;
+    }
+  };
+
+  struct offsetSorter {
+    bool operator()(mergedInfo* m0, mergedInfo* m1) const {
+    return m0->mOffset < m1->mOffset;
+    }
+  };
+
   // When searching for consecutive memory access, we do it in a small window,
   // if the window is too large, it would take up too much compiling time.
   // An Important rule we have followed is don't try to change load/store order.
@@ -177,7 +200,6 @@  namespace gbe {
 
     if(!isSimpleLoadStore(&*start)) return false;
 
-    merged.push_back(&*start);
     unsigned targetAddrSpace = getAddressSpace(&*start);
 
     BasicBlock::iterator E = BB.end();
@@ -187,11 +209,27 @@  namespace gbe {
     unsigned maxLimit = maxVecSize * 8;
     bool reordered = false;
 
-    for(unsigned ss = 0; J != E && ss <= maxLimit; ++ss, ++J) {
-      if((isLoad && isa<LoadInst>(*J)) || (!isLoad && isa<StoreInst>(*J))) {
-        if(isLoadStoreCompatible(merged[merged.size()-1], &*J)) {
-          merged.push_back(&*J);
-        }
+    bool ready = false;
+    int elementSize;
+
+    SmallVector<mergedInfo *, 16> searchInsnArray;
+    mergedInfo meInfoArray[16];
+    int indx = 0;
+    meInfoArray[indx++].init(&*start, 0);
+
+    searchInsnArray.push_back(&meInfoArray[0]);
+    BasicBlock::iterator I = start;
+    ++I;
+
+    for(unsigned ss = 0; I!= E && ss <= maxLimit; ++ss, ++I) {
+      if((isLoad && isa<LoadInst>(*I)) || (!isLoad && isa<StoreInst>(*J))) {
+          int distance;
+          if(isLoadStoreCompatible(searchInsnArray[0]->mInsn, &*I, &distance, &elementSize, maxVecSize))
+          {
+            meInfoArray[indx].init(&*I, distance);
+            searchInsnArray.push_back(&meInfoArray[indx]);
+            indx++;
+          }
       } else if((isLoad && isa<StoreInst>(*J))) {
         // simple stop to keep read/write order
         StoreInst *st = cast<StoreInst>(&*J);
@@ -214,6 +252,37 @@  namespace gbe {
       if(merged.size() >= maxVecSize) break;
     }
 
+    if(indx > 1)
+    {
+      //try to sort the load/store by the offset from the start
+      //the farthest is at the beginning. this is easy to find the
+      //continuous load/store
+      std::sort(searchInsnArray.begin(), searchInsnArray.end(), offsetSorter());
+
+      // try to find continuous loadstore insn in the candidate array
+      for(unsigned i = 0; i < searchInsnArray.size(); i++)
+      {
+        unsigned j;
+        for(j = 0; j < maxVecSize-1 && (j+i+1) < searchInsnArray.size(); j++)
+        {
+          if(searchInsnArray[i+j+1]->mOffset - searchInsnArray[i+j]->mOffset != elementSize)
+            break;
+
+          //this means the search load/store which offset is 0, is in the sequence
+          if(searchInsnArray[i+j]->mOffset == 0 || searchInsnArray[i+j+1]->mOffset == 0)
+            ready = true;
+        }
+
+        if(j > 0 && ready)
+        {
+          for(unsigned k = 0; k < j+1; k++)
+            merged.push_back(searchInsnArray[i+k]->mInsn);
+
+          break;
+        }
+      }
+    }
+
     return reordered;
   }
 

Comments

Song, Ruiling June 22, 2017, 6:29 a.m.
LGTM

Ruiling

> -----Original Message-----

> From: Beignet [mailto:beignet-bounces@lists.freedesktop.org] On Behalf Of

> rander.wang

> Sent: Friday, June 16, 2017 9:50 AM

> To: beignet@freedesktop.org

> Cc: Wang, Rander <rander.wang@intel.com>

> Subject: [Beignet] [PATCH V2] backend: refine load/store merging algorithm

> 

> 	Now it works for sequence: load(0), load(1), load(2)

> 	but it cant work for load(2), load(0), load(1). because

>         it compared the last merged load and the new one not all

> 	the loads

> 

> 	for  sequence: load(0), load(1), load(2). the load(0) is the

>         start, can find that load(1) is successor without space, so

>         put it to a merge fifo. then the start is moving to the top

>         of fifo load(1), and compared with load(2). Also load(2) can be merged

> 

> 	for load(2), load(0), load(1). load(2) cant be merged with

>         load(0) for a space between them. So skip load(0) and mov to next

>         load(1).And this load(1) can be merged. But it never go back merge

>         load(0)

> 

>         Now change the algorithm.

> 	(1) find all loads maybe merged arround the start by the distance to

> 	    the start. the distance is depended on data type, for 32bit data, the

>             distance is 4. Put them in a list

> 

>         (2) sort the list by the distance from the start.

> 

> 	(3) search the continuous sequence including the start to merge

> 

> 	V2: (1)refine the sort and compare algoritm. First find all the IO

> 	       in small offset compared to start. Then call std:sort

>             (2)check the number of candidate IO to be favorable to performance

> 	       for most cases there is no chance to merge IO

> 

> Signed-off-by: rander.wang <rander.wang@intel.com>

> ---

>  backend/src/llvm/llvm_loadstore_optimization.cpp | 87

> +++++++++++++++++++++---

>  1 file changed, 78 insertions(+), 9 deletions(-)

> 

> diff --git a/backend/src/llvm/llvm_loadstore_optimization.cpp

> b/backend/src/llvm/llvm_loadstore_optimization.cpp

> index 5aa38be..c91c1a0 100644

> --- a/backend/src/llvm/llvm_loadstore_optimization.cpp

> +++ b/backend/src/llvm/llvm_loadstore_optimization.cpp

> @@ -67,7 +67,7 @@ namespace gbe {

>      bool     isSimpleLoadStore(Value *I);

>      bool     optimizeLoadStore(BasicBlock &BB);

> 

> -    bool     isLoadStoreCompatible(Value *A, Value *B);

> +    bool     isLoadStoreCompatible(Value *A, Value *B, int *dist, int* elementSize,

> int maxVecSize);

>      void     mergeLoad(BasicBlock &BB, SmallVector<Instruction*, 16> &merged);

>      void     mergeStore(BasicBlock &BB, SmallVector<Instruction*, 16> &merged);

>      bool     findConsecutiveAccess(BasicBlock &BB,

> @@ -109,7 +109,7 @@ namespace gbe {

>      return NULL;

>    }

> 

> -  bool GenLoadStoreOptimization::isLoadStoreCompatible(Value *A, Value *B) {

> +  bool GenLoadStoreOptimization::isLoadStoreCompatible(Value *A, Value *B,

> int *dist, int* elementSize, int maxVecSize) {

>      Value *ptrA = getPointerOperand(A);

>      Value *ptrB = getPointerOperand(B);

>      unsigned ASA = getAddressSpace(A);

> @@ -136,7 +136,11 @@ namespace gbe {

>      // The Instructions are connsecutive if the size of the first load/store is

>      // the same as the offset.

>      int64_t sz = TD->getTypeStoreSize(Ty);

> -    return ((-offset) == sz);

> +    *dist = -offset;

> +    *elementSize = sz;

> +

> +    //a insn with small distance from the search load/store is a candidate one

> +    return (abs(-offset) < sz*maxVecSize);

>    }

> 

>    void GenLoadStoreOptimization::mergeLoad(BasicBlock &BB,

> SmallVector<Instruction*, 16> &merged) {

> @@ -163,6 +167,25 @@ namespace gbe {

>        values[i]->replaceAllUsesWith(S);

>      }

>    }

> +

> +  class mergedInfo{

> +    public:

> +    Instruction* mInsn;

> +    int mOffset;

> +

> +    void init(Instruction* insn, int offset)

> +    {

> +      mInsn = insn;

> +      mOffset = offset;

> +    }

> +  };

> +

> +  struct offsetSorter {

> +    bool operator()(mergedInfo* m0, mergedInfo* m1) const {

> +    return m0->mOffset < m1->mOffset;

> +    }

> +  };

> +

>    // When searching for consecutive memory access, we do it in a small window,

>    // if the window is too large, it would take up too much compiling time.

>    // An Important rule we have followed is don't try to change load/store order.

> @@ -177,7 +200,6 @@ namespace gbe {

> 

>      if(!isSimpleLoadStore(&*start)) return false;

> 

> -    merged.push_back(&*start);

>      unsigned targetAddrSpace = getAddressSpace(&*start);

> 

>      BasicBlock::iterator E = BB.end();

> @@ -187,11 +209,27 @@ namespace gbe {

>      unsigned maxLimit = maxVecSize * 8;

>      bool reordered = false;

> 

> -    for(unsigned ss = 0; J != E && ss <= maxLimit; ++ss, ++J) {

> -      if((isLoad && isa<LoadInst>(*J)) || (!isLoad && isa<StoreInst>(*J))) {

> -        if(isLoadStoreCompatible(merged[merged.size()-1], &*J)) {

> -          merged.push_back(&*J);

> -        }

> +    bool ready = false;

> +    int elementSize;

> +

> +    SmallVector<mergedInfo *, 16> searchInsnArray;

> +    mergedInfo meInfoArray[16];

> +    int indx = 0;

> +    meInfoArray[indx++].init(&*start, 0);

> +

> +    searchInsnArray.push_back(&meInfoArray[0]);

> +    BasicBlock::iterator I = start;

> +    ++I;

> +

> +    for(unsigned ss = 0; I!= E && ss <= maxLimit; ++ss, ++I) {

> +      if((isLoad && isa<LoadInst>(*I)) || (!isLoad && isa<StoreInst>(*J))) {

> +          int distance;

> +          if(isLoadStoreCompatible(searchInsnArray[0]->mInsn, &*I, &distance,

> &elementSize, maxVecSize))

> +          {

> +            meInfoArray[indx].init(&*I, distance);

> +            searchInsnArray.push_back(&meInfoArray[indx]);

> +            indx++;

> +          }

>        } else if((isLoad && isa<StoreInst>(*J))) {

>          // simple stop to keep read/write order

>          StoreInst *st = cast<StoreInst>(&*J);

> @@ -214,6 +252,37 @@ namespace gbe {

>        if(merged.size() >= maxVecSize) break;

>      }

> 

> +    if(indx > 1)

> +    {

> +      //try to sort the load/store by the offset from the start

> +      //the farthest is at the beginning. this is easy to find the

> +      //continuous load/store

> +      std::sort(searchInsnArray.begin(), searchInsnArray.end(), offsetSorter());

> +

> +      // try to find continuous loadstore insn in the candidate array

> +      for(unsigned i = 0; i < searchInsnArray.size(); i++)

> +      {

> +        unsigned j;

> +        for(j = 0; j < maxVecSize-1 && (j+i+1) < searchInsnArray.size(); j++)

> +        {

> +          if(searchInsnArray[i+j+1]->mOffset - searchInsnArray[i+j]->mOffset !=

> elementSize)

> +            break;

> +

> +          //this means the search load/store which offset is 0, is in the sequence

> +          if(searchInsnArray[i+j]->mOffset == 0 || searchInsnArray[i+j+1]->mOffset

> == 0)

> +            ready = true;

> +        }

> +

> +        if(j > 0 && ready)

> +        {

> +          for(unsigned k = 0; k < j+1; k++)

> +            merged.push_back(searchInsnArray[i+k]->mInsn);

> +

> +          break;

> +        }

> +      }

> +    }

> +

>      return reordered;

>    }

> 

> --

> 2.7.4

> 

> _______________________________________________

> Beignet mailing list

> Beignet@lists.freedesktop.org

> https://lists.freedesktop.org/mailman/listinfo/beignet
Yang, Rong R June 23, 2017, 5:28 a.m.
Pushed, thanks.

> -----Original Message-----

> From: Beignet [mailto:beignet-bounces@lists.freedesktop.org] On Behalf Of

> Song, Ruiling

> Sent: Thursday, June 22, 2017 14:29

> To: Wang, Rander <rander.wang@intel.com>; beignet@freedesktop.org

> Cc: Wang, Rander <rander.wang@intel.com>

> Subject: Re: [Beignet] [PATCH V2] backend: refine load/store merging

> algorithm

> 

> LGTM

> 

> Ruiling

> 

> > -----Original Message-----

> > From: Beignet [mailto:beignet-bounces@lists.freedesktop.org] On Behalf

> > Of rander.wang

> > Sent: Friday, June 16, 2017 9:50 AM

> > To: beignet@freedesktop.org

> > Cc: Wang, Rander <rander.wang@intel.com>

> > Subject: [Beignet] [PATCH V2] backend: refine load/store merging

> > algorithm

> >

> > 	Now it works for sequence: load(0), load(1), load(2)

> > 	but it cant work for load(2), load(0), load(1). because

> >         it compared the last merged load and the new one not all

> > 	the loads

> >

> > 	for  sequence: load(0), load(1), load(2). the load(0) is the

> >         start, can find that load(1) is successor without space, so

> >         put it to a merge fifo. then the start is moving to the top

> >         of fifo load(1), and compared with load(2). Also load(2) can

> > be merged

> >

> > 	for load(2), load(0), load(1). load(2) cant be merged with

> >         load(0) for a space between them. So skip load(0) and mov to next

> >         load(1).And this load(1) can be merged. But it never go back merge

> >         load(0)

> >

> >         Now change the algorithm.

> > 	(1) find all loads maybe merged arround the start by the distance to

> > 	    the start. the distance is depended on data type, for 32bit data, the

> >             distance is 4. Put them in a list

> >

> >         (2) sort the list by the distance from the start.

> >

> > 	(3) search the continuous sequence including the start to merge

> >

> > 	V2: (1)refine the sort and compare algoritm. First find all the IO

> > 	       in small offset compared to start. Then call std:sort

> >             (2)check the number of candidate IO to be favorable to performance

> > 	       for most cases there is no chance to merge IO

> >

> > Signed-off-by: rander.wang <rander.wang@intel.com>

> > ---

> >  backend/src/llvm/llvm_loadstore_optimization.cpp | 87

> > +++++++++++++++++++++---

> >  1 file changed, 78 insertions(+), 9 deletions(-)

> >

> > diff --git a/backend/src/llvm/llvm_loadstore_optimization.cpp

> > b/backend/src/llvm/llvm_loadstore_optimization.cpp

> > index 5aa38be..c91c1a0 100644

> > --- a/backend/src/llvm/llvm_loadstore_optimization.cpp

> > +++ b/backend/src/llvm/llvm_loadstore_optimization.cpp

> > @@ -67,7 +67,7 @@ namespace gbe {

> >      bool     isSimpleLoadStore(Value *I);

> >      bool     optimizeLoadStore(BasicBlock &BB);

> >

> > -    bool     isLoadStoreCompatible(Value *A, Value *B);

> > +    bool     isLoadStoreCompatible(Value *A, Value *B, int *dist, int*

> elementSize,

> > int maxVecSize);

> >      void     mergeLoad(BasicBlock &BB, SmallVector<Instruction*, 16>

> &merged);

> >      void     mergeStore(BasicBlock &BB, SmallVector<Instruction*, 16>

> &merged);

> >      bool     findConsecutiveAccess(BasicBlock &BB,

> > @@ -109,7 +109,7 @@ namespace gbe {

> >      return NULL;

> >    }

> >

> > -  bool GenLoadStoreOptimization::isLoadStoreCompatible(Value *A,

> > Value *B) {

> > +  bool GenLoadStoreOptimization::isLoadStoreCompatible(Value *A,

> > + Value *B,

> > int *dist, int* elementSize, int maxVecSize) {

> >      Value *ptrA = getPointerOperand(A);

> >      Value *ptrB = getPointerOperand(B);

> >      unsigned ASA = getAddressSpace(A); @@ -136,7 +136,11 @@

> namespace

> > gbe {

> >      // The Instructions are connsecutive if the size of the first load/store is

> >      // the same as the offset.

> >      int64_t sz = TD->getTypeStoreSize(Ty);

> > -    return ((-offset) == sz);

> > +    *dist = -offset;

> > +    *elementSize = sz;

> > +

> > +    //a insn with small distance from the search load/store is a candidate

> one

> > +    return (abs(-offset) < sz*maxVecSize);

> >    }

> >

> >    void GenLoadStoreOptimization::mergeLoad(BasicBlock &BB,

> > SmallVector<Instruction*, 16> &merged) { @@ -163,6 +167,25 @@

> > namespace gbe {

> >        values[i]->replaceAllUsesWith(S);

> >      }

> >    }

> > +

> > +  class mergedInfo{

> > +    public:

> > +    Instruction* mInsn;

> > +    int mOffset;

> > +

> > +    void init(Instruction* insn, int offset)

> > +    {

> > +      mInsn = insn;

> > +      mOffset = offset;

> > +    }

> > +  };

> > +

> > +  struct offsetSorter {

> > +    bool operator()(mergedInfo* m0, mergedInfo* m1) const {

> > +    return m0->mOffset < m1->mOffset;

> > +    }

> > +  };

> > +

> >    // When searching for consecutive memory access, we do it in a small

> window,

> >    // if the window is too large, it would take up too much compiling time.

> >    // An Important rule we have followed is don't try to change load/store

> order.

> > @@ -177,7 +200,6 @@ namespace gbe {

> >

> >      if(!isSimpleLoadStore(&*start)) return false;

> >

> > -    merged.push_back(&*start);

> >      unsigned targetAddrSpace = getAddressSpace(&*start);

> >

> >      BasicBlock::iterator E = BB.end(); @@ -187,11 +209,27 @@

> > namespace gbe {

> >      unsigned maxLimit = maxVecSize * 8;

> >      bool reordered = false;

> >

> > -    for(unsigned ss = 0; J != E && ss <= maxLimit; ++ss, ++J) {

> > -      if((isLoad && isa<LoadInst>(*J)) || (!isLoad && isa<StoreInst>(*J))) {

> > -        if(isLoadStoreCompatible(merged[merged.size()-1], &*J)) {

> > -          merged.push_back(&*J);

> > -        }

> > +    bool ready = false;

> > +    int elementSize;

> > +

> > +    SmallVector<mergedInfo *, 16> searchInsnArray;

> > +    mergedInfo meInfoArray[16];

> > +    int indx = 0;

> > +    meInfoArray[indx++].init(&*start, 0);

> > +

> > +    searchInsnArray.push_back(&meInfoArray[0]);

> > +    BasicBlock::iterator I = start;

> > +    ++I;

> > +

> > +    for(unsigned ss = 0; I!= E && ss <= maxLimit; ++ss, ++I) {

> > +      if((isLoad && isa<LoadInst>(*I)) || (!isLoad && isa<StoreInst>(*J))) {

> > +          int distance;

> > +          if(isLoadStoreCompatible(searchInsnArray[0]->mInsn, &*I,

> > + &distance,

> > &elementSize, maxVecSize))

> > +          {

> > +            meInfoArray[indx].init(&*I, distance);

> > +            searchInsnArray.push_back(&meInfoArray[indx]);

> > +            indx++;

> > +          }

> >        } else if((isLoad && isa<StoreInst>(*J))) {

> >          // simple stop to keep read/write order

> >          StoreInst *st = cast<StoreInst>(&*J); @@ -214,6 +252,37 @@

> > namespace gbe {

> >        if(merged.size() >= maxVecSize) break;

> >      }

> >

> > +    if(indx > 1)

> > +    {

> > +      //try to sort the load/store by the offset from the start

> > +      //the farthest is at the beginning. this is easy to find the

> > +      //continuous load/store

> > +      std::sort(searchInsnArray.begin(), searchInsnArray.end(),

> > + offsetSorter());

> > +

> > +      // try to find continuous loadstore insn in the candidate array

> > +      for(unsigned i = 0; i < searchInsnArray.size(); i++)

> > +      {

> > +        unsigned j;

> > +        for(j = 0; j < maxVecSize-1 && (j+i+1) < searchInsnArray.size(); j++)

> > +        {

> > +          if(searchInsnArray[i+j+1]->mOffset -

> > + searchInsnArray[i+j]->mOffset !=

> > elementSize)

> > +            break;

> > +

> > +          //this means the search load/store which offset is 0, is in the

> sequence

> > +          if(searchInsnArray[i+j]->mOffset == 0 ||

> > + searchInsnArray[i+j+1]->mOffset

> > == 0)

> > +            ready = true;

> > +        }

> > +

> > +        if(j > 0 && ready)

> > +        {

> > +          for(unsigned k = 0; k < j+1; k++)

> > +            merged.push_back(searchInsnArray[i+k]->mInsn);

> > +

> > +          break;

> > +        }

> > +      }

> > +    }

> > +

> >      return reordered;

> >    }

> >

> > --

> > 2.7.4

> >

> > _______________________________________________

> > Beignet mailing list

> > Beignet@lists.freedesktop.org

> > https://lists.freedesktop.org/mailman/listinfo/beignet

> _______________________________________________

> Beignet mailing list

> Beignet@lists.freedesktop.org

> https://lists.freedesktop.org/mailman/listinfo/beignet