generate sub_group_id inside kernel instead of payload

Submitted by Guo Yejun on Aug. 7, 2015, 12:51 a.m.

Details

Message ID 1438908687-25707-1-git-send-email-yejun.guo@intel.com
State New
Headers show

Not browsing as part of any series.

Commit Message

Guo Yejun Aug. 7, 2015, 12:51 a.m.
get_sub_group_id ranges at [0, 7] for SIMD8 and [0, 15] for SIMD16,
previously we set up the values in kernel payload, now change it
to generate the values inside kernel with packed integer vector.

Signed-off-by: Guo Yejun <yejun.guo@intel.com>
---
 backend/src/backend/gen_context.cpp        |  8 --------
 backend/src/backend/gen_insn_selection.cpp | 18 ++++++++++++++++--
 backend/src/backend/program.h              |  1 -
 backend/src/ir/profile.cpp                 |  1 -
 backend/src/ir/profile.hpp                 |  7 +++----
 src/cl_command_queue_gen7.c                |  8 --------
 6 files changed, 19 insertions(+), 24 deletions(-)

Patch hide | download patch | download mbox

diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index e16b0a9..29b58df 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -2217,13 +2217,8 @@  namespace gbe
       allocCurbeReg(reg, GBE_CURBE_##PATCH); \
     } else
 
-    bool needLaneID = false;
     fn.foreachInstruction([&](ir::Instruction &insn) {
       const uint32_t srcNum = insn.getSrcNum();
-      if (insn.getOpcode() == ir::OP_SIMD_ID) {
-        GBE_ASSERT(srcNum == 0);
-        needLaneID = true;
-      }
       for (uint32_t srcID = 0; srcID < srcNum; ++srcID) {
         const ir::Register reg = insn.getSrc(srcID);
         if (insn.getOpcode() == ir::OP_GET_IMAGE_INFO) {
@@ -2262,9 +2257,6 @@  namespace gbe
     });
 #undef INSERT_REG
 
-    if (needLaneID)
-      allocCurbeReg(laneid, GBE_CURBE_LANE_ID);
-
     // After this point the vector is immutable. Sorting it will make
     // research faster
     std::sort(kernel->patches.begin(), kernel->patches.end());
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index b0ba9e3..9772ad1 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -2299,8 +2299,22 @@  namespace gbe
           break;
         case ir::OP_SIMD_ID:
           {
-            const GenRegister selLaneID = sel.selReg(ir::ocl::laneid, ir::TYPE_U32);
-            sel.MOV(dst, selLaneID);
+            const GenRegister landID = GenRegister::immv(0x76543210);
+            ir::Register r = sel.reg(ir::RegisterFamily::FAMILY_WORD);
+            const GenRegister dst_ = sel.selReg(r, ir::TYPE_U16);
+
+            uint32_t execWidth = sel.curr.execWidth;
+            sel.curr.execWidth = 8;
+            sel.MOV(dst_, landID);
+
+            if (execWidth == 16) {
+                //Packed Unsigned Half-Byte Integer Vector does not work
+                //have to mock by adding 8 to the singed vector
+                const GenRegister eight = GenRegister::immuw(8);
+                sel.ADD(GenRegister::offset(dst_, 0, 16), dst_, eight);
+                sel.curr.execWidth = 16;
+            }
+            sel.MOV(dst, dst_);
           }
           break;
         default: NOT_SUPPORTED;
diff --git a/backend/src/backend/program.h b/backend/src/backend/program.h
index 3637ebb..56db1a1 100644
--- a/backend/src/backend/program.h
+++ b/backend/src/backend/program.h
@@ -101,7 +101,6 @@  enum gbe_curbe_type {
   GBE_CURBE_THREAD_NUM,
   GBE_CURBE_ZERO,
   GBE_CURBE_ONE,
-  GBE_CURBE_LANE_ID,
   GBE_CURBE_SLM_OFFSET,
   GBE_CURBE_BTI_UTIL,
 };
diff --git a/backend/src/ir/profile.cpp b/backend/src/ir/profile.cpp
index af9f698..37f2d3d 100644
--- a/backend/src/ir/profile.cpp
+++ b/backend/src/ir/profile.cpp
@@ -90,7 +90,6 @@  namespace ir {
       DECL_NEW_REG(FAMILY_DWORD, printfbptr, 1);
       DECL_NEW_REG(FAMILY_DWORD, printfiptr, 1);
       DECL_NEW_REG(FAMILY_DWORD, dwblockip, 0);
-      DECL_NEW_REG(FAMILY_DWORD, laneid, 0);
       DECL_NEW_REG(FAMILY_DWORD, invalid, 1);
       DECL_NEW_REG(FAMILY_DWORD, btiUtil, 1);
     }
diff --git a/backend/src/ir/profile.hpp b/backend/src/ir/profile.hpp
index 9323824..bf909be 100644
--- a/backend/src/ir/profile.hpp
+++ b/backend/src/ir/profile.hpp
@@ -72,10 +72,9 @@  namespace ir {
     static const Register printfbptr = Register(28); // printf buffer address .
     static const Register printfiptr = Register(29); // printf index buffer address.
     static const Register dwblockip = Register(30);  // blockip
-    static const Register laneid = Register(31);  // lane id.
-    static const Register invalid = Register(32);  // used for valid comparation.
-    static const Register btiUtil = Register(33);  // used for mixed pointer as bti utility.
-    static const uint32_t regNum = 34;             // number of special registers
+    static const Register invalid = Register(31);  // used for valid comparation.
+    static const Register btiUtil = Register(32);  // used for mixed pointer as bti utility.
+    static const uint32_t regNum = 33;             // number of special registers
     extern const char *specialRegMean[];           // special register name.
   } /* namespace ocl */
 
diff --git a/src/cl_command_queue_gen7.c b/src/cl_command_queue_gen7.c
index 89f39b3..4adbd2b 100644
--- a/src/cl_command_queue_gen7.c
+++ b/src/cl_command_queue_gen7.c
@@ -210,14 +210,6 @@  cl_curbe_fill(cl_kernel ker,
   UPLOAD(GBE_CURBE_WORK_DIM, work_dim);
 #undef UPLOAD
 
-  /* get_sub_group_id needs it */
-  if ((offset = interp_kernel_get_curbe_offset(ker->opaque, GBE_CURBE_LANE_ID, 0)) >= 0) {
-    const uint32_t simd_sz = interp_kernel_get_simd_width(ker->opaque);
-    uint32_t *laneid = (uint32_t *) (ker->curbe + offset);
-    int32_t i;
-    for (i = 0; i < (int32_t) simd_sz; ++i) laneid[i] = i;
-  }
-
   /* Write identity for the stack pointer. This is required by the stack pointer
    * computation in the kernel
    */

Comments

One comment.

> -----Original Message-----

> From: Beignet [mailto:beignet-bounces@lists.freedesktop.org] On Behalf Of

> Guo Yejun

> Sent: Friday, August 7, 2015 08:51

> To: beignet@lists.freedesktop.org

> Cc: Guo, Yejun

> Subject: [Beignet] [PATCH] generate sub_group_id inside kernel instead of

> payload

> 

> get_sub_group_id ranges at [0, 7] for SIMD8 and [0, 15] for SIMD16,

> previously we set up the values in kernel payload, now change it to generate

> the values inside kernel with packed integer vector.

> 

> Signed-off-by: Guo Yejun <yejun.guo@intel.com>

> ---

>  backend/src/backend/gen_context.cpp        |  8 --------

>  backend/src/backend/gen_insn_selection.cpp | 18 ++++++++++++++++--

>  backend/src/backend/program.h              |  1 -

>  backend/src/ir/profile.cpp                 |  1 -

>  backend/src/ir/profile.hpp                 |  7 +++----

>  src/cl_command_queue_gen7.c                |  8 --------

>  6 files changed, 19 insertions(+), 24 deletions(-)

> 

> diff --git a/backend/src/backend/gen_context.cpp

> b/backend/src/backend/gen_context.cpp

> index e16b0a9..29b58df 100644

> --- a/backend/src/backend/gen_context.cpp

> +++ b/backend/src/backend/gen_context.cpp

> @@ -2217,13 +2217,8 @@ namespace gbe

>        allocCurbeReg(reg, GBE_CURBE_##PATCH); \

>      } else

> 

> -    bool needLaneID = false;

>      fn.foreachInstruction([&](ir::Instruction &insn) {

>        const uint32_t srcNum = insn.getSrcNum();

> -      if (insn.getOpcode() == ir::OP_SIMD_ID) {

> -        GBE_ASSERT(srcNum == 0);

> -        needLaneID = true;

> -      }

>        for (uint32_t srcID = 0; srcID < srcNum; ++srcID) {

>          const ir::Register reg = insn.getSrc(srcID);

>          if (insn.getOpcode() == ir::OP_GET_IMAGE_INFO) { @@ -2262,9 +2257,6

> @@ namespace gbe

>      });

>  #undef INSERT_REG

> 

> -    if (needLaneID)

> -      allocCurbeReg(laneid, GBE_CURBE_LANE_ID);

> -

>      // After this point the vector is immutable. Sorting it will make

>      // research faster

>      std::sort(kernel->patches.begin(), kernel->patches.end()); diff --git

> a/backend/src/backend/gen_insn_selection.cpp

> b/backend/src/backend/gen_insn_selection.cpp

> index b0ba9e3..9772ad1 100644

> --- a/backend/src/backend/gen_insn_selection.cpp

> +++ b/backend/src/backend/gen_insn_selection.cpp

> @@ -2299,8 +2299,22 @@ namespace gbe

>            break;

>          case ir::OP_SIMD_ID:

>            {

> -            const GenRegister selLaneID = sel.selReg(ir::ocl::laneid, ir::TYPE_U32);

> -            sel.MOV(dst, selLaneID);

> +            const GenRegister landID = GenRegister::immv(0x76543210);

> +            ir::Register r = sel.reg(ir::RegisterFamily::FAMILY_WORD);

> +            const GenRegister dst_ = sel.selReg(r, ir::TYPE_U16);

> +

> +            uint32_t execWidth = sel.curr.execWidth;

> +            sel.curr.execWidth = 8;

> +            sel.MOV(dst_, landID);

> +

> +            if (execWidth == 16) {

> +                //Packed Unsigned Half-Byte Integer Vector does not work

> +                //have to mock by adding 8 to the singed vector

> +                const GenRegister eight = GenRegister::immuw(8);

> +                sel.ADD(GenRegister::offset(dst_, 0, 16), dst_, eight);

> +                sel.curr.execWidth = 16;

> +            }


Because you have changed the state, I think it is better to use sel.push(), sel.pop().

> +            sel.MOV(dst, dst_);

>            }

>            break;

>          default: NOT_SUPPORTED;

> diff --git a/backend/src/backend/program.h

> b/backend/src/backend/program.h index 3637ebb..56db1a1 100644

> --- a/backend/src/backend/program.h

> +++ b/backend/src/backend/program.h

> @@ -101,7 +101,6 @@ enum gbe_curbe_type {

>    GBE_CURBE_THREAD_NUM,

>    GBE_CURBE_ZERO,

>    GBE_CURBE_ONE,

> -  GBE_CURBE_LANE_ID,

>    GBE_CURBE_SLM_OFFSET,

>    GBE_CURBE_BTI_UTIL,

>  };

> diff --git a/backend/src/ir/profile.cpp b/backend/src/ir/profile.cpp index

> af9f698..37f2d3d 100644

> --- a/backend/src/ir/profile.cpp

> +++ b/backend/src/ir/profile.cpp

> @@ -90,7 +90,6 @@ namespace ir {

>        DECL_NEW_REG(FAMILY_DWORD, printfbptr, 1);

>        DECL_NEW_REG(FAMILY_DWORD, printfiptr, 1);

>        DECL_NEW_REG(FAMILY_DWORD, dwblockip, 0);

> -      DECL_NEW_REG(FAMILY_DWORD, laneid, 0);

>        DECL_NEW_REG(FAMILY_DWORD, invalid, 1);

>        DECL_NEW_REG(FAMILY_DWORD, btiUtil, 1);

>      }

> diff --git a/backend/src/ir/profile.hpp b/backend/src/ir/profile.hpp index

> 9323824..bf909be 100644

> --- a/backend/src/ir/profile.hpp

> +++ b/backend/src/ir/profile.hpp

> @@ -72,10 +72,9 @@ namespace ir {

>      static const Register printfbptr = Register(28); // printf buffer address .

>      static const Register printfiptr = Register(29); // printf index buffer address.

>      static const Register dwblockip = Register(30);  // blockip

> -    static const Register laneid = Register(31);  // lane id.

> -    static const Register invalid = Register(32);  // used for valid comparation.

> -    static const Register btiUtil = Register(33);  // used for mixed pointer as bti

> utility.

> -    static const uint32_t regNum = 34;             // number of special registers

> +    static const Register invalid = Register(31);  // used for valid comparation.

> +    static const Register btiUtil = Register(32);  // used for mixed pointer as bti

> utility.

> +    static const uint32_t regNum = 33;             // number of special registers

>      extern const char *specialRegMean[];           // special register name.

>    } /* namespace ocl */

> 

> diff --git a/src/cl_command_queue_gen7.c

> b/src/cl_command_queue_gen7.c index 89f39b3..4adbd2b 100644

> --- a/src/cl_command_queue_gen7.c

> +++ b/src/cl_command_queue_gen7.c

> @@ -210,14 +210,6 @@ cl_curbe_fill(cl_kernel ker,

>    UPLOAD(GBE_CURBE_WORK_DIM, work_dim);  #undef UPLOAD

> 

> -  /* get_sub_group_id needs it */

> -  if ((offset = interp_kernel_get_curbe_offset(ker->opaque,

> GBE_CURBE_LANE_ID, 0)) >= 0) {

> -    const uint32_t simd_sz = interp_kernel_get_simd_width(ker->opaque);

> -    uint32_t *laneid = (uint32_t *) (ker->curbe + offset);

> -    int32_t i;

> -    for (i = 0; i < (int32_t) simd_sz; ++i) laneid[i] = i;

> -  }

> -

>    /* Write identity for the stack pointer. This is required by the stack pointer

>     * computation in the kernel

>     */

> --

> 1.9.1

> 

> _______________________________________________

> Beignet mailing list

> Beignet@lists.freedesktop.org

> http://lists.freedesktop.org/mailman/listinfo/beignet
The sel.push()/pop() is already at the beginning/end of the function, shall I add it again?

Btw, I plan to encapsulate into a function so that others can get the lane id easily.

-----Original Message-----
From: Yang, Rong R 

Sent: Monday, August 10, 2015 11:37 AM
To: Guo, Yejun; beignet@lists.freedesktop.org
Cc: Guo, Yejun
Subject: RE: [Beignet] [PATCH] generate sub_group_id inside kernel instead of payload

One comment.

> -----Original Message-----

> From: Beignet [mailto:beignet-bounces@lists.freedesktop.org] On Behalf 

> Of Guo Yejun

> Sent: Friday, August 7, 2015 08:51

> To: beignet@lists.freedesktop.org

> Cc: Guo, Yejun

> Subject: [Beignet] [PATCH] generate sub_group_id inside kernel instead 

> of payload

> 

> get_sub_group_id ranges at [0, 7] for SIMD8 and [0, 15] for SIMD16, 

> previously we set up the values in kernel payload, now change it to 

> generate the values inside kernel with packed integer vector.

> 

> Signed-off-by: Guo Yejun <yejun.guo@intel.com>

> ---

>  backend/src/backend/gen_context.cpp        |  8 --------

>  backend/src/backend/gen_insn_selection.cpp | 18 ++++++++++++++++--

>  backend/src/backend/program.h              |  1 -

>  backend/src/ir/profile.cpp                 |  1 -

>  backend/src/ir/profile.hpp                 |  7 +++----

>  src/cl_command_queue_gen7.c                |  8 --------

>  6 files changed, 19 insertions(+), 24 deletions(-)

> 

> diff --git a/backend/src/backend/gen_context.cpp

> b/backend/src/backend/gen_context.cpp

> index e16b0a9..29b58df 100644

> --- a/backend/src/backend/gen_context.cpp

> +++ b/backend/src/backend/gen_context.cpp

> @@ -2217,13 +2217,8 @@ namespace gbe

>        allocCurbeReg(reg, GBE_CURBE_##PATCH); \

>      } else

> 

> -    bool needLaneID = false;

>      fn.foreachInstruction([&](ir::Instruction &insn) {

>        const uint32_t srcNum = insn.getSrcNum();

> -      if (insn.getOpcode() == ir::OP_SIMD_ID) {

> -        GBE_ASSERT(srcNum == 0);

> -        needLaneID = true;

> -      }

>        for (uint32_t srcID = 0; srcID < srcNum; ++srcID) {

>          const ir::Register reg = insn.getSrc(srcID);

>          if (insn.getOpcode() == ir::OP_GET_IMAGE_INFO) { @@ -2262,9 

> +2257,6 @@ namespace gbe

>      });

>  #undef INSERT_REG

> 

> -    if (needLaneID)

> -      allocCurbeReg(laneid, GBE_CURBE_LANE_ID);

> -

>      // After this point the vector is immutable. Sorting it will make

>      // research faster

>      std::sort(kernel->patches.begin(), kernel->patches.end()); diff 

> --git a/backend/src/backend/gen_insn_selection.cpp

> b/backend/src/backend/gen_insn_selection.cpp

> index b0ba9e3..9772ad1 100644

> --- a/backend/src/backend/gen_insn_selection.cpp

> +++ b/backend/src/backend/gen_insn_selection.cpp

> @@ -2299,8 +2299,22 @@ namespace gbe

>            break;

>          case ir::OP_SIMD_ID:

>            {

> -            const GenRegister selLaneID = sel.selReg(ir::ocl::laneid, ir::TYPE_U32);

> -            sel.MOV(dst, selLaneID);

> +            const GenRegister landID = GenRegister::immv(0x76543210);

> +            ir::Register r = sel.reg(ir::RegisterFamily::FAMILY_WORD);

> +            const GenRegister dst_ = sel.selReg(r, ir::TYPE_U16);

> +

> +            uint32_t execWidth = sel.curr.execWidth;

> +            sel.curr.execWidth = 8;

> +            sel.MOV(dst_, landID);

> +

> +            if (execWidth == 16) {

> +                //Packed Unsigned Half-Byte Integer Vector does not work

> +                //have to mock by adding 8 to the singed vector

> +                const GenRegister eight = GenRegister::immuw(8);

> +                sel.ADD(GenRegister::offset(dst_, 0, 16), dst_, eight);

> +                sel.curr.execWidth = 16;

> +            }


Because you have changed the state, I think it is better to use sel.push(), sel.pop().

> +            sel.MOV(dst, dst_);

>            }

>            break;

>          default: NOT_SUPPORTED;

> diff --git a/backend/src/backend/program.h 

> b/backend/src/backend/program.h index 3637ebb..56db1a1 100644

> --- a/backend/src/backend/program.h

> +++ b/backend/src/backend/program.h

> @@ -101,7 +101,6 @@ enum gbe_curbe_type {

>    GBE_CURBE_THREAD_NUM,

>    GBE_CURBE_ZERO,

>    GBE_CURBE_ONE,

> -  GBE_CURBE_LANE_ID,

>    GBE_CURBE_SLM_OFFSET,

>    GBE_CURBE_BTI_UTIL,

>  };

> diff --git a/backend/src/ir/profile.cpp b/backend/src/ir/profile.cpp 

> index af9f698..37f2d3d 100644

> --- a/backend/src/ir/profile.cpp

> +++ b/backend/src/ir/profile.cpp

> @@ -90,7 +90,6 @@ namespace ir {

>        DECL_NEW_REG(FAMILY_DWORD, printfbptr, 1);

>        DECL_NEW_REG(FAMILY_DWORD, printfiptr, 1);

>        DECL_NEW_REG(FAMILY_DWORD, dwblockip, 0);

> -      DECL_NEW_REG(FAMILY_DWORD, laneid, 0);

>        DECL_NEW_REG(FAMILY_DWORD, invalid, 1);

>        DECL_NEW_REG(FAMILY_DWORD, btiUtil, 1);

>      }

> diff --git a/backend/src/ir/profile.hpp b/backend/src/ir/profile.hpp 

> index 9323824..bf909be 100644

> --- a/backend/src/ir/profile.hpp

> +++ b/backend/src/ir/profile.hpp

> @@ -72,10 +72,9 @@ namespace ir {

>      static const Register printfbptr = Register(28); // printf buffer address .

>      static const Register printfiptr = Register(29); // printf index buffer address.

>      static const Register dwblockip = Register(30);  // blockip

> -    static const Register laneid = Register(31);  // lane id.

> -    static const Register invalid = Register(32);  // used for valid comparation.

> -    static const Register btiUtil = Register(33);  // used for mixed pointer as bti

> utility.

> -    static const uint32_t regNum = 34;             // number of special registers

> +    static const Register invalid = Register(31);  // used for valid comparation.

> +    static const Register btiUtil = Register(32);  // used for mixed 

> + pointer as bti

> utility.

> +    static const uint32_t regNum = 33;             // number of special registers

>      extern const char *specialRegMean[];           // special register name.

>    } /* namespace ocl */

> 

> diff --git a/src/cl_command_queue_gen7.c b/src/cl_command_queue_gen7.c 

> index 89f39b3..4adbd2b 100644

> --- a/src/cl_command_queue_gen7.c

> +++ b/src/cl_command_queue_gen7.c

> @@ -210,14 +210,6 @@ cl_curbe_fill(cl_kernel ker,

>    UPLOAD(GBE_CURBE_WORK_DIM, work_dim);  #undef UPLOAD

> 

> -  /* get_sub_group_id needs it */

> -  if ((offset = interp_kernel_get_curbe_offset(ker->opaque,

> GBE_CURBE_LANE_ID, 0)) >= 0) {

> -    const uint32_t simd_sz = interp_kernel_get_simd_width(ker->opaque);

> -    uint32_t *laneid = (uint32_t *) (ker->curbe + offset);

> -    int32_t i;

> -    for (i = 0; i < (int32_t) simd_sz; ++i) laneid[i] = i;

> -  }

> -

>    /* Write identity for the stack pointer. This is required by the stack pointer

>     * computation in the kernel

>     */

> --

> 1.9.1

> 

> _______________________________________________

> Beignet mailing list

> Beignet@lists.freedesktop.org

> http://lists.freedesktop.org/mailman/listinfo/beignet
Ok, pop and push don't need here.
But you should set the correct p->curr.quarterControl when doing the half of 16 lanes, otherwise the flag mask is not correct.

> -----Original Message-----

> From: Guo, Yejun

> Sent: Monday, August 10, 2015 12:06

> To: Yang, Rong R; beignet@lists.freedesktop.org

> Subject: RE: [Beignet] [PATCH] generate sub_group_id inside kernel instead

> of payload

> 

> The sel.push()/pop() is already at the beginning/end of the function, shall I

> add it again?

> 

> Btw, I plan to encapsulate into a function so that others can get the lane id

> easily.

> 

> -----Original Message-----

> From: Yang, Rong R

> Sent: Monday, August 10, 2015 11:37 AM

> To: Guo, Yejun; beignet@lists.freedesktop.org

> Cc: Guo, Yejun

> Subject: RE: [Beignet] [PATCH] generate sub_group_id inside kernel instead

> of payload

> 

> One comment.

> 

> > -----Original Message-----

> > From: Beignet [mailto:beignet-bounces@lists.freedesktop.org] On Behalf

> > Of Guo Yejun

> > Sent: Friday, August 7, 2015 08:51

> > To: beignet@lists.freedesktop.org

> > Cc: Guo, Yejun

> > Subject: [Beignet] [PATCH] generate sub_group_id inside kernel instead

> > of payload

> >

> > get_sub_group_id ranges at [0, 7] for SIMD8 and [0, 15] for SIMD16,

> > previously we set up the values in kernel payload, now change it to

> > generate the values inside kernel with packed integer vector.

> >

> > Signed-off-by: Guo Yejun <yejun.guo@intel.com>

> > ---

> >  backend/src/backend/gen_context.cpp        |  8 --------

> >  backend/src/backend/gen_insn_selection.cpp | 18 ++++++++++++++++--

> >  backend/src/backend/program.h              |  1 -

> >  backend/src/ir/profile.cpp                 |  1 -

> >  backend/src/ir/profile.hpp                 |  7 +++----

> >  src/cl_command_queue_gen7.c                |  8 --------

> >  6 files changed, 19 insertions(+), 24 deletions(-)

> >

> > diff --git a/backend/src/backend/gen_context.cpp

> > b/backend/src/backend/gen_context.cpp

> > index e16b0a9..29b58df 100644

> > --- a/backend/src/backend/gen_context.cpp

> > +++ b/backend/src/backend/gen_context.cpp

> > @@ -2217,13 +2217,8 @@ namespace gbe

> >        allocCurbeReg(reg, GBE_CURBE_##PATCH); \

> >      } else

> >

> > -    bool needLaneID = false;

> >      fn.foreachInstruction([&](ir::Instruction &insn) {

> >        const uint32_t srcNum = insn.getSrcNum();

> > -      if (insn.getOpcode() == ir::OP_SIMD_ID) {

> > -        GBE_ASSERT(srcNum == 0);

> > -        needLaneID = true;

> > -      }

> >        for (uint32_t srcID = 0; srcID < srcNum; ++srcID) {

> >          const ir::Register reg = insn.getSrc(srcID);

> >          if (insn.getOpcode() == ir::OP_GET_IMAGE_INFO) { @@ -2262,9

> > +2257,6 @@ namespace gbe

> >      });

> >  #undef INSERT_REG

> >

> > -    if (needLaneID)

> > -      allocCurbeReg(laneid, GBE_CURBE_LANE_ID);

> > -

> >      // After this point the vector is immutable. Sorting it will make

> >      // research faster

> >      std::sort(kernel->patches.begin(), kernel->patches.end()); diff

> > --git a/backend/src/backend/gen_insn_selection.cpp

> > b/backend/src/backend/gen_insn_selection.cpp

> > index b0ba9e3..9772ad1 100644

> > --- a/backend/src/backend/gen_insn_selection.cpp

> > +++ b/backend/src/backend/gen_insn_selection.cpp

> > @@ -2299,8 +2299,22 @@ namespace gbe

> >            break;

> >          case ir::OP_SIMD_ID:

> >            {

> > -            const GenRegister selLaneID = sel.selReg(ir::ocl::laneid,

> ir::TYPE_U32);

> > -            sel.MOV(dst, selLaneID);

> > +            const GenRegister landID = GenRegister::immv(0x76543210);

> > +            ir::Register r = sel.reg(ir::RegisterFamily::FAMILY_WORD);

> > +            const GenRegister dst_ = sel.selReg(r, ir::TYPE_U16);

> > +

> > +            uint32_t execWidth = sel.curr.execWidth;

> > +            sel.curr.execWidth = 8;

> > +            sel.MOV(dst_, landID);

> > +

> > +            if (execWidth == 16) {

> > +                //Packed Unsigned Half-Byte Integer Vector does not work

> > +                //have to mock by adding 8 to the singed vector

> > +                const GenRegister eight = GenRegister::immuw(8);

> > +                sel.ADD(GenRegister::offset(dst_, 0, 16), dst_, eight);

> > +                sel.curr.execWidth = 16;

> > +            }

> 

> Because you have changed the state, I think it is better to use sel.push(),

> sel.pop().

> 

> > +            sel.MOV(dst, dst_);

> >            }

> >            break;

> >          default: NOT_SUPPORTED;

> > diff --git a/backend/src/backend/program.h

> > b/backend/src/backend/program.h index 3637ebb..56db1a1 100644

> > --- a/backend/src/backend/program.h

> > +++ b/backend/src/backend/program.h

> > @@ -101,7 +101,6 @@ enum gbe_curbe_type {

> >    GBE_CURBE_THREAD_NUM,

> >    GBE_CURBE_ZERO,

> >    GBE_CURBE_ONE,

> > -  GBE_CURBE_LANE_ID,

> >    GBE_CURBE_SLM_OFFSET,

> >    GBE_CURBE_BTI_UTIL,

> >  };

> > diff --git a/backend/src/ir/profile.cpp b/backend/src/ir/profile.cpp

> > index af9f698..37f2d3d 100644

> > --- a/backend/src/ir/profile.cpp

> > +++ b/backend/src/ir/profile.cpp

> > @@ -90,7 +90,6 @@ namespace ir {

> >        DECL_NEW_REG(FAMILY_DWORD, printfbptr, 1);

> >        DECL_NEW_REG(FAMILY_DWORD, printfiptr, 1);

> >        DECL_NEW_REG(FAMILY_DWORD, dwblockip, 0);

> > -      DECL_NEW_REG(FAMILY_DWORD, laneid, 0);

> >        DECL_NEW_REG(FAMILY_DWORD, invalid, 1);

> >        DECL_NEW_REG(FAMILY_DWORD, btiUtil, 1);

> >      }

> > diff --git a/backend/src/ir/profile.hpp b/backend/src/ir/profile.hpp

> > index 9323824..bf909be 100644

> > --- a/backend/src/ir/profile.hpp

> > +++ b/backend/src/ir/profile.hpp

> > @@ -72,10 +72,9 @@ namespace ir {

> >      static const Register printfbptr = Register(28); // printf buffer address .

> >      static const Register printfiptr = Register(29); // printf index buffer

> address.

> >      static const Register dwblockip = Register(30);  // blockip

> > -    static const Register laneid = Register(31);  // lane id.

> > -    static const Register invalid = Register(32);  // used for valid comparation.

> > -    static const Register btiUtil = Register(33);  // used for mixed pointer as

> bti

> > utility.

> > -    static const uint32_t regNum = 34;             // number of special registers

> > +    static const Register invalid = Register(31);  // used for valid comparation.

> > +    static const Register btiUtil = Register(32);  // used for mixed

> > + pointer as bti

> > utility.

> > +    static const uint32_t regNum = 33;             // number of special registers

> >      extern const char *specialRegMean[];           // special register name.

> >    } /* namespace ocl */

> >

> > diff --git a/src/cl_command_queue_gen7.c

> b/src/cl_command_queue_gen7.c

> > index 89f39b3..4adbd2b 100644

> > --- a/src/cl_command_queue_gen7.c

> > +++ b/src/cl_command_queue_gen7.c

> > @@ -210,14 +210,6 @@ cl_curbe_fill(cl_kernel ker,

> >    UPLOAD(GBE_CURBE_WORK_DIM, work_dim);  #undef UPLOAD

> >

> > -  /* get_sub_group_id needs it */

> > -  if ((offset = interp_kernel_get_curbe_offset(ker->opaque,

> > GBE_CURBE_LANE_ID, 0)) >= 0) {

> > -    const uint32_t simd_sz = interp_kernel_get_simd_width(ker->opaque);

> > -    uint32_t *laneid = (uint32_t *) (ker->curbe + offset);

> > -    int32_t i;

> > -    for (i = 0; i < (int32_t) simd_sz; ++i) laneid[i] = i;

> > -  }

> > -

> >    /* Write identity for the stack pointer. This is required by the stack

> pointer

> >     * computation in the kernel

> >     */

> > --

> > 1.9.1

> >

> > _______________________________________________

> > Beignet mailing list

> > Beignet@lists.freedesktop.org

> > http://lists.freedesktop.org/mailman/listinfo/beignet
Because the mov's dst is tmp register, not the finally dst, so need not set p->curr.quarterControl here.

The patch LGTM, thanks.

> -----Original Message-----

> From: Beignet [mailto:beignet-bounces@lists.freedesktop.org] On Behalf Of

> Yang, Rong R

> Sent: Monday, August 10, 2015 13:47

> To: Guo, Yejun; beignet@lists.freedesktop.org

> Subject: Re: [Beignet] [PATCH] generate sub_group_id inside kernel instead

> of payload

> 

> Ok, pop and push don't need here.

> But you should set the correct p->curr.quarterControl when doing the half of

> 16 lanes, otherwise the flag mask is not correct.

> 

> > -----Original Message-----

> > From: Guo, Yejun

> > Sent: Monday, August 10, 2015 12:06

> > To: Yang, Rong R; beignet@lists.freedesktop.org

> > Subject: RE: [Beignet] [PATCH] generate sub_group_id inside kernel

> > instead of payload

> >

> > The sel.push()/pop() is already at the beginning/end of the function,

> > shall I add it again?

> >

> > Btw, I plan to encapsulate into a function so that others can get the

> > lane id easily.

> >

> > -----Original Message-----

> > From: Yang, Rong R

> > Sent: Monday, August 10, 2015 11:37 AM

> > To: Guo, Yejun; beignet@lists.freedesktop.org

> > Cc: Guo, Yejun

> > Subject: RE: [Beignet] [PATCH] generate sub_group_id inside kernel

> > instead of payload

> >

> > One comment.

> >

> > > -----Original Message-----

> > > From: Beignet [mailto:beignet-bounces@lists.freedesktop.org] On

> > > Behalf Of Guo Yejun

> > > Sent: Friday, August 7, 2015 08:51

> > > To: beignet@lists.freedesktop.org

> > > Cc: Guo, Yejun

> > > Subject: [Beignet] [PATCH] generate sub_group_id inside kernel

> > > instead of payload

> > >

> > > get_sub_group_id ranges at [0, 7] for SIMD8 and [0, 15] for SIMD16,

> > > previously we set up the values in kernel payload, now change it to

> > > generate the values inside kernel with packed integer vector.

> > >

> > > Signed-off-by: Guo Yejun <yejun.guo@intel.com>

> > > ---

> > >  backend/src/backend/gen_context.cpp        |  8 --------

> > >  backend/src/backend/gen_insn_selection.cpp | 18

> ++++++++++++++++--

> > >  backend/src/backend/program.h              |  1 -

> > >  backend/src/ir/profile.cpp                 |  1 -

> > >  backend/src/ir/profile.hpp                 |  7 +++----

> > >  src/cl_command_queue_gen7.c                |  8 --------

> > >  6 files changed, 19 insertions(+), 24 deletions(-)

> > >

> > > diff --git a/backend/src/backend/gen_context.cpp

> > > b/backend/src/backend/gen_context.cpp

> > > index e16b0a9..29b58df 100644

> > > --- a/backend/src/backend/gen_context.cpp

> > > +++ b/backend/src/backend/gen_context.cpp

> > > @@ -2217,13 +2217,8 @@ namespace gbe

> > >        allocCurbeReg(reg, GBE_CURBE_##PATCH); \

> > >      } else

> > >

> > > -    bool needLaneID = false;

> > >      fn.foreachInstruction([&](ir::Instruction &insn) {

> > >        const uint32_t srcNum = insn.getSrcNum();

> > > -      if (insn.getOpcode() == ir::OP_SIMD_ID) {

> > > -        GBE_ASSERT(srcNum == 0);

> > > -        needLaneID = true;

> > > -      }

> > >        for (uint32_t srcID = 0; srcID < srcNum; ++srcID) {

> > >          const ir::Register reg = insn.getSrc(srcID);

> > >          if (insn.getOpcode() == ir::OP_GET_IMAGE_INFO) { @@ -2262,9

> > > +2257,6 @@ namespace gbe

> > >      });

> > >  #undef INSERT_REG

> > >

> > > -    if (needLaneID)

> > > -      allocCurbeReg(laneid, GBE_CURBE_LANE_ID);

> > > -

> > >      // After this point the vector is immutable. Sorting it will make

> > >      // research faster

> > >      std::sort(kernel->patches.begin(), kernel->patches.end()); diff

> > > --git a/backend/src/backend/gen_insn_selection.cpp

> > > b/backend/src/backend/gen_insn_selection.cpp

> > > index b0ba9e3..9772ad1 100644

> > > --- a/backend/src/backend/gen_insn_selection.cpp

> > > +++ b/backend/src/backend/gen_insn_selection.cpp

> > > @@ -2299,8 +2299,22 @@ namespace gbe

> > >            break;

> > >          case ir::OP_SIMD_ID:

> > >            {

> > > -            const GenRegister selLaneID = sel.selReg(ir::ocl::laneid,

> > ir::TYPE_U32);

> > > -            sel.MOV(dst, selLaneID);

> > > +            const GenRegister landID = GenRegister::immv(0x76543210);

> > > +            ir::Register r = sel.reg(ir::RegisterFamily::FAMILY_WORD);

> > > +            const GenRegister dst_ = sel.selReg(r, ir::TYPE_U16);

> > > +

> > > +            uint32_t execWidth = sel.curr.execWidth;

> > > +            sel.curr.execWidth = 8;

> > > +            sel.MOV(dst_, landID);

> > > +

> > > +            if (execWidth == 16) {

> > > +                //Packed Unsigned Half-Byte Integer Vector does not work

> > > +                //have to mock by adding 8 to the singed vector

> > > +                const GenRegister eight = GenRegister::immuw(8);

> > > +                sel.ADD(GenRegister::offset(dst_, 0, 16), dst_, eight);

> > > +                sel.curr.execWidth = 16;

> > > +            }

> >

> > Because you have changed the state, I think it is better to use

> > sel.push(), sel.pop().

> >

> > > +            sel.MOV(dst, dst_);

> > >            }

> > >            break;

> > >          default: NOT_SUPPORTED;

> > > diff --git a/backend/src/backend/program.h

> > > b/backend/src/backend/program.h index 3637ebb..56db1a1 100644

> > > --- a/backend/src/backend/program.h

> > > +++ b/backend/src/backend/program.h

> > > @@ -101,7 +101,6 @@ enum gbe_curbe_type {

> > >    GBE_CURBE_THREAD_NUM,

> > >    GBE_CURBE_ZERO,

> > >    GBE_CURBE_ONE,

> > > -  GBE_CURBE_LANE_ID,

> > >    GBE_CURBE_SLM_OFFSET,

> > >    GBE_CURBE_BTI_UTIL,

> > >  };

> > > diff --git a/backend/src/ir/profile.cpp b/backend/src/ir/profile.cpp

> > > index af9f698..37f2d3d 100644

> > > --- a/backend/src/ir/profile.cpp

> > > +++ b/backend/src/ir/profile.cpp

> > > @@ -90,7 +90,6 @@ namespace ir {

> > >        DECL_NEW_REG(FAMILY_DWORD, printfbptr, 1);

> > >        DECL_NEW_REG(FAMILY_DWORD, printfiptr, 1);

> > >        DECL_NEW_REG(FAMILY_DWORD, dwblockip, 0);

> > > -      DECL_NEW_REG(FAMILY_DWORD, laneid, 0);

> > >        DECL_NEW_REG(FAMILY_DWORD, invalid, 1);

> > >        DECL_NEW_REG(FAMILY_DWORD, btiUtil, 1);

> > >      }

> > > diff --git a/backend/src/ir/profile.hpp b/backend/src/ir/profile.hpp

> > > index 9323824..bf909be 100644

> > > --- a/backend/src/ir/profile.hpp

> > > +++ b/backend/src/ir/profile.hpp

> > > @@ -72,10 +72,9 @@ namespace ir {

> > >      static const Register printfbptr = Register(28); // printf buffer address .

> > >      static const Register printfiptr = Register(29); // printf

> > > index buffer

> > address.

> > >      static const Register dwblockip = Register(30);  // blockip

> > > -    static const Register laneid = Register(31);  // lane id.

> > > -    static const Register invalid = Register(32);  // used for valid

> comparation.

> > > -    static const Register btiUtil = Register(33);  // used for mixed pointer as

> > bti

> > > utility.

> > > -    static const uint32_t regNum = 34;             // number of special registers

> > > +    static const Register invalid = Register(31);  // used for valid

> comparation.

> > > +    static const Register btiUtil = Register(32);  // used for

> > > + mixed pointer as bti

> > > utility.

> > > +    static const uint32_t regNum = 33;             // number of special registers

> > >      extern const char *specialRegMean[];           // special register name.

> > >    } /* namespace ocl */

> > >

> > > diff --git a/src/cl_command_queue_gen7.c

> > b/src/cl_command_queue_gen7.c

> > > index 89f39b3..4adbd2b 100644

> > > --- a/src/cl_command_queue_gen7.c

> > > +++ b/src/cl_command_queue_gen7.c

> > > @@ -210,14 +210,6 @@ cl_curbe_fill(cl_kernel ker,

> > >    UPLOAD(GBE_CURBE_WORK_DIM, work_dim);  #undef UPLOAD

> > >

> > > -  /* get_sub_group_id needs it */

> > > -  if ((offset = interp_kernel_get_curbe_offset(ker->opaque,

> > > GBE_CURBE_LANE_ID, 0)) >= 0) {

> > > -    const uint32_t simd_sz = interp_kernel_get_simd_width(ker-

> >opaque);

> > > -    uint32_t *laneid = (uint32_t *) (ker->curbe + offset);

> > > -    int32_t i;

> > > -    for (i = 0; i < (int32_t) simd_sz; ++i) laneid[i] = i;

> > > -  }

> > > -

> > >    /* Write identity for the stack pointer. This is required by the

> > > stack

> > pointer

> > >     * computation in the kernel

> > >     */

> > > --

> > > 1.9.1

> > >

> > > _______________________________________________

> > > Beignet mailing list

> > > Beignet@lists.freedesktop.org

> > > http://lists.freedesktop.org/mailman/listinfo/beignet

> _______________________________________________

> Beignet mailing list

> Beignet@lists.freedesktop.org

> http://lists.freedesktop.org/mailman/listinfo/beignet