Runtime: refine max group size for SKL & KBL

Submitted by rander on June 23, 2017, 3:02 a.m.

Details

Message ID 1498186947-15295-1-git-send-email-rander.wang@intel.com
State New
Headers show
Series "Runtime: refine max group size for SKL & KBL" ( rev: 1 ) in Beignet

Not browsing as part of any series.

Commit Message

rander June 23, 2017, 3:02 a.m.
Change the max work group size to 256, which is a reasonable
	size for Gen9. According to performance tests, 256 gives a
	good improvement in OpenCV with no regressions, so change it.

Signed-off-by: rander.wang <rander.wang@intel.com>
---
 src/cl_device_id.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

Patch hide | download patch | download mbox

diff --git a/src/cl_device_id.c b/src/cl_device_id.c
index 6cba2b5..5ea13a9 100644
--- a/src/cl_device_id.c
+++ b/src/cl_device_id.c
@@ -149,7 +149,7 @@  static struct _cl_device_id intel_skl_gt1_device = {
   .max_thread_per_unit = 7,
   .sub_slice_count = 2,
   .max_work_item_sizes = {512, 512, 512},
-  .max_work_group_size = 512,
+  .max_work_group_size = 256,
   .max_clock_frequency = 1000,
 #include "cl_gen9_device.h"
 };
@@ -159,7 +159,7 @@  static struct _cl_device_id intel_skl_gt2_device = {
   .max_thread_per_unit = 7,
   .sub_slice_count = 3,
   .max_work_item_sizes = {512, 512, 512},
-  .max_work_group_size = 512,
+  .max_work_group_size = 256,
   .max_clock_frequency = 1000,
 #include "cl_gen9_device.h"
 };
@@ -169,7 +169,7 @@  static struct _cl_device_id intel_skl_gt3_device = {
   .max_thread_per_unit = 7,
   .sub_slice_count = 6,
   .max_work_item_sizes = {512, 512, 512},
-  .max_work_group_size = 512,
+  .max_work_group_size = 256,
   .max_clock_frequency = 1000,
 #include "cl_gen9_device.h"
 };
@@ -179,7 +179,7 @@  static struct _cl_device_id intel_skl_gt4_device = {
   .max_thread_per_unit = 7,
   .sub_slice_count = 9,
   .max_work_item_sizes = {512, 512, 512},
-  .max_work_group_size = 512,
+  .max_work_group_size = 256,
   .max_clock_frequency = 1000,
 #include "cl_gen9_device.h"
 };
@@ -209,7 +209,7 @@  static struct _cl_device_id intel_kbl_gt1_device = {
   .max_thread_per_unit = 7,
   .sub_slice_count = 2,
   .max_work_item_sizes = {512, 512, 512},
-  .max_work_group_size = 512,
+  .max_work_group_size = 256,
   .max_clock_frequency = 1000,
 #include "cl_gen9_device.h"
 };
@@ -219,7 +219,7 @@  static struct _cl_device_id intel_kbl_gt15_device = {
   .max_thread_per_unit = 7,
   .sub_slice_count = 3,
   .max_work_item_sizes = {512, 512, 512},
-  .max_work_group_size = 512,
+  .max_work_group_size = 256,
   .max_clock_frequency = 1000,
 #include "cl_gen9_device.h"
 };
@@ -229,7 +229,7 @@  static struct _cl_device_id intel_kbl_gt2_device = {
   .max_thread_per_unit = 7,
   .sub_slice_count = 3,
   .max_work_item_sizes = {512, 512, 512},
-  .max_work_group_size = 512,
+  .max_work_group_size = 256,
   .max_clock_frequency = 1000,
 #include "cl_gen9_device.h"
 };
@@ -239,7 +239,7 @@  static struct _cl_device_id intel_kbl_gt3_device = {
   .max_thread_per_unit = 7,
   .sub_slice_count = 6,
   .max_work_item_sizes = {512, 512, 512},
-  .max_work_group_size = 512,
+  .max_work_group_size = 256,
   .max_clock_frequency = 1000,
 #include "cl_gen9_device.h"
 };
@@ -249,7 +249,7 @@  static struct _cl_device_id intel_kbl_gt4_device = {
   .max_thread_per_unit = 7,
   .sub_slice_count = 9,
   .max_work_item_sizes = {512, 512, 512},
-  .max_work_group_size = 512,
+  .max_work_group_size = 256,
   .max_clock_frequency = 1000,
 #include "cl_gen9_device.h"
 };

Comments

LGTM, pushed, thanks.

> -----Original Message-----

> From: Beignet [mailto:beignet-bounces@lists.freedesktop.org] On Behalf Of

> rander.wang

> Sent: Friday, June 23, 2017 11:02

> To: beignet@freedesktop.org

> Cc: Wang, Rander <rander.wang@intel.com>

> Subject: [Beignet] [PATCH] Runtime: refine max group size for SKL & KBL

> 

> 	Change the max work group size to 256, which is a reasonable

> 	size for Gen9. According to performance tests, 256 gives a

> 	good improvement in OpenCV with no regressions, so change it.

> 

> Signed-off-by: rander.wang <rander.wang@intel.com>

> ---

>  src/cl_device_id.c | 18 +++++++++---------

>  1 file changed, 9 insertions(+), 9 deletions(-)

> 

> diff --git a/src/cl_device_id.c b/src/cl_device_id.c index 6cba2b5..5ea13a9

> 100644

> --- a/src/cl_device_id.c

> +++ b/src/cl_device_id.c

> @@ -149,7 +149,7 @@ static struct _cl_device_id intel_skl_gt1_device = {

>    .max_thread_per_unit = 7,

>    .sub_slice_count = 2,

>    .max_work_item_sizes = {512, 512, 512},

> -  .max_work_group_size = 512,

> +  .max_work_group_size = 256,

>    .max_clock_frequency = 1000,

>  #include "cl_gen9_device.h"

>  };

> @@ -159,7 +159,7 @@ static struct _cl_device_id intel_skl_gt2_device = {

>    .max_thread_per_unit = 7,

>    .sub_slice_count = 3,

>    .max_work_item_sizes = {512, 512, 512},

> -  .max_work_group_size = 512,

> +  .max_work_group_size = 256,

>    .max_clock_frequency = 1000,

>  #include "cl_gen9_device.h"

>  };

> @@ -169,7 +169,7 @@ static struct _cl_device_id intel_skl_gt3_device = {

>    .max_thread_per_unit = 7,

>    .sub_slice_count = 6,

>    .max_work_item_sizes = {512, 512, 512},

> -  .max_work_group_size = 512,

> +  .max_work_group_size = 256,

>    .max_clock_frequency = 1000,

>  #include "cl_gen9_device.h"

>  };

> @@ -179,7 +179,7 @@ static struct _cl_device_id intel_skl_gt4_device = {

>    .max_thread_per_unit = 7,

>    .sub_slice_count = 9,

>    .max_work_item_sizes = {512, 512, 512},

> -  .max_work_group_size = 512,

> +  .max_work_group_size = 256,

>    .max_clock_frequency = 1000,

>  #include "cl_gen9_device.h"

>  };

> @@ -209,7 +209,7 @@ static struct _cl_device_id intel_kbl_gt1_device = {

>    .max_thread_per_unit = 7,

>    .sub_slice_count = 2,

>    .max_work_item_sizes = {512, 512, 512},

> -  .max_work_group_size = 512,

> +  .max_work_group_size = 256,

>    .max_clock_frequency = 1000,

>  #include "cl_gen9_device.h"

>  };

> @@ -219,7 +219,7 @@ static struct _cl_device_id intel_kbl_gt15_device = {

>    .max_thread_per_unit = 7,

>    .sub_slice_count = 3,

>    .max_work_item_sizes = {512, 512, 512},

> -  .max_work_group_size = 512,

> +  .max_work_group_size = 256,

>    .max_clock_frequency = 1000,

>  #include "cl_gen9_device.h"

>  };

> @@ -229,7 +229,7 @@ static struct _cl_device_id intel_kbl_gt2_device = {

>    .max_thread_per_unit = 7,

>    .sub_slice_count = 3,

>    .max_work_item_sizes = {512, 512, 512},

> -  .max_work_group_size = 512,

> +  .max_work_group_size = 256,

>    .max_clock_frequency = 1000,

>  #include "cl_gen9_device.h"

>  };

> @@ -239,7 +239,7 @@ static struct _cl_device_id intel_kbl_gt3_device = {

>    .max_thread_per_unit = 7,

>    .sub_slice_count = 6,

>    .max_work_item_sizes = {512, 512, 512},

> -  .max_work_group_size = 512,

> +  .max_work_group_size = 256,

>    .max_clock_frequency = 1000,

>  #include "cl_gen9_device.h"

>  };

> @@ -249,7 +249,7 @@ static struct _cl_device_id intel_kbl_gt4_device = {

>    .max_thread_per_unit = 7,

>    .sub_slice_count = 9,

>    .max_work_item_sizes = {512, 512, 512},

> -  .max_work_group_size = 512,

> +  .max_work_group_size = 256,

>    .max_clock_frequency = 1000,

>  #include "cl_gen9_device.h"

>  };

> --

> 2.7.4

> 

> _______________________________________________

> Beignet mailing list

> Beignet@lists.freedesktop.org

> https://lists.freedesktop.org/mailman/listinfo/beignet