[2/2] use self test to determine whether to enable or disable atomics in L3 for HSW.

Submitted by Luo, Xionghu on June 12, 2015, 1:10 a.m.

Details

Message ID 1434071401-19966-2-git-send-email-xionghu.luo@intel.com
State New
Headers show

Not browsing as part of any series.

Commit Message

Luo, Xionghu June 12, 2015, 1:10 a.m.
From: Luo Xionghu <xionghu.luo@intel.com>

Check the self-test kernel's return value; if enqueuing the kernel failed,
set the flag so that atomics in L3 are not enabled for HSW.

This reverts commit 83f8739b6fc4893fac60145326052ccb5cf653dc.

Signed-off-by: Luo Xionghu <xionghu.luo@intel.com>
---
 src/cl_device_id.c        | 30 ++++++++++++++++++------------
 src/cl_utils.h            |  2 ++
 src/intel/intel_defines.h |  3 +++
 src/intel/intel_gpgpu.c   | 18 +++++++++++++++++-
 4 files changed, 40 insertions(+), 13 deletions(-)

Patch hide | download patch | download mbox

diff --git a/src/cl_device_id.c b/src/cl_device_id.c
index 215f7f2..278a91c 100644
--- a/src/cl_device_id.c
+++ b/src/cl_device_id.c
@@ -197,6 +197,7 @@  static struct _cl_device_id intel_skl_gt4_device = {
 #include "cl_gen75_device.h"
 };
 
+int atomic_test_result = 0;
 
 LOCAL cl_device_id
 cl_get_gt_device(void)
@@ -628,18 +629,23 @@  cl_get_device_ids(cl_platform_id    platform,
 
   /* Do we have a usable device? */
   device = cl_get_gt_device();
-  if (device && cl_self_test(device)) {
-    int disable_self_test = 0;
-    // can't use BVAR (backend/src/sys/cvar.hpp) here as it's C++
-    const char *env = getenv("OCL_IGNORE_SELF_TEST");
-    if (env != NULL) {
-      sscanf(env, "%i", &disable_self_test);
-    }
-    if (disable_self_test) {
-      printf("Beignet: Warning - overriding self-test failure\n");
-    } else {
-      printf("Beignet: disabling non-working device\n");
-      device = 0;
+  if (device) {
+    int ret = cl_self_test(device);
+    if(ret != 0) {
+      if(ret == 2)
+        atomic_test_result = 1;
+      int disable_self_test = 0;
+      // can't use BVAR (backend/src/sys/cvar.hpp) here as it's C++
+      const char *env = getenv("OCL_IGNORE_SELF_TEST");
+      if (env != NULL) {
+        sscanf(env, "%i", &disable_self_test);
+      }
+      if (disable_self_test) {
+        printf("Beignet: Warning - overriding self-test failure\n");
+      } else {
+        printf("Beignet: disabling non-working device\n");
+        device = 0;
+      }
     }
   }
   if (!device) {
diff --git a/src/cl_utils.h b/src/cl_utils.h
index 28fdef6..17bb0f5 100644
--- a/src/cl_utils.h
+++ b/src/cl_utils.h
@@ -312,5 +312,7 @@  static INLINE int atomic_add(atomic_t *v, const int c) {
 static INLINE int atomic_inc(atomic_t *v) { return atomic_add(v, 1); }
 static INLINE int atomic_dec(atomic_t *v) { return atomic_add(v, -1); }
 
+extern int atomic_test_result;
+
 #endif /* __CL_UTILS_H__ */
 
diff --git a/src/intel/intel_defines.h b/src/intel/intel_defines.h
index 1080a91..6ada30c 100644
--- a/src/intel/intel_defines.h
+++ b/src/intel/intel_defines.h
@@ -304,6 +304,9 @@ 
 
 #define URB_SIZE(intel)         (IS_IGDNG(intel->device_id) ? 1024 : \
                                  IS_G4X(intel->device_id) ? 384 : 256)
+// HSW
+#define HSW_SCRATCH1_OFFSET                      (0xB038)
+#define HSW_ROW_CHICKEN3_HDC_OFFSET              (0xE49C)
 
 // L3 cache stuff 
 #define GEN7_L3_SQC_REG1_ADDRESS_OFFSET          (0XB010)
diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c
index b083dab..7257d46 100644
--- a/src/intel/intel_gpgpu.c
+++ b/src/intel/intel_gpgpu.c
@@ -719,7 +719,23 @@  static void
 intel_gpgpu_set_L3_gen75(intel_gpgpu_t *gpgpu, uint32_t use_slm)
 {
   /* still set L3 in batch buffer for fulsim. */
-  BEGIN_BATCH(gpgpu->batch, 9);
+  if(atomic_test_result == 0)
+  {
+    BEGIN_BATCH(gpgpu->batch, 15);
+    OUT_BATCH(gpgpu->batch, CMD_LOAD_REGISTER_IMM | 1); /* length - 2 */
+    /* FIXME: KMD always disable the atomic in L3 for some reason.
+       I checked the spec, and don't think we need that workaround now.
+       Before I send a patch to kernel, let's just enable it here. */
+    OUT_BATCH(gpgpu->batch, HSW_SCRATCH1_OFFSET);
+    OUT_BATCH(gpgpu->batch, 0);                         /* enable atomic in L3 */
+    OUT_BATCH(gpgpu->batch, CMD_LOAD_REGISTER_IMM | 1); /* length - 2 */
+    OUT_BATCH(gpgpu->batch, HSW_ROW_CHICKEN3_HDC_OFFSET);
+    OUT_BATCH(gpgpu->batch, (1 << 6ul) << 16);          /* enable atomic in L3 */
+  }
+  else
+  {
+    BEGIN_BATCH(gpgpu->batch, 9);
+  }
   OUT_BATCH(gpgpu->batch, CMD_LOAD_REGISTER_IMM | 1); /* length - 2 */
   OUT_BATCH(gpgpu->batch, GEN7_L3_SQC_REG1_ADDRESS_OFFSET);
   OUT_BATCH(gpgpu->batch, 0x08800000);

Comments

Can you avoid using the global variable atomic_test_result? It is not good practice to use a global variable to communicate between the cl and intel components.

> -----Original Message-----

> From: Beignet [mailto:beignet-bounces@lists.freedesktop.org] On Behalf Of

> xionghu.luo@intel.com

> Sent: Friday, June 12, 2015 09:10

> To: beignet@lists.freedesktop.org

> Cc: Luo, Xionghu

> Subject: [Beignet] [PATCH 2/2] use self test to determine enable/or disable

> atomics in L3 for HSW.

> 

> From: Luo Xionghu <xionghu.luo@intel.com>

> 

> check the selftest kernel return value, if enqueue kernel failed, set the flag

> to not enable atomics the L3 for HSW.

> 

> This reverts commit 83f8739b6fc4893fac60145326052ccb5cf653dc.

> 

> Signed-off-by: Luo Xionghu <xionghu.luo@intel.com>

> ---

>  src/cl_device_id.c        | 30 ++++++++++++++++++------------

>  src/cl_utils.h            |  2 ++

>  src/intel/intel_defines.h |  3 +++

>  src/intel/intel_gpgpu.c   | 18 +++++++++++++++++-

>  4 files changed, 40 insertions(+), 13 deletions(-)

> 

> diff --git a/src/cl_device_id.c b/src/cl_device_id.c index 215f7f2..278a91c

> 100644

> --- a/src/cl_device_id.c

> +++ b/src/cl_device_id.c

> @@ -197,6 +197,7 @@ static struct _cl_device_id intel_skl_gt4_device =

> {  #include "cl_gen75_device.h"

>  };

> 

> +int atomic_test_result = 0;

> 

>  LOCAL cl_device_id

>  cl_get_gt_device(void)

> @@ -628,18 +629,23 @@ cl_get_device_ids(cl_platform_id    platform,

> 

>    /* Do we have a usable device? */

>    device = cl_get_gt_device();

> -  if (device && cl_self_test(device)) {

> -    int disable_self_test = 0;

> -    // can't use BVAR (backend/src/sys/cvar.hpp) here as it's C++

> -    const char *env = getenv("OCL_IGNORE_SELF_TEST");

> -    if (env != NULL) {

> -      sscanf(env, "%i", &disable_self_test);

> -    }

> -    if (disable_self_test) {

> -      printf("Beignet: Warning - overriding self-test failure\n");

> -    } else {

> -      printf("Beignet: disabling non-working device\n");

> -      device = 0;

> +  if (device) {

> +    int ret = cl_self_test(device);

> +    if(ret != 0) {

> +      if(ret == 2)

> +        atomic_test_result = 1;

> +      int disable_self_test = 0;

> +      // can't use BVAR (backend/src/sys/cvar.hpp) here as it's C++

> +      const char *env = getenv("OCL_IGNORE_SELF_TEST");

> +      if (env != NULL) {

> +        sscanf(env, "%i", &disable_self_test);

> +      }

> +      if (disable_self_test) {

> +        printf("Beignet: Warning - overriding self-test failure\n");

> +      } else {

> +        printf("Beignet: disabling non-working device\n");

> +        device = 0;

> +      }

>      }

>    }

>    if (!device) {

> diff --git a/src/cl_utils.h b/src/cl_utils.h index 28fdef6..17bb0f5 100644

> --- a/src/cl_utils.h

> +++ b/src/cl_utils.h

> @@ -312,5 +312,7 @@ static INLINE int atomic_add(atomic_t *v, const int c)

> {  static INLINE int atomic_inc(atomic_t *v) { return atomic_add(v, 1); }  static

> INLINE int atomic_dec(atomic_t *v) { return atomic_add(v, -1); }

> 

> +extern int atomic_test_result;

> +

>  #endif /* __CL_UTILS_H__ */

> 

> diff --git a/src/intel/intel_defines.h b/src/intel/intel_defines.h index

> 1080a91..6ada30c 100644

> --- a/src/intel/intel_defines.h

> +++ b/src/intel/intel_defines.h

> @@ -304,6 +304,9 @@

> 

>  #define URB_SIZE(intel)         (IS_IGDNG(intel->device_id) ? 1024 : \

>                                   IS_G4X(intel->device_id) ? 384 : 256)

> +// HSW

> +#define HSW_SCRATCH1_OFFSET                      (0xB038)

> +#define HSW_ROW_CHICKEN3_HDC_OFFSET              (0xE49C)

> 

>  // L3 cache stuff

>  #define GEN7_L3_SQC_REG1_ADDRESS_OFFSET          (0XB010)

> diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c index

> b083dab..7257d46 100644

> --- a/src/intel/intel_gpgpu.c

> +++ b/src/intel/intel_gpgpu.c

> @@ -719,7 +719,23 @@ static void

>  intel_gpgpu_set_L3_gen75(intel_gpgpu_t *gpgpu, uint32_t use_slm)  {

>    /* still set L3 in batch buffer for fulsim. */

> -  BEGIN_BATCH(gpgpu->batch, 9);

> +  if(atomic_test_result == 0)

> +  {

> +    BEGIN_BATCH(gpgpu->batch, 15);

> +    OUT_BATCH(gpgpu->batch, CMD_LOAD_REGISTER_IMM | 1); /* length -

> 2 */

> +    /* FIXME: KMD always disable the atomic in L3 for some reason.

> +       I checked the spec, and don't think we need that workaround now.

> +       Before I send a patch to kernel, let's just enable it here. */

> +    OUT_BATCH(gpgpu->batch, HSW_SCRATCH1_OFFSET);

> +    OUT_BATCH(gpgpu->batch, 0);                         /* enable atomic in L3 */

> +    OUT_BATCH(gpgpu->batch, CMD_LOAD_REGISTER_IMM | 1); /* length -

> 2 */

> +    OUT_BATCH(gpgpu->batch, HSW_ROW_CHICKEN3_HDC_OFFSET);

> +    OUT_BATCH(gpgpu->batch, (1 << 6ul) << 16);          /* enable atomic in L3

> */

> +  }

> +  else

> +  {

> +    BEGIN_BATCH(gpgpu->batch, 9);

> +  }

>    OUT_BATCH(gpgpu->batch, CMD_LOAD_REGISTER_IMM | 1); /* length - 2

> */

>    OUT_BATCH(gpgpu->batch, GEN7_L3_SQC_REG1_ADDRESS_OFFSET);

>    OUT_BATCH(gpgpu->batch, 0x08800000);

> --

> 1.9.1

> 

> _______________________________________________

> Beignet mailing list

> Beignet@lists.freedesktop.org

> http://lists.freedesktop.org/mailman/listinfo/beignet