[v3] use self test to determine enable/or disable atomics in L3 for HSW.

Submitted by Luo, Xionghu on June 29, 2015, 7:22 a.m.

Details

Message ID 1435562538-20000-1-git-send-email-xionghu.luo@intel.com
State New
Headers show

Not browsing as part of any series.

Commit Message

Luo, Xionghu June 29, 2015, 7:22 a.m.
From: Luo Xionghu <xionghu.luo@intel.com>

check the selftest kernel return value, if enqueue kernel failed,
set the flag to not enable atomics the L3 for HSW.

This reverts commit 83f8739b6fc4893fac60145326052ccb5cf653dc.
v2: don't use global variable to pass value from runtime to driver.
v3: add type SELF_TEST_OTHER_FAIL to differentiate from SELF_TEST_ATOMIC_FAIL;
seperate the ATOMIC_FAIL from SLM_FAIL, only SLM_FAIL can be control by
env OCL_IGNORE_SELF_TEST.

Signed-off-by: Luo Xionghu <xionghu.luo@intel.com>
---
 src/cl_context.c          |  1 +
 src/cl_device_id.c        | 48 +++++++++++++++++++++++++----------------------
 src/cl_device_id.h        |  1 +
 src/cl_driver.h           |  9 +++++++++
 src/cl_driver_defs.c      |  1 +
 src/intel/intel_defines.h |  3 +++
 src/intel/intel_driver.c  |  7 +++++++
 src/intel/intel_driver.h  |  1 +
 src/intel/intel_gpgpu.c   | 18 +++++++++++++++++-
 9 files changed, 66 insertions(+), 23 deletions(-)

Patch hide | download patch | download mbox

diff --git a/src/cl_context.c b/src/cl_context.c
index 0f08e6a..773f545 100644
--- a/src/cl_context.c
+++ b/src/cl_context.c
@@ -149,6 +149,7 @@  cl_create_context(const cl_context_properties *  properties,
   /* Save the user callback and user data*/
   ctx->pfn_notify = pfn_notify;
   ctx->user_data = user_data;
+  cl_driver_set_atomic_flag(ctx->drv, ctx->device->atomic_test_result);
 
 exit:
   if (errcode_ret != NULL)
diff --git a/src/cl_device_id.c b/src/cl_device_id.c
index 215f7f2..9d3ab2f 100644
--- a/src/cl_device_id.c
+++ b/src/cl_device_id.c
@@ -197,7 +197,6 @@  static struct _cl_device_id intel_skl_gt4_device = {
 #include "cl_gen75_device.h"
 };
 
-
 LOCAL cl_device_id
 cl_get_gt_device(void)
 {
@@ -546,8 +545,11 @@  skl_gt4_break:
 }
 
 /* Runs a small kernel to check that the device works; returns
- * 0 for success, 1 for silently wrong result, 2 for error */
-LOCAL cl_int
+ * SELF_TEST_PASS: for success.
+ * SELF_TEST_SLM_FAIL: for SLM results mismatch;
+ * SELF_TEST_ATOMIC_FAIL: for hsw enqueue  kernel failure to not enable atomics in L3.
+ * SELF_TEST_OTHER_FAIL: other fail like runtime API fail.*/
+LOCAL cl_self_test_res
 cl_self_test(cl_device_id device)
 {
   cl_int status;
@@ -566,7 +568,7 @@  cl_self_test(cl_device_id device)
   "  buf[get_global_id(0)] = tmp[2 - get_local_id(0)] + buf[get_global_id(0)];"
   "}"; // using __local to catch the "no SLM on Haswell" problem
   static int tested = 0;
-  static cl_int ret = 2;
+  static cl_self_test_res ret = SELF_TEST_OTHER_FAIL;
   if (tested != 0)
     return ret;
   tested = 1;
@@ -589,14 +591,16 @@  cl_self_test(cl_device_id device)
                   status = clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, n*4, test_data, 1, &kernel_finished, NULL);
                   if (status == CL_SUCCESS) {
                     if (test_data[0] == 8 && test_data[1] == 14 && test_data[2] == 8){
-                      ret = 0;
+                      ret = SELF_TEST_PASS;
                     } else {
-                      ret = 1;
+                      ret = SELF_TEST_SLM_FAIL;
                       printf("Beignet: self-test failed: (3, 7, 5) + (5, 7, 3) returned (%i, %i, %i)\n"
                              "See README.md or http://www.freedesktop.org/wiki/Software/Beignet/\n",
                              test_data[0], test_data[1], test_data[2]);
                     }
                   }
+                } else{
+                  ret = SELF_TEST_ATOMIC_FAIL;
                 }
               }
             }
@@ -610,10 +614,6 @@  cl_self_test(cl_device_id device)
     clReleaseCommandQueue(queue);
   }
   clReleaseContext(ctx);
-  if (ret == 2) {
-    printf("Beignet: self-test failed: error %i\n"
-           "See README.md or http://www.freedesktop.org/wiki/Software/Beignet/\n", status);
-  }
   return ret;
 }
 
@@ -628,18 +628,22 @@  cl_get_device_ids(cl_platform_id    platform,
 
   /* Do we have a usable device? */
   device = cl_get_gt_device();
-  if (device && cl_self_test(device)) {
-    int disable_self_test = 0;
-    // can't use BVAR (backend/src/sys/cvar.hpp) here as it's C++
-    const char *env = getenv("OCL_IGNORE_SELF_TEST");
-    if (env != NULL) {
-      sscanf(env, "%i", &disable_self_test);
-    }
-    if (disable_self_test) {
-      printf("Beignet: Warning - overriding self-test failure\n");
-    } else {
-      printf("Beignet: disabling non-working device\n");
-      device = 0;
+  if (device) {
+    cl_self_test_res ret = cl_self_test(device);
+    device->atomic_test_result = ret;
+    if(ret == SELF_TEST_SLM_FAIL) {
+      int disable_self_test = 0;
+      // can't use BVAR (backend/src/sys/cvar.hpp) here as it's C++
+      const char *env = getenv("OCL_IGNORE_SELF_TEST");
+      if (env != NULL) {
+        sscanf(env, "%i", &disable_self_test);
+      }
+      if (disable_self_test) {
+        printf("Beignet: Warning - overriding self-test failure\n");
+      } else {
+        printf("Beignet: disabling non-working device\n");
+        device = 0;
+      }
     }
   }
   if (!device) {
diff --git a/src/cl_device_id.h b/src/cl_device_id.h
index ee6a8e6..1bd5806 100644
--- a/src/cl_device_id.h
+++ b/src/cl_device_id.h
@@ -113,6 +113,7 @@  struct _cl_device_id {
   cl_device_affinity_domain    affinity_domain;
   cl_device_partition_property partition_type[3];
   cl_uint      device_reference_count;
+  uint32_t atomic_test_result;
 };
 
 /* Get a device from the given platform */
diff --git a/src/cl_driver.h b/src/cl_driver.h
index b2510de..1ab4dff 100644
--- a/src/cl_driver.h
+++ b/src/cl_driver.h
@@ -49,6 +49,15 @@  extern cl_driver_get_bufmgr_cb *cl_driver_get_bufmgr;
 typedef uint32_t (cl_driver_get_ver_cb)(cl_driver);
 extern cl_driver_get_ver_cb *cl_driver_get_ver;
 
+typedef enum cl_self_test_res{
+  SELF_TEST_PASS = 0,
+  SELF_TEST_SLM_FAIL  = 1,
+  SELF_TEST_ATOMIC_FAIL = 2,
+  SELF_TEST_OTHER_FAIL = 3,
+} cl_self_test_res;
+/* Set the atomic enable/disable flag in the driver */
+typedef void (cl_driver_set_atomic_flag_cb)(cl_driver, int);
+extern cl_driver_set_atomic_flag_cb *cl_driver_set_atomic_flag;
 /**************************************************************************
  * GPGPU command streamer
  **************************************************************************/
diff --git a/src/cl_driver_defs.c b/src/cl_driver_defs.c
index 9a47210..b77acdc 100644
--- a/src/cl_driver_defs.c
+++ b/src/cl_driver_defs.c
@@ -25,6 +25,7 @@  LOCAL cl_driver_new_cb *cl_driver_new = NULL;
 LOCAL cl_driver_delete_cb *cl_driver_delete = NULL;
 LOCAL cl_driver_get_bufmgr_cb *cl_driver_get_bufmgr = NULL;
 LOCAL cl_driver_get_ver_cb *cl_driver_get_ver = NULL;
+LOCAL cl_driver_set_atomic_flag_cb *cl_driver_set_atomic_flag = NULL;
 LOCAL cl_driver_get_device_id_cb *cl_driver_get_device_id = NULL;
 LOCAL cl_driver_update_device_info_cb *cl_driver_update_device_info = NULL;
 
diff --git a/src/intel/intel_defines.h b/src/intel/intel_defines.h
index 1080a91..6ada30c 100644
--- a/src/intel/intel_defines.h
+++ b/src/intel/intel_defines.h
@@ -304,6 +304,9 @@ 
 
 #define URB_SIZE(intel)         (IS_IGDNG(intel->device_id) ? 1024 : \
                                  IS_G4X(intel->device_id) ? 384 : 256)
+// HSW
+#define HSW_SCRATCH1_OFFSET                      (0xB038)
+#define HSW_ROW_CHICKEN3_HDC_OFFSET              (0xE49C)
 
 // L3 cache stuff 
 #define GEN7_L3_SQC_REG1_ADDRESS_OFFSET          (0XB010)
diff --git a/src/intel/intel_driver.c b/src/intel/intel_driver.c
index 1bebd9a..9c72777 100644
--- a/src/intel/intel_driver.c
+++ b/src/intel/intel_driver.c
@@ -448,6 +448,12 @@  intel_driver_get_ver(struct intel_driver *drv)
   return drv->gen_ver;
 }
 
+static void
+intel_driver_set_atomic_flag(intel_driver_t *drv, int atomic_flag)
+{
+  drv->atomic_test_result = atomic_flag;
+}
+
 static size_t drm_intel_bo_get_size(drm_intel_bo *bo) { return bo->size; }
 static void* drm_intel_bo_get_virtual(drm_intel_bo *bo) { return bo->virtual; }
 
@@ -834,6 +840,7 @@  intel_setup_callbacks(void)
   cl_driver_new = (cl_driver_new_cb *) cl_intel_driver_new;
   cl_driver_delete = (cl_driver_delete_cb *) cl_intel_driver_delete;
   cl_driver_get_ver = (cl_driver_get_ver_cb *) intel_driver_get_ver;
+  cl_driver_set_atomic_flag = (cl_driver_set_atomic_flag_cb *) intel_driver_set_atomic_flag;
   cl_driver_get_bufmgr = (cl_driver_get_bufmgr_cb *) intel_driver_get_bufmgr;
   cl_driver_get_device_id = (cl_driver_get_device_id_cb *) intel_get_device_id;
   cl_driver_update_device_info = (cl_driver_update_device_info_cb *) intel_update_device_info;
diff --git a/src/intel/intel_driver.h b/src/intel/intel_driver.h
index f972ec8..51f0e0d 100644
--- a/src/intel/intel_driver.h
+++ b/src/intel/intel_driver.h
@@ -89,6 +89,7 @@  typedef struct intel_driver
   Display *x11_display;
   struct dri_state *dri_ctx;
   struct intel_gpgpu_node *gpgpu_list;
+  int atomic_test_result;
 } intel_driver_t;
 
 #define SET_BLOCKED_SIGSET(DRIVER)   do {                     \
diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c
index b083dab..901bd98 100644
--- a/src/intel/intel_gpgpu.c
+++ b/src/intel/intel_gpgpu.c
@@ -719,7 +719,23 @@  static void
 intel_gpgpu_set_L3_gen75(intel_gpgpu_t *gpgpu, uint32_t use_slm)
 {
   /* still set L3 in batch buffer for fulsim. */
-  BEGIN_BATCH(gpgpu->batch, 9);
+  if(gpgpu->drv->atomic_test_result != SELF_TEST_ATOMIC_FAIL)
+  {
+    BEGIN_BATCH(gpgpu->batch, 15);
+    OUT_BATCH(gpgpu->batch, CMD_LOAD_REGISTER_IMM | 1); /* length - 2 */
+    /* FIXME: KMD always disable the atomic in L3 for some reason.
+       I checked the spec, and don't think we need that workaround now.
+       Before I send a patch to kernel, let's just enable it here. */
+    OUT_BATCH(gpgpu->batch, HSW_SCRATCH1_OFFSET);
+    OUT_BATCH(gpgpu->batch, 0);                         /* enable atomic in L3 */
+    OUT_BATCH(gpgpu->batch, CMD_LOAD_REGISTER_IMM | 1); /* length - 2 */
+    OUT_BATCH(gpgpu->batch, HSW_ROW_CHICKEN3_HDC_OFFSET);
+    OUT_BATCH(gpgpu->batch, (1 << 6ul) << 16);          /* enable atomic in L3 */
+  }
+  else
+  {
+    BEGIN_BATCH(gpgpu->batch, 9);
+  }
   OUT_BATCH(gpgpu->batch, CMD_LOAD_REGISTER_IMM | 1); /* length - 2 */
   OUT_BATCH(gpgpu->batch, GEN7_L3_SQC_REG1_ADDRESS_OFFSET);
   OUT_BATCH(gpgpu->batch, 0x08800000);

Comments

LGTM, thanks, pushed.

> -----Original Message-----

> From: Beignet [mailto:beignet-bounces@lists.freedesktop.org] On Behalf Of

> xionghu.luo@intel.com

> Sent: Monday, June 29, 2015 15:22

> To: beignet@lists.freedesktop.org

> Cc: Luo, Xionghu

> Subject: [Beignet] [PATCH v3] use self test to determine enable/or disable

> atomics in L3 for HSW.

> 

> From: Luo Xionghu <xionghu.luo@intel.com>

> 

> check the selftest kernel return value, if enqueue kernel failed, set the flag

> to not enable atomics the L3 for HSW.

> 

> This reverts commit 83f8739b6fc4893fac60145326052ccb5cf653dc.

> v2: don't use global variable to pass value from runtime to driver.

> v3: add type SELF_TEST_OTHER_FAIL to differentiate from

> SELF_TEST_ATOMIC_FAIL; seperate the ATOMIC_FAIL from SLM_FAIL, only

> SLM_FAIL can be control by env OCL_IGNORE_SELF_TEST.

> 

> Signed-off-by: Luo Xionghu <xionghu.luo@intel.com>

> ---

>  src/cl_context.c          |  1 +

>  src/cl_device_id.c        | 48 +++++++++++++++++++++++++---------------------

> -

>  src/cl_device_id.h        |  1 +

>  src/cl_driver.h           |  9 +++++++++

>  src/cl_driver_defs.c      |  1 +

>  src/intel/intel_defines.h |  3 +++

>  src/intel/intel_driver.c  |  7 +++++++

>  src/intel/intel_driver.h  |  1 +

>  src/intel/intel_gpgpu.c   | 18 +++++++++++++++++-

>  9 files changed, 66 insertions(+), 23 deletions(-)

> 

> diff --git a/src/cl_context.c b/src/cl_context.c index 0f08e6a..773f545 100644

> --- a/src/cl_context.c

> +++ b/src/cl_context.c

> @@ -149,6 +149,7 @@ cl_create_context(const cl_context_properties *

> properties,

>    /* Save the user callback and user data*/

>    ctx->pfn_notify = pfn_notify;

>    ctx->user_data = user_data;

> +  cl_driver_set_atomic_flag(ctx->drv, ctx->device->atomic_test_result);

> 

>  exit:

>    if (errcode_ret != NULL)

> diff --git a/src/cl_device_id.c b/src/cl_device_id.c index 215f7f2..9d3ab2f

> 100644

> --- a/src/cl_device_id.c

> +++ b/src/cl_device_id.c

> @@ -197,7 +197,6 @@ static struct _cl_device_id intel_skl_gt4_device =

> {  #include "cl_gen75_device.h"

>  };

> 

> -

>  LOCAL cl_device_id

>  cl_get_gt_device(void)

>  {

> @@ -546,8 +545,11 @@ skl_gt4_break:

>  }

> 

>  /* Runs a small kernel to check that the device works; returns

> - * 0 for success, 1 for silently wrong result, 2 for error */ -LOCAL cl_int

> + * SELF_TEST_PASS: for success.

> + * SELF_TEST_SLM_FAIL: for SLM results mismatch;

> + * SELF_TEST_ATOMIC_FAIL: for hsw enqueue  kernel failure to not enable

> atomics in L3.

> + * SELF_TEST_OTHER_FAIL: other fail like runtime API fail.*/ LOCAL

> +cl_self_test_res

>  cl_self_test(cl_device_id device)

>  {

>    cl_int status;

> @@ -566,7 +568,7 @@ cl_self_test(cl_device_id device)

>    "  buf[get_global_id(0)] = tmp[2 - get_local_id(0)] + buf[get_global_id(0)];"

>    "}"; // using __local to catch the "no SLM on Haswell" problem

>    static int tested = 0;

> -  static cl_int ret = 2;

> +  static cl_self_test_res ret = SELF_TEST_OTHER_FAIL;

>    if (tested != 0)

>      return ret;

>    tested = 1;

> @@ -589,14 +591,16 @@ cl_self_test(cl_device_id device)

>                    status = clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, n*4,

> test_data, 1, &kernel_finished, NULL);

>                    if (status == CL_SUCCESS) {

>                      if (test_data[0] == 8 && test_data[1] == 14 && test_data[2] == 8){

> -                      ret = 0;

> +                      ret = SELF_TEST_PASS;

>                      } else {

> -                      ret = 1;

> +                      ret = SELF_TEST_SLM_FAIL;

>                        printf("Beignet: self-test failed: (3, 7, 5) + (5, 7, 3) returned

> (%i, %i, %i)\n"

>                               "See README.md or

> http://www.freedesktop.org/wiki/Software/Beignet/\n",

>                               test_data[0], test_data[1], test_data[2]);

>                      }

>                    }

> +                } else{

> +                  ret = SELF_TEST_ATOMIC_FAIL;

>                  }

>                }

>              }

> @@ -610,10 +614,6 @@ cl_self_test(cl_device_id device)

>      clReleaseCommandQueue(queue);

>    }

>    clReleaseContext(ctx);

> -  if (ret == 2) {

> -    printf("Beignet: self-test failed: error %i\n"

> -           "See README.md or

> http://www.freedesktop.org/wiki/Software/Beignet/\n", status);

> -  }

>    return ret;

>  }

> 

> @@ -628,18 +628,22 @@ cl_get_device_ids(cl_platform_id    platform,

> 

>    /* Do we have a usable device? */

>    device = cl_get_gt_device();

> -  if (device && cl_self_test(device)) {

> -    int disable_self_test = 0;

> -    // can't use BVAR (backend/src/sys/cvar.hpp) here as it's C++

> -    const char *env = getenv("OCL_IGNORE_SELF_TEST");

> -    if (env != NULL) {

> -      sscanf(env, "%i", &disable_self_test);

> -    }

> -    if (disable_self_test) {

> -      printf("Beignet: Warning - overriding self-test failure\n");

> -    } else {

> -      printf("Beignet: disabling non-working device\n");

> -      device = 0;

> +  if (device) {

> +    cl_self_test_res ret = cl_self_test(device);

> +    device->atomic_test_result = ret;

> +    if(ret == SELF_TEST_SLM_FAIL) {

> +      int disable_self_test = 0;

> +      // can't use BVAR (backend/src/sys/cvar.hpp) here as it's C++

> +      const char *env = getenv("OCL_IGNORE_SELF_TEST");

> +      if (env != NULL) {

> +        sscanf(env, "%i", &disable_self_test);

> +      }

> +      if (disable_self_test) {

> +        printf("Beignet: Warning - overriding self-test failure\n");

> +      } else {

> +        printf("Beignet: disabling non-working device\n");

> +        device = 0;

> +      }

>      }

>    }

>    if (!device) {

> diff --git a/src/cl_device_id.h b/src/cl_device_id.h index ee6a8e6..1bd5806

> 100644

> --- a/src/cl_device_id.h

> +++ b/src/cl_device_id.h

> @@ -113,6 +113,7 @@ struct _cl_device_id {

>    cl_device_affinity_domain    affinity_domain;

>    cl_device_partition_property partition_type[3];

>    cl_uint      device_reference_count;

> +  uint32_t atomic_test_result;

>  };

> 

>  /* Get a device from the given platform */ diff --git a/src/cl_driver.h

> b/src/cl_driver.h index b2510de..1ab4dff 100644

> --- a/src/cl_driver.h

> +++ b/src/cl_driver.h

> @@ -49,6 +49,15 @@ extern cl_driver_get_bufmgr_cb

> *cl_driver_get_bufmgr;  typedef uint32_t (cl_driver_get_ver_cb)(cl_driver);

>  extern cl_driver_get_ver_cb *cl_driver_get_ver;

> 

> +typedef enum cl_self_test_res{

> +  SELF_TEST_PASS = 0,

> +  SELF_TEST_SLM_FAIL  = 1,

> +  SELF_TEST_ATOMIC_FAIL = 2,

> +  SELF_TEST_OTHER_FAIL = 3,

> +} cl_self_test_res;

> +/* Set the atomic enable/disable flag in the driver */ typedef void

> +(cl_driver_set_atomic_flag_cb)(cl_driver, int); extern

> +cl_driver_set_atomic_flag_cb *cl_driver_set_atomic_flag;

> 

> /**********************************************************

> ****************

>   * GPGPU command streamer

> 

> **********************************************************

> ****************/

> diff --git a/src/cl_driver_defs.c b/src/cl_driver_defs.c index 9a47210..b77acdc

> 100644

> --- a/src/cl_driver_defs.c

> +++ b/src/cl_driver_defs.c

> @@ -25,6 +25,7 @@ LOCAL cl_driver_new_cb *cl_driver_new = NULL;

> LOCAL cl_driver_delete_cb *cl_driver_delete = NULL;  LOCAL

> cl_driver_get_bufmgr_cb *cl_driver_get_bufmgr = NULL;  LOCAL

> cl_driver_get_ver_cb *cl_driver_get_ver = NULL;

> +LOCAL cl_driver_set_atomic_flag_cb *cl_driver_set_atomic_flag = NULL;

>  LOCAL cl_driver_get_device_id_cb *cl_driver_get_device_id = NULL;  LOCAL

> cl_driver_update_device_info_cb *cl_driver_update_device_info = NULL;

> 

> diff --git a/src/intel/intel_defines.h b/src/intel/intel_defines.h index

> 1080a91..6ada30c 100644

> --- a/src/intel/intel_defines.h

> +++ b/src/intel/intel_defines.h

> @@ -304,6 +304,9 @@

> 

>  #define URB_SIZE(intel)         (IS_IGDNG(intel->device_id) ? 1024 : \

>                                   IS_G4X(intel->device_id) ? 384 : 256)

> +// HSW

> +#define HSW_SCRATCH1_OFFSET                      (0xB038)

> +#define HSW_ROW_CHICKEN3_HDC_OFFSET              (0xE49C)

> 

>  // L3 cache stuff

>  #define GEN7_L3_SQC_REG1_ADDRESS_OFFSET          (0XB010)

> diff --git a/src/intel/intel_driver.c b/src/intel/intel_driver.c index

> 1bebd9a..9c72777 100644

> --- a/src/intel/intel_driver.c

> +++ b/src/intel/intel_driver.c

> @@ -448,6 +448,12 @@ intel_driver_get_ver(struct intel_driver *drv)

>    return drv->gen_ver;

>  }

> 

> +static void

> +intel_driver_set_atomic_flag(intel_driver_t *drv, int atomic_flag) {

> +  drv->atomic_test_result = atomic_flag; }

> +

>  static size_t drm_intel_bo_get_size(drm_intel_bo *bo) { return bo->size; }

> static void* drm_intel_bo_get_virtual(drm_intel_bo *bo) { return bo-

> >virtual; }

> 

> @@ -834,6 +840,7 @@ intel_setup_callbacks(void)

>    cl_driver_new = (cl_driver_new_cb *) cl_intel_driver_new;

>    cl_driver_delete = (cl_driver_delete_cb *) cl_intel_driver_delete;

>    cl_driver_get_ver = (cl_driver_get_ver_cb *) intel_driver_get_ver;

> +  cl_driver_set_atomic_flag = (cl_driver_set_atomic_flag_cb *)

> + intel_driver_set_atomic_flag;

>    cl_driver_get_bufmgr = (cl_driver_get_bufmgr_cb *)

> intel_driver_get_bufmgr;

>    cl_driver_get_device_id = (cl_driver_get_device_id_cb *)

> intel_get_device_id;

>    cl_driver_update_device_info = (cl_driver_update_device_info_cb *)

> intel_update_device_info; diff --git a/src/intel/intel_driver.h

> b/src/intel/intel_driver.h index f972ec8..51f0e0d 100644

> --- a/src/intel/intel_driver.h

> +++ b/src/intel/intel_driver.h

> @@ -89,6 +89,7 @@ typedef struct intel_driver

>    Display *x11_display;

>    struct dri_state *dri_ctx;

>    struct intel_gpgpu_node *gpgpu_list;

> +  int atomic_test_result;

>  } intel_driver_t;

> 

>  #define SET_BLOCKED_SIGSET(DRIVER)   do {                     \

> diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c index

> b083dab..901bd98 100644

> --- a/src/intel/intel_gpgpu.c

> +++ b/src/intel/intel_gpgpu.c

> @@ -719,7 +719,23 @@ static void

>  intel_gpgpu_set_L3_gen75(intel_gpgpu_t *gpgpu, uint32_t use_slm)  {

>    /* still set L3 in batch buffer for fulsim. */

> -  BEGIN_BATCH(gpgpu->batch, 9);

> +  if(gpgpu->drv->atomic_test_result != SELF_TEST_ATOMIC_FAIL)  {

> +    BEGIN_BATCH(gpgpu->batch, 15);

> +    OUT_BATCH(gpgpu->batch, CMD_LOAD_REGISTER_IMM | 1); /* length -

> 2 */

> +    /* FIXME: KMD always disable the atomic in L3 for some reason.

> +       I checked the spec, and don't think we need that workaround now.

> +       Before I send a patch to kernel, let's just enable it here. */

> +    OUT_BATCH(gpgpu->batch, HSW_SCRATCH1_OFFSET);

> +    OUT_BATCH(gpgpu->batch, 0);                         /* enable atomic in L3 */

> +    OUT_BATCH(gpgpu->batch, CMD_LOAD_REGISTER_IMM | 1); /* length -

> 2 */

> +    OUT_BATCH(gpgpu->batch, HSW_ROW_CHICKEN3_HDC_OFFSET);

> +    OUT_BATCH(gpgpu->batch, (1 << 6ul) << 16);          /* enable atomic in L3

> */

> +  }

> +  else

> +  {

> +    BEGIN_BATCH(gpgpu->batch, 9);

> +  }

>    OUT_BATCH(gpgpu->batch, CMD_LOAD_REGISTER_IMM | 1); /* length - 2

> */

>    OUT_BATCH(gpgpu->batch, GEN7_L3_SQC_REG1_ADDRESS_OFFSET);

>    OUT_BATCH(gpgpu->batch, 0x08800000);

> --

> 1.9.1

> 

> _______________________________________________

> Beignet mailing list

> Beignet@lists.freedesktop.org

> http://lists.freedesktop.org/mailman/listinfo/beignet