[v4,1/2] intel: 48b ppgtt support (EXEC_OBJECT_SUPPORTS_48B_ADDRESS flag)

Submitted by Michel Thierry on Sept. 3, 2015, 2:23 p.m.

Details

Message ID 1441290239-31462-2-git-send-email-michel.thierry@intel.com
State New
Headers show

Not browsing as part of any series.

Commit Message

Michel Thierry Sept. 3, 2015, 2:23 p.m.
Gen8+ supports 48-bit virtual addresses, but some objects must always be
allocated inside the 32-bit address range.

In specific, any resource used with flat/heapless (0x00000000-0xfffff000)
General State Heap (GSH) or Instruction State Heap (ISH) must be in a
32-bit range, because the General State Offset and Instruction State Offset
are limited to 32-bits.

The i915 driver has been modified to provide a flag to set when the 4GB
limit is not necessary in a given bo (EXEC_OBJECT_SUPPORTS_48B_ADDRESS).
48-bit range will only be used when explicitly requested.

Callers to the existing drm_intel_bo_emit_reloc function should set the
use_48b_address_range flag beforehand, in order to use full ppgtt range.

v2: Make set/clear functions nops on pre-gen8 platforms, and use them
    internally in emit_reloc functions (Ben)
    s/48BADDRESS/48B_ADDRESS/ (Dave)
v3: Keep set/clear functions internal, no-one needs to use them directly.
v4: Don't set 48bit-support flag in emit reloc, check for ppgtt type
    before enabling set/clear function, print full offsets in debug
    statements, using port of lower_32_bits and upper_32_bits from linux
    kernel (Michał)

References: http://lists.freedesktop.org/archives/intel-gfx/2015-July/072612.html
Cc: Ben Widawsky <ben@bwidawsk.net>
Cc: Michał Winiarski <michal.winiarski@intel.com>
Signed-off-by: Michel Thierry <michel.thierry@intel.com>
---
 include/drm/i915_drm.h    |  3 +-
 intel/intel_bufmgr.c      | 11 ++++++
 intel/intel_bufmgr.h      |  1 +
 intel/intel_bufmgr_gem.c  | 88 +++++++++++++++++++++++++++++++++++++----------
 intel/intel_bufmgr_priv.h | 14 ++++++++
 5 files changed, 97 insertions(+), 20 deletions(-)

Patch hide | download patch | download mbox

diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h
index ded43b1..426b25c 100644
--- a/include/drm/i915_drm.h
+++ b/include/drm/i915_drm.h
@@ -680,7 +680,8 @@  struct drm_i915_gem_exec_object2 {
 #define EXEC_OBJECT_NEEDS_FENCE (1<<0)
 #define EXEC_OBJECT_NEEDS_GTT	(1<<1)
 #define EXEC_OBJECT_WRITE	(1<<2)
-#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_WRITE<<1)
+#define EXEC_OBJECT_SUPPORTS_48B_ADDRESS (1<<3)
+#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_SUPPORTS_48B_ADDRESS<<1)
 	__u64 flags;
 
 	__u64 rsvd1;
diff --git a/intel/intel_bufmgr.c b/intel/intel_bufmgr.c
index 14ea9f9..0856e60 100644
--- a/intel/intel_bufmgr.c
+++ b/intel/intel_bufmgr.c
@@ -293,6 +293,17 @@  drm_intel_bo_madvise(drm_intel_bo *bo, int madv)
 }
 
 int
+drm_intel_bo_use_48b_address_range(drm_intel_bo *bo, uint32_t enable)
+{
+	if (bo->bufmgr->bo_use_48b_address_range) {
+		bo->bufmgr->bo_use_48b_address_range(bo, enable);
+		return 0;
+	}
+
+	return -ENODEV;
+}
+
+int
 drm_intel_bo_references(drm_intel_bo *bo, drm_intel_bo *target_bo)
 {
 	return bo->bufmgr->bo_references(bo, target_bo);
diff --git a/intel/intel_bufmgr.h b/intel/intel_bufmgr.h
index 95eecb8..a14c78f 100644
--- a/intel/intel_bufmgr.h
+++ b/intel/intel_bufmgr.h
@@ -164,6 +164,7 @@  int drm_intel_bo_get_tiling(drm_intel_bo *bo, uint32_t * tiling_mode,
 int drm_intel_bo_flink(drm_intel_bo *bo, uint32_t * name);
 int drm_intel_bo_busy(drm_intel_bo *bo);
 int drm_intel_bo_madvise(drm_intel_bo *bo, int madv);
+int drm_intel_bo_use_48b_address_range(drm_intel_bo *bo, uint32_t enable);
 
 int drm_intel_bo_disable_reuse(drm_intel_bo *bo);
 int drm_intel_bo_is_reusable(drm_intel_bo *bo);
diff --git a/intel/intel_bufmgr_gem.c b/intel/intel_bufmgr_gem.c
index 2723e21..09d82d2 100644
--- a/intel/intel_bufmgr_gem.c
+++ b/intel/intel_bufmgr_gem.c
@@ -83,6 +83,22 @@ 
 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
 #define MAX2(A, B) ((A) > (B) ? (A) : (B))
 
+/**
+ * upper_32_bits - return bits 32-63 of a number
+ * @n: the number we're accessing
+ *
+ * A basic shift-right of a 64- or 32-bit quantity.  Use this to suppress
+ * the "right shift count >= width of type" warning when that quantity is
+ * 32-bits.
+ */
+#define upper_32_bits(n) ((__u32)(((n) >> 16) >> 16))
+
+/**
+ * lower_32_bits - return bits 0-31 of a number
+ * @n: the number we're accessing
+ */
+#define lower_32_bits(n) ((__u32)(n))
+
 typedef struct _drm_intel_bo_gem drm_intel_bo_gem;
 
 struct drm_intel_gem_bo_bucket {
@@ -237,6 +253,15 @@  struct _drm_intel_bo_gem {
 	bool is_userptr;
 
 	/**
+	 * Boolean of whether this buffer can be placed in the full 48-bit
+	 * address range on gen8+.
+	 *
+	 * By default, buffers will be keep in a 32-bit range, unless this
+	 * flag is explicitly set.
+	 */
+	bool use_48b_address_range;
+
+	/**
 	 * Size in bytes of this buffer and its relocation descendents.
 	 *
 	 * Used to avoid costly tree walking in
@@ -395,14 +420,16 @@  drm_intel_gem_dump_validation_list(drm_intel_bufmgr_gem *bufmgr_gem)
 			drm_intel_bo_gem *target_gem =
 			    (drm_intel_bo_gem *) target_bo;
 
-			DBG("%2d: %d (%s)@0x%08llx -> "
-			    "%d (%s)@0x%08lx + 0x%08x\n",
+			DBG("%2d: %d (%s)@0x%08x %08x -> "
+			    "%d (%s)@0x%08x %08x + 0x%08x\n",
 			    i,
 			    bo_gem->gem_handle, bo_gem->name,
-			    (unsigned long long)bo_gem->relocs[j].offset,
+			    upper_32_bits(bo_gem->relocs[j].offset),
+			    lower_32_bits(bo_gem->relocs[j].offset),
 			    target_gem->gem_handle,
 			    target_gem->name,
-			    target_bo->offset64,
+			    upper_32_bits(target_bo->offset64),
+			    lower_32_bits(target_bo->offset64),
 			    bo_gem->relocs[j].delta);
 		}
 	}
@@ -468,11 +495,15 @@  drm_intel_add_validate_buffer2(drm_intel_bo *bo, int need_fence)
 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
 	int index;
+	int flags = 0;
+
+	if (need_fence)
+		flags |= EXEC_OBJECT_NEEDS_FENCE;
+	if (bo_gem->use_48b_address_range)
+		flags |= EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
 
 	if (bo_gem->validate_index != -1) {
-		if (need_fence)
-			bufmgr_gem->exec2_objects[bo_gem->validate_index].flags |=
-				EXEC_OBJECT_NEEDS_FENCE;
+		bufmgr_gem->exec2_objects[bo_gem->validate_index].flags |= flags;
 		return;
 	}
 
@@ -501,13 +532,9 @@  drm_intel_add_validate_buffer2(drm_intel_bo *bo, int need_fence)
 	bufmgr_gem->exec2_objects[index].alignment = bo->align;
 	bufmgr_gem->exec2_objects[index].offset = 0;
 	bufmgr_gem->exec_bos[index] = bo;
-	bufmgr_gem->exec2_objects[index].flags = 0;
+	bufmgr_gem->exec2_objects[index].flags = flags;
 	bufmgr_gem->exec2_objects[index].rsvd1 = 0;
 	bufmgr_gem->exec2_objects[index].rsvd2 = 0;
-	if (need_fence) {
-		bufmgr_gem->exec2_objects[index].flags |=
-			EXEC_OBJECT_NEEDS_FENCE;
-	}
 	bufmgr_gem->exec_count++;
 }
 
@@ -780,6 +807,7 @@  retry:
 	bo_gem->used_as_reloc_target = false;
 	bo_gem->has_error = false;
 	bo_gem->reusable = true;
+	bo_gem->use_48b_address_range = false;
 
 	drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem, alignment);
 
@@ -926,6 +954,7 @@  drm_intel_gem_bo_alloc_userptr(drm_intel_bufmgr *bufmgr,
 	bo_gem->used_as_reloc_target = false;
 	bo_gem->has_error = false;
 	bo_gem->reusable = false;
+	bo_gem->use_48b_address_range = false;
 
 	drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem, 0);
 
@@ -1081,6 +1110,7 @@  drm_intel_bo_gem_create_from_name(drm_intel_bufmgr *bufmgr,
 	bo_gem->bo.handle = open_arg.handle;
 	bo_gem->global_name = handle;
 	bo_gem->reusable = false;
+	bo_gem->use_48b_address_range = false;
 
 	memclear(get_tiling);
 	get_tiling.handle = bo_gem->gem_handle;
@@ -1930,6 +1960,13 @@  do_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
 	return 0;
 }
 
+static void
+drm_intel_gem_bo_use_48b_address_range(drm_intel_bo *bo, uint32_t enable)
+{
+	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
+	bo_gem->use_48b_address_range = enable;
+}
+
 static int
 drm_intel_gem_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
 			    drm_intel_bo *target_bo, uint32_t target_offset,
@@ -2073,10 +2110,12 @@  drm_intel_update_buffer_offsets(drm_intel_bufmgr_gem *bufmgr_gem)
 
 		/* Update the buffer offset */
 		if (bufmgr_gem->exec_objects[i].offset != bo->offset64) {
-			DBG("BO %d (%s) migrated: 0x%08lx -> 0x%08llx\n",
-			    bo_gem->gem_handle, bo_gem->name, bo->offset64,
-			    (unsigned long long)bufmgr_gem->exec_objects[i].
-			    offset);
+			DBG("BO %d (%s) migrated: 0x%08x %08x -> 0x%08x %08x\n",
+			    bo_gem->gem_handle, bo_gem->name,
+			    upper_32_bits(bo->offset64),
+			    lower_32_bits(bo->offset64),
+			    upper_32_bits(bufmgr_gem->exec_objects[i].offset),
+			    lower_32_bits(bufmgr_gem->exec_objects[i].offset));
 			bo->offset64 = bufmgr_gem->exec_objects[i].offset;
 			bo->offset = bufmgr_gem->exec_objects[i].offset;
 		}
@@ -2094,9 +2133,12 @@  drm_intel_update_buffer_offsets2 (drm_intel_bufmgr_gem *bufmgr_gem)
 
 		/* Update the buffer offset */
 		if (bufmgr_gem->exec2_objects[i].offset != bo->offset64) {
-			DBG("BO %d (%s) migrated: 0x%08lx -> 0x%08llx\n",
-			    bo_gem->gem_handle, bo_gem->name, bo->offset64,
-			    (unsigned long long)bufmgr_gem->exec2_objects[i].offset);
+			DBG("BO %d (%s) migrated: 0x%08x %08x -> 0x%08x %08x\n",
+			    bo_gem->gem_handle, bo_gem->name,
+			    upper_32_bits(bo->offset64),
+			    lower_32_bits(bo->offset64),
+			    upper_32_bits(bufmgr_gem->exec2_objects[i].offset),
+			    lower_32_bits(bufmgr_gem->exec2_objects[i].offset));
 			bo->offset64 = bufmgr_gem->exec2_objects[i].offset;
 			bo->offset = bufmgr_gem->exec2_objects[i].offset;
 		}
@@ -2481,6 +2523,7 @@  drm_intel_bo_gem_create_from_prime(drm_intel_bufmgr *bufmgr, int prime_fd, int s
 	bo_gem->used_as_reloc_target = false;
 	bo_gem->has_error = false;
 	bo_gem->reusable = false;
+	bo_gem->use_48b_address_range = false;
 
 	DRMINITLISTHEAD(&bo_gem->vma_list);
 	DRMLISTADDTAIL(&bo_gem->name_list, &bufmgr_gem->named);
@@ -3278,6 +3321,13 @@  drm_intel_bufmgr_gem_init(int fd, int batch_size)
 		}
 	}
 
+	if (bufmgr_gem->gen >= 8) {
+		gp.param = I915_PARAM_HAS_ALIASING_PPGTT;
+		ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
+		if (ret == 0 && *gp.value == 3)
+			bufmgr_gem->bufmgr.bo_use_48b_address_range = drm_intel_gem_bo_use_48b_address_range;
+	}
+
 	/* Let's go with one relocation per every 2 dwords (but round down a bit
 	 * since a power of two will mean an extra page allocation for the reloc
 	 * buffer).
diff --git a/intel/intel_bufmgr_priv.h b/intel/intel_bufmgr_priv.h
index 59ebd18..5c17ffb 100644
--- a/intel/intel_bufmgr_priv.h
+++ b/intel/intel_bufmgr_priv.h
@@ -152,6 +152,20 @@  struct _drm_intel_bufmgr {
 	void (*destroy) (drm_intel_bufmgr *bufmgr);
 
 	/**
+	 * Indicate if the buffer can be placed anywhere in the full ppgtt
+	 * address range (2^48).
+	 *
+	 * Any resource used with flat/heapless (0x00000000-0xfffff000)
+	 * General State Heap (GSH) or Intructions State Heap (ISH) must
+	 * be in a 32-bit range. 48-bit range will only be used when explicitly
+	 * requested.
+	 *
+	 * \param bo Buffer to set the use_48b_address_range flag.
+	 * \param enable The flag value.
+	 */
+	void (*bo_use_48b_address_range) (drm_intel_bo *bo, uint32_t enable);
+
+	/**
 	 * Add relocation entry in reloc_buf, which will be updated with the
 	 * target buffer's real offset on on command submission.
 	 *

Comments

On Thu, Sep 03, 2015 at 03:23:58PM +0100, Michel Thierry wrote:
> Gen8+ supports 48-bit virtual addresses, but some objects must always be
> allocated inside the 32-bit address range.
> 
> In specific, any resource used with flat/heapless (0x00000000-0xfffff000)
> General State Heap (GSH) or Instruction State Heap (ISH) must be in a
> 32-bit range, because the General State Offset and Instruction State Offset
> are limited to 32-bits.
> 
> The i915 driver has been modified to provide a flag to set when the 4GB
> limit is not necessary in a given bo (EXEC_OBJECT_SUPPORTS_48B_ADDRESS).
> 48-bit range will only be used when explicitly requested.
> 
> Callers to the existing drm_intel_bo_emit_reloc function should set the
> use_48b_address_range flag beforehand, in order to use full ppgtt range.
> 
> v2: Make set/clear functions nops on pre-gen8 platforms, and use them
>     internally in emit_reloc functions (Ben)
>     s/48BADDRESS/48B_ADDRESS/ (Dave)
> v3: Keep set/clear functions internal, no-one needs to use them directly.
> v4: Don't set 48bit-support flag in emit reloc, check for ppgtt type
>     before enabling set/clear function, print full offsets in debug
>     statements, using port of lower_32_bits and upper_32_bits from linux
>     kernel (Michał)
> 
> References: http://lists.freedesktop.org/archives/intel-gfx/2015-July/072612.html
> Cc: Ben Widawsky <ben@bwidawsk.net>
> Cc: Michał Winiarski <michal.winiarski@intel.com>

+Kristian

LGTM.
Acked-by: Michał Winiarski <michal.winiarski@intel.com>

> Signed-off-by: Michel Thierry <michel.thierry@intel.com>
> ---
>  include/drm/i915_drm.h    |  3 +-
>  intel/intel_bufmgr.c      | 11 ++++++
>  intel/intel_bufmgr.h      |  1 +
>  intel/intel_bufmgr_gem.c  | 88 +++++++++++++++++++++++++++++++++++++----------
>  intel/intel_bufmgr_priv.h | 14 ++++++++
>  5 files changed, 97 insertions(+), 20 deletions(-)
> 
> diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h
> index ded43b1..426b25c 100644
> --- a/include/drm/i915_drm.h
> +++ b/include/drm/i915_drm.h
> @@ -680,7 +680,8 @@ struct drm_i915_gem_exec_object2 {
>  #define EXEC_OBJECT_NEEDS_FENCE (1<<0)
>  #define EXEC_OBJECT_NEEDS_GTT	(1<<1)
>  #define EXEC_OBJECT_WRITE	(1<<2)
> -#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_WRITE<<1)
> +#define EXEC_OBJECT_SUPPORTS_48B_ADDRESS (1<<3)
> +#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_SUPPORTS_48B_ADDRESS<<1)
>  	__u64 flags;
>  
>  	__u64 rsvd1;
> diff --git a/intel/intel_bufmgr.c b/intel/intel_bufmgr.c
> index 14ea9f9..0856e60 100644
> --- a/intel/intel_bufmgr.c
> +++ b/intel/intel_bufmgr.c
> @@ -293,6 +293,17 @@ drm_intel_bo_madvise(drm_intel_bo *bo, int madv)
>  }
>  
>  int
> +drm_intel_bo_use_48b_address_range(drm_intel_bo *bo, uint32_t enable)
> +{
> +	if (bo->bufmgr->bo_use_48b_address_range) {
> +		bo->bufmgr->bo_use_48b_address_range(bo, enable);
> +		return 0;
> +	}
> +
> +	return -ENODEV;
> +}
> +
> +int
>  drm_intel_bo_references(drm_intel_bo *bo, drm_intel_bo *target_bo)
>  {
>  	return bo->bufmgr->bo_references(bo, target_bo);
> diff --git a/intel/intel_bufmgr.h b/intel/intel_bufmgr.h
> index 95eecb8..a14c78f 100644
> --- a/intel/intel_bufmgr.h
> +++ b/intel/intel_bufmgr.h
> @@ -164,6 +164,7 @@ int drm_intel_bo_get_tiling(drm_intel_bo *bo, uint32_t * tiling_mode,
>  int drm_intel_bo_flink(drm_intel_bo *bo, uint32_t * name);
>  int drm_intel_bo_busy(drm_intel_bo *bo);
>  int drm_intel_bo_madvise(drm_intel_bo *bo, int madv);
> +int drm_intel_bo_use_48b_address_range(drm_intel_bo *bo, uint32_t enable);
>  
>  int drm_intel_bo_disable_reuse(drm_intel_bo *bo);
>  int drm_intel_bo_is_reusable(drm_intel_bo *bo);
> diff --git a/intel/intel_bufmgr_gem.c b/intel/intel_bufmgr_gem.c
> index 2723e21..09d82d2 100644
> --- a/intel/intel_bufmgr_gem.c
> +++ b/intel/intel_bufmgr_gem.c
> @@ -83,6 +83,22 @@
>  #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
>  #define MAX2(A, B) ((A) > (B) ? (A) : (B))
>  
> +/**
> + * upper_32_bits - return bits 32-63 of a number
> + * @n: the number we're accessing
> + *
> + * A basic shift-right of a 64- or 32-bit quantity.  Use this to suppress
> + * the "right shift count >= width of type" warning when that quantity is
> + * 32-bits.
> + */
> +#define upper_32_bits(n) ((__u32)(((n) >> 16) >> 16))
> +
> +/**
> + * lower_32_bits - return bits 0-31 of a number
> + * @n: the number we're accessing
> + */
> +#define lower_32_bits(n) ((__u32)(n))
> +
>  typedef struct _drm_intel_bo_gem drm_intel_bo_gem;
>  
>  struct drm_intel_gem_bo_bucket {
> @@ -237,6 +253,15 @@ struct _drm_intel_bo_gem {
>  	bool is_userptr;
>  
>  	/**
> +	 * Boolean of whether this buffer can be placed in the full 48-bit
> +	 * address range on gen8+.
> +	 *
> +	 * By default, buffers will be keep in a 32-bit range, unless this
> +	 * flag is explicitly set.
> +	 */
> +	bool use_48b_address_range;
> +
> +	/**
>  	 * Size in bytes of this buffer and its relocation descendents.
>  	 *
>  	 * Used to avoid costly tree walking in
> @@ -395,14 +420,16 @@ drm_intel_gem_dump_validation_list(drm_intel_bufmgr_gem *bufmgr_gem)
>  			drm_intel_bo_gem *target_gem =
>  			    (drm_intel_bo_gem *) target_bo;
>  
> -			DBG("%2d: %d (%s)@0x%08llx -> "
> -			    "%d (%s)@0x%08lx + 0x%08x\n",
> +			DBG("%2d: %d (%s)@0x%08x %08x -> "
> +			    "%d (%s)@0x%08x %08x + 0x%08x\n",
>  			    i,
>  			    bo_gem->gem_handle, bo_gem->name,
> -			    (unsigned long long)bo_gem->relocs[j].offset,
> +			    upper_32_bits(bo_gem->relocs[j].offset),
> +			    lower_32_bits(bo_gem->relocs[j].offset),
>  			    target_gem->gem_handle,
>  			    target_gem->name,
> -			    target_bo->offset64,
> +			    upper_32_bits(target_bo->offset64),
> +			    lower_32_bits(target_bo->offset64),
>  			    bo_gem->relocs[j].delta);
>  		}
>  	}
> @@ -468,11 +495,15 @@ drm_intel_add_validate_buffer2(drm_intel_bo *bo, int need_fence)
>  	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
>  	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
>  	int index;
> +	int flags = 0;
> +
> +	if (need_fence)
> +		flags |= EXEC_OBJECT_NEEDS_FENCE;
> +	if (bo_gem->use_48b_address_range)
> +		flags |= EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
>  
>  	if (bo_gem->validate_index != -1) {
> -		if (need_fence)
> -			bufmgr_gem->exec2_objects[bo_gem->validate_index].flags |=
> -				EXEC_OBJECT_NEEDS_FENCE;
> +		bufmgr_gem->exec2_objects[bo_gem->validate_index].flags |= flags;
>  		return;
>  	}
>  
> @@ -501,13 +532,9 @@ drm_intel_add_validate_buffer2(drm_intel_bo *bo, int need_fence)
>  	bufmgr_gem->exec2_objects[index].alignment = bo->align;
>  	bufmgr_gem->exec2_objects[index].offset = 0;
>  	bufmgr_gem->exec_bos[index] = bo;
> -	bufmgr_gem->exec2_objects[index].flags = 0;
> +	bufmgr_gem->exec2_objects[index].flags = flags;
>  	bufmgr_gem->exec2_objects[index].rsvd1 = 0;
>  	bufmgr_gem->exec2_objects[index].rsvd2 = 0;
> -	if (need_fence) {
> -		bufmgr_gem->exec2_objects[index].flags |=
> -			EXEC_OBJECT_NEEDS_FENCE;
> -	}
>  	bufmgr_gem->exec_count++;
>  }
>  
> @@ -780,6 +807,7 @@ retry:
>  	bo_gem->used_as_reloc_target = false;
>  	bo_gem->has_error = false;
>  	bo_gem->reusable = true;
> +	bo_gem->use_48b_address_range = false;
>  
>  	drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem, alignment);
>  
> @@ -926,6 +954,7 @@ drm_intel_gem_bo_alloc_userptr(drm_intel_bufmgr *bufmgr,
>  	bo_gem->used_as_reloc_target = false;
>  	bo_gem->has_error = false;
>  	bo_gem->reusable = false;
> +	bo_gem->use_48b_address_range = false;
>  
>  	drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem, 0);
>  
> @@ -1081,6 +1110,7 @@ drm_intel_bo_gem_create_from_name(drm_intel_bufmgr *bufmgr,
>  	bo_gem->bo.handle = open_arg.handle;
>  	bo_gem->global_name = handle;
>  	bo_gem->reusable = false;
> +	bo_gem->use_48b_address_range = false;
>  
>  	memclear(get_tiling);
>  	get_tiling.handle = bo_gem->gem_handle;
> @@ -1930,6 +1960,13 @@ do_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
>  	return 0;
>  }
>  
> +static void
> +drm_intel_gem_bo_use_48b_address_range(drm_intel_bo *bo, uint32_t enable)
> +{
> +	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
> +	bo_gem->use_48b_address_range = enable;
> +}
> +
>  static int
>  drm_intel_gem_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
>  			    drm_intel_bo *target_bo, uint32_t target_offset,
> @@ -2073,10 +2110,12 @@ drm_intel_update_buffer_offsets(drm_intel_bufmgr_gem *bufmgr_gem)
>  
>  		/* Update the buffer offset */
>  		if (bufmgr_gem->exec_objects[i].offset != bo->offset64) {
> -			DBG("BO %d (%s) migrated: 0x%08lx -> 0x%08llx\n",
> -			    bo_gem->gem_handle, bo_gem->name, bo->offset64,
> -			    (unsigned long long)bufmgr_gem->exec_objects[i].
> -			    offset);
> +			DBG("BO %d (%s) migrated: 0x%08x %08x -> 0x%08x %08x\n",
> +			    bo_gem->gem_handle, bo_gem->name,
> +			    upper_32_bits(bo->offset64),
> +			    lower_32_bits(bo->offset64),
> +			    upper_32_bits(bufmgr_gem->exec_objects[i].offset),
> +			    lower_32_bits(bufmgr_gem->exec_objects[i].offset));
>  			bo->offset64 = bufmgr_gem->exec_objects[i].offset;
>  			bo->offset = bufmgr_gem->exec_objects[i].offset;
>  		}
> @@ -2094,9 +2133,12 @@ drm_intel_update_buffer_offsets2 (drm_intel_bufmgr_gem *bufmgr_gem)
>  
>  		/* Update the buffer offset */
>  		if (bufmgr_gem->exec2_objects[i].offset != bo->offset64) {
> -			DBG("BO %d (%s) migrated: 0x%08lx -> 0x%08llx\n",
> -			    bo_gem->gem_handle, bo_gem->name, bo->offset64,
> -			    (unsigned long long)bufmgr_gem->exec2_objects[i].offset);
> +			DBG("BO %d (%s) migrated: 0x%08x %08x -> 0x%08x %08x\n",
> +			    bo_gem->gem_handle, bo_gem->name,
> +			    upper_32_bits(bo->offset64),
> +			    lower_32_bits(bo->offset64),
> +			    upper_32_bits(bufmgr_gem->exec2_objects[i].offset),
> +			    lower_32_bits(bufmgr_gem->exec2_objects[i].offset));
>  			bo->offset64 = bufmgr_gem->exec2_objects[i].offset;
>  			bo->offset = bufmgr_gem->exec2_objects[i].offset;
>  		}
> @@ -2481,6 +2523,7 @@ drm_intel_bo_gem_create_from_prime(drm_intel_bufmgr *bufmgr, int prime_fd, int s
>  	bo_gem->used_as_reloc_target = false;
>  	bo_gem->has_error = false;
>  	bo_gem->reusable = false;
> +	bo_gem->use_48b_address_range = false;
>  
>  	DRMINITLISTHEAD(&bo_gem->vma_list);
>  	DRMLISTADDTAIL(&bo_gem->name_list, &bufmgr_gem->named);
> @@ -3278,6 +3321,13 @@ drm_intel_bufmgr_gem_init(int fd, int batch_size)
>  		}
>  	}
>  
> +	if (bufmgr_gem->gen >= 8) {
> +		gp.param = I915_PARAM_HAS_ALIASING_PPGTT;
> +		ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
> +		if (ret == 0 && *gp.value == 3)
> +			bufmgr_gem->bufmgr.bo_use_48b_address_range = drm_intel_gem_bo_use_48b_address_range;
> +	}
> +
>  	/* Let's go with one relocation per every 2 dwords (but round down a bit
>  	 * since a power of two will mean an extra page allocation for the reloc
>  	 * buffer).
> diff --git a/intel/intel_bufmgr_priv.h b/intel/intel_bufmgr_priv.h
> index 59ebd18..5c17ffb 100644
> --- a/intel/intel_bufmgr_priv.h
> +++ b/intel/intel_bufmgr_priv.h
> @@ -152,6 +152,20 @@ struct _drm_intel_bufmgr {
>  	void (*destroy) (drm_intel_bufmgr *bufmgr);
>  
>  	/**
> +	 * Indicate if the buffer can be placed anywhere in the full ppgtt
> +	 * address range (2^48).
> +	 *
> +	 * Any resource used with flat/heapless (0x00000000-0xfffff000)
> +	 * General State Heap (GSH) or Intructions State Heap (ISH) must
> +	 * be in a 32-bit range. 48-bit range will only be used when explicitly
> +	 * requested.
> +	 *
> +	 * \param bo Buffer to set the use_48b_address_range flag.
> +	 * \param enable The flag value.
> +	 */
> +	void (*bo_use_48b_address_range) (drm_intel_bo *bo, uint32_t enable);
> +
> +	/**
>  	 * Add relocation entry in reloc_buf, which will be updated with the
>  	 * target buffer's real offset on on command submission.
>  	 *
> -- 
> 2.5.0
>