[libdrm,v3,1/2] intel: Add EXEC_OBJECT_SUPPORTS_48B_ADDRESS flag

Submitted by Michel Thierry on Aug. 7, 2015, 9:45 a.m.

Details

Message ID 1438940723-23084-2-git-send-email-michel.thierry@intel.com
State New
Headers show

Not browsing as part of any series.

Commit Message

Michel Thierry Aug. 7, 2015, 9:45 a.m.
Gen8+ supports 48-bit virtual addresses, but some objects must always be
allocated inside the 32-bit address range.

In specific, any resource used with flat/heapless (0x00000000-0xfffff000)
General State Heap (GSH) or Instruction State Heap (ISH) must be in a
32-bit range, because the General State Offset and Instruction State Offset
are limited to 32-bits.

The i915 driver has been modified to provide a flag to set when the 4GB
limit is not necessary in a given bo (EXEC_OBJECT_SUPPORTS_48B_ADDRESS).
48-bit range will only be used when explicitly requested.

Calls to the new drm_intel_bo_emit_reloc_48bit function will have this flag
set automatically, while calls to drm_intel_bo_emit_reloc will clear it.

v2: Make set/clear functions nops on pre-gen8 platforms, and use them
    internally in emit_reloc functions (Ben)
    s/48BADDRESS/48B_ADDRESS/ (Dave)
v3: Keep set/clear functions internal, no-one needs to use them directly.

References: http://lists.freedesktop.org/archives/intel-gfx/2015-July/072612.html
Cc: Ben Widawsky <ben@bwidawsk.net>
Cc: Dave Gordon <david.s.gordon@intel.com>
Signed-off-by: Michel Thierry <michel.thierry@intel.com>
---
 include/drm/i915_drm.h    |  3 ++-
 intel/intel_bufmgr.c      | 16 ++++++++++++++
 intel/intel_bufmgr.h      |  6 +++++-
 intel/intel_bufmgr_gem.c  | 54 +++++++++++++++++++++++++++++++++++++++++++----
 intel/intel_bufmgr_priv.h | 11 ++++++++++
 5 files changed, 84 insertions(+), 6 deletions(-)

Patch hide | download patch | download mbox

diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h
index ded43b1..426b25c 100644
--- a/include/drm/i915_drm.h
+++ b/include/drm/i915_drm.h
@@ -680,7 +680,8 @@  struct drm_i915_gem_exec_object2 {
 #define EXEC_OBJECT_NEEDS_FENCE (1<<0)
 #define EXEC_OBJECT_NEEDS_GTT	(1<<1)
 #define EXEC_OBJECT_WRITE	(1<<2)
-#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_WRITE<<1)
+#define EXEC_OBJECT_SUPPORTS_48B_ADDRESS (1<<3)
+#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_SUPPORTS_48B_ADDRESS<<1)
 	__u64 flags;
 
 	__u64 rsvd1;
diff --git a/intel/intel_bufmgr.c b/intel/intel_bufmgr.c
index 14ea9f9..0bd5191 100644
--- a/intel/intel_bufmgr.c
+++ b/intel/intel_bufmgr.c
@@ -202,6 +202,22 @@  drm_intel_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
 			drm_intel_bo *target_bo, uint32_t target_offset,
 			uint32_t read_domains, uint32_t write_domain)
 {
+	if (bo->bufmgr->bo_clear_supports_48b_address)
+		bo->bufmgr->bo_clear_supports_48b_address(target_bo);
+
+	return bo->bufmgr->bo_emit_reloc(bo, offset,
+					 target_bo, target_offset,
+					 read_domains, write_domain);
+}
+
+int
+drm_intel_bo_emit_reloc_48bit(drm_intel_bo *bo, uint32_t offset,
+			drm_intel_bo *target_bo, uint32_t target_offset,
+			uint32_t read_domains, uint32_t write_domain)
+{
+	if (bo->bufmgr->bo_set_supports_48b_address)
+		bo->bufmgr->bo_set_supports_48b_address(target_bo);
+
 	return bo->bufmgr->bo_emit_reloc(bo, offset,
 					 target_bo, target_offset,
 					 read_domains, write_domain);
diff --git a/intel/intel_bufmgr.h b/intel/intel_bufmgr.h
index 285919e..8f91ffe 100644
--- a/intel/intel_bufmgr.h
+++ b/intel/intel_bufmgr.h
@@ -87,7 +87,8 @@  struct _drm_intel_bo {
 	/**
 	 * Last seen card virtual address (offset from the beginning of the
 	 * aperture) for the object.  This should be used to fill relocation
-	 * entries when calling drm_intel_bo_emit_reloc()
+	 * entries when calling drm_intel_bo_emit_reloc() or
+	 * drm_intel_bo_emit_reloc_48bit()
 	 */
 	uint64_t offset64;
 };
@@ -147,6 +148,9 @@  int drm_intel_bufmgr_check_aperture_space(drm_intel_bo ** bo_array, int count);
 int drm_intel_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
 			    drm_intel_bo *target_bo, uint32_t target_offset,
 			    uint32_t read_domains, uint32_t write_domain);
+int drm_intel_bo_emit_reloc_48bit(drm_intel_bo *bo, uint32_t offset,
+				  drm_intel_bo *target_bo, uint32_t target_offset,
+				  uint32_t read_domains, uint32_t write_domain);
 int drm_intel_bo_emit_reloc_fence(drm_intel_bo *bo, uint32_t offset,
 				  drm_intel_bo *target_bo,
 				  uint32_t target_offset,
diff --git a/intel/intel_bufmgr_gem.c b/intel/intel_bufmgr_gem.c
index 41de396..713ed4e 100644
--- a/intel/intel_bufmgr_gem.c
+++ b/intel/intel_bufmgr_gem.c
@@ -140,6 +140,7 @@  typedef struct _drm_intel_bufmgr_gem {
 } drm_intel_bufmgr_gem;
 
 #define DRM_INTEL_RELOC_FENCE (1<<0)
+#define DRM_INTEL_RELOC_SUPPORTS_48B_ADDRESS (2<<0)
 
 typedef struct _drm_intel_reloc_target_info {
 	drm_intel_bo *bo;
@@ -237,6 +238,14 @@  struct _drm_intel_bo_gem {
 	bool is_userptr;
 
 	/**
+	 * Boolean of whether this buffer can be in the whole 48-bit address.
+	 *
+	 * By default, buffers will be keep in a 32-bit range, unless this
+	 * flag is explicitly set.
+	 */
+	bool supports_48b_address;
+
+	/**
 	 * Size in bytes of this buffer and its relocation descendents.
 	 *
 	 * Used to avoid costly tree walking in
@@ -463,13 +472,18 @@  drm_intel_add_validate_buffer(drm_intel_bo *bo)
 }
 
 static void
-drm_intel_add_validate_buffer2(drm_intel_bo *bo, int need_fence)
+drm_intel_add_validate_buffer2(drm_intel_bo *bo, int need_fence,
+			       int supports_48b_address)
 {
 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
 	int index;
 
 	if (bo_gem->validate_index != -1) {
+		if (supports_48b_address) {
+			bufmgr_gem->exec2_objects[bo_gem->validate_index].flags |=
+				EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
+		}
 		if (need_fence)
 			bufmgr_gem->exec2_objects[bo_gem->validate_index].flags |=
 				EXEC_OBJECT_NEEDS_FENCE;
@@ -508,6 +522,10 @@  drm_intel_add_validate_buffer2(drm_intel_bo *bo, int need_fence)
 		bufmgr_gem->exec2_objects[index].flags |=
 			EXEC_OBJECT_NEEDS_FENCE;
 	}
+	if (supports_48b_address) {
+		bufmgr_gem->exec2_objects[index].flags |=
+			EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
+	}
 	bufmgr_gem->exec_count++;
 }
 
@@ -1925,11 +1943,33 @@  do_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
 	else
 		bo_gem->reloc_target_info[bo_gem->reloc_count].flags = 0;
 
+	if (target_bo_gem->supports_48b_address)
+		bo_gem->reloc_target_info[bo_gem->reloc_count].flags |=
+			DRM_INTEL_RELOC_SUPPORTS_48B_ADDRESS;
+
 	bo_gem->reloc_count++;
 
 	return 0;
 }
 
+static void drm_intel_gem_bo_set_supports_48b_address(drm_intel_bo *bo)
+{
+	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
+	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
+
+	if (bufmgr_gem->gen >= 8)
+		bo_gem->supports_48b_address = 1;
+}
+
+static void drm_intel_gem_bo_clear_supports_48b_address(drm_intel_bo *bo)
+{
+	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
+	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
+
+	if (bufmgr_gem->gen >= 8)
+		bo_gem->supports_48b_address = 0;
+}
+
 static int
 drm_intel_gem_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
 			    drm_intel_bo *target_bo, uint32_t target_offset,
@@ -2043,7 +2083,7 @@  drm_intel_gem_bo_process_reloc2(drm_intel_bo *bo)
 
 	for (i = 0; i < bo_gem->reloc_count; i++) {
 		drm_intel_bo *target_bo = bo_gem->reloc_target_info[i].bo;
-		int need_fence;
+		int need_fence, supports_48b_addr;
 
 		if (target_bo == bo)
 			continue;
@@ -2056,8 +2096,12 @@  drm_intel_gem_bo_process_reloc2(drm_intel_bo *bo)
 		need_fence = (bo_gem->reloc_target_info[i].flags &
 			      DRM_INTEL_RELOC_FENCE);
 
+		supports_48b_addr = (bo_gem->reloc_target_info[i].flags &
+				    DRM_INTEL_RELOC_SUPPORTS_48B_ADDRESS);
+
 		/* Add the target to the validate list */
-		drm_intel_add_validate_buffer2(target_bo, need_fence);
+		drm_intel_add_validate_buffer2(target_bo, need_fence,
+					       supports_48b_addr);
 	}
 }
 
@@ -2217,7 +2261,7 @@  do_exec2(drm_intel_bo *bo, int used, drm_intel_context *ctx,
 	/* Add the batch buffer to the validation list.  There are no relocations
 	 * pointing to it.
 	 */
-	drm_intel_add_validate_buffer2(bo, 0);
+	drm_intel_add_validate_buffer2(bo, 0, 0);
 
 	memclear(execbuf);
 	execbuf.buffers_ptr = (uintptr_t)bufmgr_gem->exec2_objects;
@@ -3299,6 +3343,8 @@  drm_intel_bufmgr_gem_init(int fd, int batch_size)
 	bufmgr_gem->bufmgr.bo_wait_rendering = drm_intel_gem_bo_wait_rendering;
 	bufmgr_gem->bufmgr.bo_emit_reloc = drm_intel_gem_bo_emit_reloc;
 	bufmgr_gem->bufmgr.bo_emit_reloc_fence = drm_intel_gem_bo_emit_reloc_fence;
+	bufmgr_gem->bufmgr.bo_set_supports_48b_address = drm_intel_gem_bo_set_supports_48b_address;
+	bufmgr_gem->bufmgr.bo_clear_supports_48b_address = drm_intel_gem_bo_clear_supports_48b_address;
 	bufmgr_gem->bufmgr.bo_pin = drm_intel_gem_bo_pin;
 	bufmgr_gem->bufmgr.bo_unpin = drm_intel_gem_bo_unpin;
 	bufmgr_gem->bufmgr.bo_get_tiling = drm_intel_gem_bo_get_tiling;
diff --git a/intel/intel_bufmgr_priv.h b/intel/intel_bufmgr_priv.h
index 59ebd18..774fa02 100644
--- a/intel/intel_bufmgr_priv.h
+++ b/intel/intel_bufmgr_priv.h
@@ -152,6 +152,17 @@  struct _drm_intel_bufmgr {
 	void (*destroy) (drm_intel_bufmgr *bufmgr);
 
 	/**
+	 * Set/Clear 48-bit address support flag in a given bo.
+	 *
+	 * Any resource used with flat/heapless (0x00000000-0xfffff000)
+	 * General State Heap (GSH) or Intructions State Heap (ISH) must
+	 * be in a 32-bit range. 48-bit range will only be used when explicitly
+	 * requested.
+	 */
+	void (*bo_set_supports_48b_address) (drm_intel_bo *bo);
+	void (*bo_clear_supports_48b_address) (drm_intel_bo *bo);
+
+	/**
 	 * Add relocation entry in reloc_buf, which will be updated with the
 	 * target buffer's real offset on on command submission.
 	 *

Comments

On Fri, Aug 07, 2015 at 10:45:21AM +0100, Michel Thierry wrote:
> Gen8+ supports 48-bit virtual addresses, but some objects must always be
> allocated inside the 32-bit address range.
> 
> In specific, any resource used with flat/heapless (0x00000000-0xfffff000)
> General State Heap (GSH) or Instruction State Heap (ISH) must be in a
> 32-bit range, because the General State Offset and Instruction State Offset
> are limited to 32-bits.
> 
> The i915 driver has been modified to provide a flag to set when the 4GB
> limit is not necessary in a given bo (EXEC_OBJECT_SUPPORTS_48B_ADDRESS).
> 48-bit range will only be used when explicitly requested.
> 
> Calls to the new drm_intel_bo_emit_reloc_48bit function will have this flag
> set automatically, while calls to drm_intel_bo_emit_reloc will clear it.
> 
> v2: Make set/clear functions nops on pre-gen8 platforms, and use them
>     internally in emit_reloc functions (Ben)
>     s/48BADDRESS/48B_ADDRESS/ (Dave)
> v3: Keep set/clear functions internal, no-one needs to use them directly.
> 
> References: http://lists.freedesktop.org/archives/intel-gfx/2015-July/072612.html
> Cc: Ben Widawsky <ben@bwidawsk.net>
> Cc: Dave Gordon <david.s.gordon@intel.com>
> Signed-off-by: Michel Thierry <michel.thierry@intel.com>
> ---
>  include/drm/i915_drm.h    |  3 ++-
>  intel/intel_bufmgr.c      | 16 ++++++++++++++
>  intel/intel_bufmgr.h      |  6 +++++-
>  intel/intel_bufmgr_gem.c  | 54 +++++++++++++++++++++++++++++++++++++++++++----
>  intel/intel_bufmgr_priv.h | 11 ++++++++++
>  5 files changed, 84 insertions(+), 6 deletions(-)
> 
> diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h
> index ded43b1..426b25c 100644
> --- a/include/drm/i915_drm.h
> +++ b/include/drm/i915_drm.h
> @@ -680,7 +680,8 @@ struct drm_i915_gem_exec_object2 {
>  #define EXEC_OBJECT_NEEDS_FENCE (1<<0)
>  #define EXEC_OBJECT_NEEDS_GTT	(1<<1)
>  #define EXEC_OBJECT_WRITE	(1<<2)
> -#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_WRITE<<1)
> +#define EXEC_OBJECT_SUPPORTS_48B_ADDRESS (1<<3)
> +#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_SUPPORTS_48B_ADDRESS<<1)
>  	__u64 flags;
>  
>  	__u64 rsvd1;
> diff --git a/intel/intel_bufmgr.c b/intel/intel_bufmgr.c
> index 14ea9f9..0bd5191 100644
> --- a/intel/intel_bufmgr.c
> +++ b/intel/intel_bufmgr.c
> @@ -202,6 +202,22 @@ drm_intel_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
>  			drm_intel_bo *target_bo, uint32_t target_offset,
>  			uint32_t read_domains, uint32_t write_domain)
>  {
> +	if (bo->bufmgr->bo_clear_supports_48b_address)
> +		bo->bufmgr->bo_clear_supports_48b_address(target_bo);

This is always true - you assign func to this func pointer unconditionally.
Also - why not return some meaningful value if the user does not have
enable_ppgtt=3 set? You can get that from I915_PARAM_HAS_ALIASING_PPGTT right
now. Check for gen >= 8 seems rather inadequate, and even then you're not
returning anything useful to the caller.

> +
> +	return bo->bufmgr->bo_emit_reloc(bo, offset,
> +					 target_bo, target_offset,
> +					 read_domains, write_domain);
> +}
> +

Using emit_reloc to set a BO flag seems confusing and can be error prone,
emit_reloc can be called many times and caller needs to be careful and call the
right function for each reloc.

> +int
> +drm_intel_bo_emit_reloc_48bit(drm_intel_bo *bo, uint32_t offset,
> +			drm_intel_bo *target_bo, uint32_t target_offset,
> +			uint32_t read_domains, uint32_t write_domain)
> +{
> +	if (bo->bufmgr->bo_set_supports_48b_address)
> +		bo->bufmgr->bo_set_supports_48b_address(target_bo);

Same situation as with clear_supports_48b_address.

> +
>  	return bo->bufmgr->bo_emit_reloc(bo, offset,
>  					 target_bo, target_offset,
>  					 read_domains, write_domain);
> diff --git a/intel/intel_bufmgr.h b/intel/intel_bufmgr.h
> index 285919e..8f91ffe 100644
> --- a/intel/intel_bufmgr.h
> +++ b/intel/intel_bufmgr.h
> @@ -87,7 +87,8 @@ struct _drm_intel_bo {
>  	/**
>  	 * Last seen card virtual address (offset from the beginning of the
>  	 * aperture) for the object.  This should be used to fill relocation
> -	 * entries when calling drm_intel_bo_emit_reloc()
> +	 * entries when calling drm_intel_bo_emit_reloc() or
> +	 * drm_intel_bo_emit_reloc_48bit()
>  	 */
>  	uint64_t offset64;
>  };
> @@ -147,6 +148,9 @@ int drm_intel_bufmgr_check_aperture_space(drm_intel_bo ** bo_array, int count);
>  int drm_intel_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
>  			    drm_intel_bo *target_bo, uint32_t target_offset,
>  			    uint32_t read_domains, uint32_t write_domain);
> +int drm_intel_bo_emit_reloc_48bit(drm_intel_bo *bo, uint32_t offset,
> +				  drm_intel_bo *target_bo, uint32_t target_offset,
> +				  uint32_t read_domains, uint32_t write_domain);
>  int drm_intel_bo_emit_reloc_fence(drm_intel_bo *bo, uint32_t offset,
>  				  drm_intel_bo *target_bo,
>  				  uint32_t target_offset,
> diff --git a/intel/intel_bufmgr_gem.c b/intel/intel_bufmgr_gem.c
> index 41de396..713ed4e 100644
> --- a/intel/intel_bufmgr_gem.c
> +++ b/intel/intel_bufmgr_gem.c
> @@ -140,6 +140,7 @@ typedef struct _drm_intel_bufmgr_gem {
>  } drm_intel_bufmgr_gem;
>  
>  #define DRM_INTEL_RELOC_FENCE (1<<0)
> +#define DRM_INTEL_RELOC_SUPPORTS_48B_ADDRESS (2<<0)

Why are you duplicating information about support for 48B in both emit reloc
function argument and struct bo_gem?
  
>  typedef struct _drm_intel_reloc_target_info {
>  	drm_intel_bo *bo;
> @@ -237,6 +238,14 @@ struct _drm_intel_bo_gem {
>  	bool is_userptr;
>  
>  	/**
> +	 * Boolean of whether this buffer can be in the whole 48-bit address.
> +	 *
> +	 * By default, buffers will be keep in a 32-bit range, unless this
> +	 * flag is explicitly set.
> +	 */
> +	bool supports_48b_address;
> +
> +	/**
>  	 * Size in bytes of this buffer and its relocation descendents.
>  	 *
>  	 * Used to avoid costly tree walking in
> @@ -463,13 +472,18 @@ drm_intel_add_validate_buffer(drm_intel_bo *bo)
>  }
>  
>  static void
> -drm_intel_add_validate_buffer2(drm_intel_bo *bo, int need_fence)
> +drm_intel_add_validate_buffer2(drm_intel_bo *bo, int need_fence,
> +			       int supports_48b_address)
>  {
>  	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
>  	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
>  	int index;
>  
>  	if (bo_gem->validate_index != -1) {
> +		if (supports_48b_address) {
> +			bufmgr_gem->exec2_objects[bo_gem->validate_index].flags |=
> +				EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
> +		}
>  		if (need_fence)
>  			bufmgr_gem->exec2_objects[bo_gem->validate_index].flags |=
>  				EXEC_OBJECT_NEEDS_FENCE;
> @@ -508,6 +522,10 @@ drm_intel_add_validate_buffer2(drm_intel_bo *bo, int need_fence)
>  		bufmgr_gem->exec2_objects[index].flags |=
>  			EXEC_OBJECT_NEEDS_FENCE;
>  	}
> +	if (supports_48b_address) {
> +		bufmgr_gem->exec2_objects[index].flags |=
> +			EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
> +	}
>  	bufmgr_gem->exec_count++;
>  }
>  
> @@ -1925,11 +1943,33 @@ do_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
>  	else
>  		bo_gem->reloc_target_info[bo_gem->reloc_count].flags = 0;
>  
> +	if (target_bo_gem->supports_48b_address)
> +		bo_gem->reloc_target_info[bo_gem->reloc_count].flags |=
> +			DRM_INTEL_RELOC_SUPPORTS_48B_ADDRESS;
> +
>  	bo_gem->reloc_count++;
>  
>  	return 0;
>  }
>  
> +static void drm_intel_gem_bo_set_supports_48b_address(drm_intel_bo *bo)
> +{
> +	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
> +	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
> +
> +	if (bufmgr_gem->gen >= 8)
> +		bo_gem->supports_48b_address = 1;
> +}
> +
> +static void drm_intel_gem_bo_clear_supports_48b_address(drm_intel_bo *bo)
> +{
> +	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
> +	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
> +
> +	if (bufmgr_gem->gen >= 8)
> +		bo_gem->supports_48b_address = 0;
> +}
> +
>  static int
>  drm_intel_gem_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
>  			    drm_intel_bo *target_bo, uint32_t target_offset,
> @@ -2043,7 +2083,7 @@ drm_intel_gem_bo_process_reloc2(drm_intel_bo *bo)
>  
>  	for (i = 0; i < bo_gem->reloc_count; i++) {
>  		drm_intel_bo *target_bo = bo_gem->reloc_target_info[i].bo;
> -		int need_fence;
> +		int need_fence, supports_48b_addr;
>  
>  		if (target_bo == bo)
>  			continue;
> @@ -2056,8 +2096,12 @@ drm_intel_gem_bo_process_reloc2(drm_intel_bo *bo)
>  		need_fence = (bo_gem->reloc_target_info[i].flags &
>  			      DRM_INTEL_RELOC_FENCE);
>  
> +		supports_48b_addr = (bo_gem->reloc_target_info[i].flags &
> +				    DRM_INTEL_RELOC_SUPPORTS_48B_ADDRESS);
> +
>  		/* Add the target to the validate list */
> -		drm_intel_add_validate_buffer2(target_bo, need_fence);
> +		drm_intel_add_validate_buffer2(target_bo, need_fence,
> +					       supports_48b_addr);
>  	}
>  }
>  
> @@ -2217,7 +2261,7 @@ do_exec2(drm_intel_bo *bo, int used, drm_intel_context *ctx,
>  	/* Add the batch buffer to the validation list.  There are no relocations
>  	 * pointing to it.
>  	 */
> -	drm_intel_add_validate_buffer2(bo, 0);
> +	drm_intel_add_validate_buffer2(bo, 0, 0);
>  
>  	memclear(execbuf);
>  	execbuf.buffers_ptr = (uintptr_t)bufmgr_gem->exec2_objects;
> @@ -3299,6 +3343,8 @@ drm_intel_bufmgr_gem_init(int fd, int batch_size)
>  	bufmgr_gem->bufmgr.bo_wait_rendering = drm_intel_gem_bo_wait_rendering;
>  	bufmgr_gem->bufmgr.bo_emit_reloc = drm_intel_gem_bo_emit_reloc;
>  	bufmgr_gem->bufmgr.bo_emit_reloc_fence = drm_intel_gem_bo_emit_reloc_fence;
> +	bufmgr_gem->bufmgr.bo_set_supports_48b_address = drm_intel_gem_bo_set_supports_48b_address;
> +	bufmgr_gem->bufmgr.bo_clear_supports_48b_address = drm_intel_gem_bo_clear_supports_48b_address;
>  	bufmgr_gem->bufmgr.bo_pin = drm_intel_gem_bo_pin;
>  	bufmgr_gem->bufmgr.bo_unpin = drm_intel_gem_bo_unpin;
>  	bufmgr_gem->bufmgr.bo_get_tiling = drm_intel_gem_bo_get_tiling;
> diff --git a/intel/intel_bufmgr_priv.h b/intel/intel_bufmgr_priv.h
> index 59ebd18..774fa02 100644
> --- a/intel/intel_bufmgr_priv.h
> +++ b/intel/intel_bufmgr_priv.h
> @@ -152,6 +152,17 @@ struct _drm_intel_bufmgr {
>  	void (*destroy) (drm_intel_bufmgr *bufmgr);
>  
>  	/**
> +	 * Set/Clear 48-bit address support flag in a given bo.
> +	 *
> +	 * Any resource used with flat/heapless (0x00000000-0xfffff000)
> +	 * General State Heap (GSH) or Intructions State Heap (ISH) must
> +	 * be in a 32-bit range. 48-bit range will only be used when explicitly
> +	 * requested.
> +	 */
> +	void (*bo_set_supports_48b_address) (drm_intel_bo *bo);
> +	void (*bo_clear_supports_48b_address) (drm_intel_bo *bo);
> +
> +	/**
>  	 * Add relocation entry in reloc_buf, which will be updated with the
>  	 * target buffer's real offset on on command submission.
>  	 *
> -- 
> 2.5.0

You also haven't updated the dump_validation_list with proper format specifier
(addresses are truncated otherwise).

I really don't like hiding set_supports_48b_address/clear_supports_48b_address
in emit reloc - making this explicit would be a better approach.
Of course this is just my opinion, but let me post a bit different version of
this API - just as an RFC so you can see how it would look with explicit set on
bo and without adding another emit_reloc variant.
On 8/7/2015 11:56 AM, Michał Winiarski wrote:
> On Fri, Aug 07, 2015 at 10:45:21AM +0100, Michel Thierry wrote:
>> Gen8+ supports 48-bit virtual addresses, but some objects must always be
>> allocated inside the 32-bit address range.
>>
>> In specific, any resource used with flat/heapless (0x00000000-0xfffff000)
>> General State Heap (GSH) or Instruction State Heap (ISH) must be in a
>> 32-bit range, because the General State Offset and Instruction State Offset
>> are limited to 32-bits.
>>
>> The i915 driver has been modified to provide a flag to set when the 4GB
>> limit is not necessary in a given bo (EXEC_OBJECT_SUPPORTS_48B_ADDRESS).
>> 48-bit range will only be used when explicitly requested.
>>
>> Calls to the new drm_intel_bo_emit_reloc_48bit function will have this flag
>> set automatically, while calls to drm_intel_bo_emit_reloc will clear it.
>>
>> v2: Make set/clear functions nops on pre-gen8 platforms, and use them
>>      internally in emit_reloc functions (Ben)
>>      s/48BADDRESS/48B_ADDRESS/ (Dave)
>> v3: Keep set/clear functions internal, no-one needs to use them directly.
>>
>> References: http://lists.freedesktop.org/archives/intel-gfx/2015-July/072612.html
>> Cc: Ben Widawsky <ben@bwidawsk.net>
>> Cc: Dave Gordon <david.s.gordon@intel.com>
>> Signed-off-by: Michel Thierry <michel.thierry@intel.com>
>> ---
>>   include/drm/i915_drm.h    |  3 ++-
>>   intel/intel_bufmgr.c      | 16 ++++++++++++++
>>   intel/intel_bufmgr.h      |  6 +++++-
>>   intel/intel_bufmgr_gem.c  | 54 +++++++++++++++++++++++++++++++++++++++++++----
>>   intel/intel_bufmgr_priv.h | 11 ++++++++++
>>   5 files changed, 84 insertions(+), 6 deletions(-)
>>
>> diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h
>> index ded43b1..426b25c 100644
>> --- a/include/drm/i915_drm.h
>> +++ b/include/drm/i915_drm.h
>> @@ -680,7 +680,8 @@ struct drm_i915_gem_exec_object2 {
>>   #define EXEC_OBJECT_NEEDS_FENCE (1<<0)
>>   #define EXEC_OBJECT_NEEDS_GTT	(1<<1)
>>   #define EXEC_OBJECT_WRITE	(1<<2)
>> -#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_WRITE<<1)
>> +#define EXEC_OBJECT_SUPPORTS_48B_ADDRESS (1<<3)
>> +#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_SUPPORTS_48B_ADDRESS<<1)
>>   	__u64 flags;
>>
>>   	__u64 rsvd1;
>> diff --git a/intel/intel_bufmgr.c b/intel/intel_bufmgr.c
>> index 14ea9f9..0bd5191 100644
>> --- a/intel/intel_bufmgr.c
>> +++ b/intel/intel_bufmgr.c
>> @@ -202,6 +202,22 @@ drm_intel_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
>>   			drm_intel_bo *target_bo, uint32_t target_offset,
>>   			uint32_t read_domains, uint32_t write_domain)
>>   {
>> +	if (bo->bufmgr->bo_clear_supports_48b_address)
>> +		bo->bufmgr->bo_clear_supports_48b_address(target_bo);
>
> This is always true - you assign func to this func pointer unconditionally.
> Also - why not return some meaningful value if the user does not have
> enable_ppgtt=3 set? You can get that from I915_PARAM_HAS_ALIASING_PPGTT right
> now. Check for gen >= 8 seems rather inadequate, and even then you're not
> returning anything useful to the caller.
>
>> +
>> +	return bo->bufmgr->bo_emit_reloc(bo, offset,
>> +					 target_bo, target_offset,
>> +					 read_domains, write_domain);
>> +}
>> +
>
> Using emit_reloc to set a BO flag seems confusing and can be error prone,
> emit_reloc can be called many times and caller needs to be careful and call the
> right function for each reloc.
>
>> +int
>> +drm_intel_bo_emit_reloc_48bit(drm_intel_bo *bo, uint32_t offset,
>> +			drm_intel_bo *target_bo, uint32_t target_offset,
>> +			uint32_t read_domains, uint32_t write_domain)
>> +{
>> +	if (bo->bufmgr->bo_set_supports_48b_address)
>> +		bo->bufmgr->bo_set_supports_48b_address(target_bo);
>
> Same situation as with clear_supports_48b_address.
>
>> +
>>   	return bo->bufmgr->bo_emit_reloc(bo, offset,
>>   					 target_bo, target_offset,
>>   					 read_domains, write_domain);
>> diff --git a/intel/intel_bufmgr.h b/intel/intel_bufmgr.h
>> index 285919e..8f91ffe 100644
>> --- a/intel/intel_bufmgr.h
>> +++ b/intel/intel_bufmgr.h
>> @@ -87,7 +87,8 @@ struct _drm_intel_bo {
>>   	/**
>>   	 * Last seen card virtual address (offset from the beginning of the
>>   	 * aperture) for the object.  This should be used to fill relocation
>> -	 * entries when calling drm_intel_bo_emit_reloc()
>> +	 * entries when calling drm_intel_bo_emit_reloc() or
>> +	 * drm_intel_bo_emit_reloc_48bit()
>>   	 */
>>   	uint64_t offset64;
>>   };
>> @@ -147,6 +148,9 @@ int drm_intel_bufmgr_check_aperture_space(drm_intel_bo ** bo_array, int count);
>>   int drm_intel_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
>>   			    drm_intel_bo *target_bo, uint32_t target_offset,
>>   			    uint32_t read_domains, uint32_t write_domain);
>> +int drm_intel_bo_emit_reloc_48bit(drm_intel_bo *bo, uint32_t offset,
>> +				  drm_intel_bo *target_bo, uint32_t target_offset,
>> +				  uint32_t read_domains, uint32_t write_domain);
>>   int drm_intel_bo_emit_reloc_fence(drm_intel_bo *bo, uint32_t offset,
>>   				  drm_intel_bo *target_bo,
>>   				  uint32_t target_offset,
>> diff --git a/intel/intel_bufmgr_gem.c b/intel/intel_bufmgr_gem.c
>> index 41de396..713ed4e 100644
>> --- a/intel/intel_bufmgr_gem.c
>> +++ b/intel/intel_bufmgr_gem.c
>> @@ -140,6 +140,7 @@ typedef struct _drm_intel_bufmgr_gem {
>>   } drm_intel_bufmgr_gem;
>>
>>   #define DRM_INTEL_RELOC_FENCE (1<<0)
>> +#define DRM_INTEL_RELOC_SUPPORTS_48B_ADDRESS (2<<0)
>
> Why are you duplicating information about support for 48B in both emit reloc
> function argument and struct bo_gem?
>
>>   typedef struct _drm_intel_reloc_target_info {
>>   	drm_intel_bo *bo;
>> @@ -237,6 +238,14 @@ struct _drm_intel_bo_gem {
>>   	bool is_userptr;
>>
>>   	/**
>> +	 * Boolean of whether this buffer can be in the whole 48-bit address.
>> +	 *
>> +	 * By default, buffers will be keep in a 32-bit range, unless this
>> +	 * flag is explicitly set.
>> +	 */
>> +	bool supports_48b_address;
>> +
>> +	/**
>>   	 * Size in bytes of this buffer and its relocation descendents.
>>   	 *
>>   	 * Used to avoid costly tree walking in
>> @@ -463,13 +472,18 @@ drm_intel_add_validate_buffer(drm_intel_bo *bo)
>>   }
>>
>>   static void
>> -drm_intel_add_validate_buffer2(drm_intel_bo *bo, int need_fence)
>> +drm_intel_add_validate_buffer2(drm_intel_bo *bo, int need_fence,
>> +			       int supports_48b_address)
>>   {
>>   	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
>>   	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
>>   	int index;
>>
>>   	if (bo_gem->validate_index != -1) {
>> +		if (supports_48b_address) {
>> +			bufmgr_gem->exec2_objects[bo_gem->validate_index].flags |=
>> +				EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
>> +		}
>>   		if (need_fence)
>>   			bufmgr_gem->exec2_objects[bo_gem->validate_index].flags |=
>>   				EXEC_OBJECT_NEEDS_FENCE;
>> @@ -508,6 +522,10 @@ drm_intel_add_validate_buffer2(drm_intel_bo *bo, int need_fence)
>>   		bufmgr_gem->exec2_objects[index].flags |=
>>   			EXEC_OBJECT_NEEDS_FENCE;
>>   	}
>> +	if (supports_48b_address) {
>> +		bufmgr_gem->exec2_objects[index].flags |=
>> +			EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
>> +	}
>>   	bufmgr_gem->exec_count++;
>>   }
>>
>> @@ -1925,11 +1943,33 @@ do_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
>>   	else
>>   		bo_gem->reloc_target_info[bo_gem->reloc_count].flags = 0;
>>
>> +	if (target_bo_gem->supports_48b_address)
>> +		bo_gem->reloc_target_info[bo_gem->reloc_count].flags |=
>> +			DRM_INTEL_RELOC_SUPPORTS_48B_ADDRESS;
>> +
>>   	bo_gem->reloc_count++;
>>
>>   	return 0;
>>   }
>>
>> +static void drm_intel_gem_bo_set_supports_48b_address(drm_intel_bo *bo)
>> +{
>> +	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
>> +	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
>> +
>> +	if (bufmgr_gem->gen >= 8)
>> +		bo_gem->supports_48b_address = 1;
>> +}
>> +
>> +static void drm_intel_gem_bo_clear_supports_48b_address(drm_intel_bo *bo)
>> +{
>> +	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
>> +	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
>> +
>> +	if (bufmgr_gem->gen >= 8)
>> +		bo_gem->supports_48b_address = 0;
>> +}
>> +
>>   static int
>>   drm_intel_gem_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
>>   			    drm_intel_bo *target_bo, uint32_t target_offset,
>> @@ -2043,7 +2083,7 @@ drm_intel_gem_bo_process_reloc2(drm_intel_bo *bo)
>>
>>   	for (i = 0; i < bo_gem->reloc_count; i++) {
>>   		drm_intel_bo *target_bo = bo_gem->reloc_target_info[i].bo;
>> -		int need_fence;
>> +		int need_fence, supports_48b_addr;
>>
>>   		if (target_bo == bo)
>>   			continue;
>> @@ -2056,8 +2096,12 @@ drm_intel_gem_bo_process_reloc2(drm_intel_bo *bo)
>>   		need_fence = (bo_gem->reloc_target_info[i].flags &
>>   			      DRM_INTEL_RELOC_FENCE);
>>
>> +		supports_48b_addr = (bo_gem->reloc_target_info[i].flags &
>> +				    DRM_INTEL_RELOC_SUPPORTS_48B_ADDRESS);
>> +
>>   		/* Add the target to the validate list */
>> -		drm_intel_add_validate_buffer2(target_bo, need_fence);
>> +		drm_intel_add_validate_buffer2(target_bo, need_fence,
>> +					       supports_48b_addr);
>>   	}
>>   }
>>
>> @@ -2217,7 +2261,7 @@ do_exec2(drm_intel_bo *bo, int used, drm_intel_context *ctx,
>>   	/* Add the batch buffer to the validation list.  There are no relocations
>>   	 * pointing to it.
>>   	 */
>> -	drm_intel_add_validate_buffer2(bo, 0);
>> +	drm_intel_add_validate_buffer2(bo, 0, 0);
>>
>>   	memclear(execbuf);
>>   	execbuf.buffers_ptr = (uintptr_t)bufmgr_gem->exec2_objects;
>> @@ -3299,6 +3343,8 @@ drm_intel_bufmgr_gem_init(int fd, int batch_size)
>>   	bufmgr_gem->bufmgr.bo_wait_rendering = drm_intel_gem_bo_wait_rendering;
>>   	bufmgr_gem->bufmgr.bo_emit_reloc = drm_intel_gem_bo_emit_reloc;
>>   	bufmgr_gem->bufmgr.bo_emit_reloc_fence = drm_intel_gem_bo_emit_reloc_fence;
>> +	bufmgr_gem->bufmgr.bo_set_supports_48b_address = drm_intel_gem_bo_set_supports_48b_address;
>> +	bufmgr_gem->bufmgr.bo_clear_supports_48b_address = drm_intel_gem_bo_clear_supports_48b_address;
>>   	bufmgr_gem->bufmgr.bo_pin = drm_intel_gem_bo_pin;
>>   	bufmgr_gem->bufmgr.bo_unpin = drm_intel_gem_bo_unpin;
>>   	bufmgr_gem->bufmgr.bo_get_tiling = drm_intel_gem_bo_get_tiling;
>> diff --git a/intel/intel_bufmgr_priv.h b/intel/intel_bufmgr_priv.h
>> index 59ebd18..774fa02 100644
>> --- a/intel/intel_bufmgr_priv.h
>> +++ b/intel/intel_bufmgr_priv.h
>> @@ -152,6 +152,17 @@ struct _drm_intel_bufmgr {
>>   	void (*destroy) (drm_intel_bufmgr *bufmgr);
>>
>>   	/**
>> +	 * Set/Clear 48-bit address support flag in a given bo.
>> +	 *
>> +	 * Any resource used with flat/heapless (0x00000000-0xfffff000)
>> +	 * General State Heap (GSH) or Intructions State Heap (ISH) must
>> +	 * be in a 32-bit range. 48-bit range will only be used when explicitly
>> +	 * requested.
>> +	 */
>> +	void (*bo_set_supports_48b_address) (drm_intel_bo *bo);
>> +	void (*bo_clear_supports_48b_address) (drm_intel_bo *bo);
>> +
>> +	/**
>>   	 * Add relocation entry in reloc_buf, which will be updated with the
>>   	 * target buffer's real offset on on command submission.
>>   	 *
>> --
>> 2.5.0
>
> You also haven't updated the dump_validation_list with proper format specifier
> (addresses are truncated otherwise).
>
> I really don't like hiding set_supports_48b_address/clear_supports_48b_address
> in emit reloc - making this explicit would be a better approach.
> Of course this is just my opinion, but let me post a bit different version of
> this API - just as an RFC so you can see how it would look with explicit set on
> bo and without adding another emit_reloc variant.
>

Hi Michał,

Ben suggested having the set/clear in emit reloc as this is the only 
place mesa cares about these wa.

But I see your point, this will be used not only by mesa, so we should 
have something that is good for all the other projects.

-Michel
On Fri, 2015-08-07 at 12:36 +0100, Michel Thierry wrote:
> Hi Michał,

> 

> Ben suggested having the set/clear in emit reloc as this is the only 

> place mesa cares about these wa.

> 

> But I see your point, this will be used not only by mesa, so we 

> should 

> have something that is good for all the other projects.

> 

> -Michel


If there are no comments from anyone else then I'm fine with public API
from last version.
But comments about the error handling and implementation details could
still be addressed.

-Michał