[v4,17/18] drm/i915: Wa32bitGeneralStateOffset & Wa32bitInstructionBaseOffset

Submitted by Michel Thierry on July 7, 2015, 3:15 p.m.

Details

Message ID 1436282103-5854-18-git-send-email-michel.thierry@intel.com
State New
Headers show

Not browsing as part of any series.

Commit Message

Michel Thierry July 7, 2015, 3:15 p.m.
There are some allocations that must only be referenced by 32-bit
offsets. To limit the chances of having the first 4GB already full,
objects not requiring this workaround use the DRM_MM_SEARCH_BELOW/
DRM_MM_CREATE_TOP flags.

Specifically, any resource used with flat/heapless (0x00000000-0xfffff000)
General State Heap (GSH) or Instruction State Heap (ISH) must be in a
32-bit range, because the General State Offset and Instruction State
Offset are limited to 32 bits.

Objects must have EXEC_OBJECT_SUPPORTS_48B_ADDRESS flag to indicate if
they can be allocated above the 32-bit address range. To limit the
chances of having the first 4GB already full, objects will use
DRM_MM_SEARCH_BELOW + DRM_MM_CREATE_TOP flags when possible.

v2: Changed flag logic from needs_32b to supports_48b.
v3: Moved 48-bit support flag back to exec_object. (Chris, Daniel)
v4: Split pin flags into PIN_ZONE_4G and PIN_HIGH; update PIN_OFFSET_MASK
to use last PIN_ defined instead of hard-coded value; use correct limit
check in eb_vma_misplaced. (Chris)
v5: Don't touch PIN_OFFSET_MASK and update workaround comment (Chris)

Cc: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> (v4)
Signed-off-by: Michel Thierry <michel.thierry@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h            |  2 ++
 drivers/gpu/drm/i915/i915_gem.c            | 14 ++++++++++++--
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 13 +++++++++++++
 include/uapi/drm/i915_drm.h                |  3 ++-
 4 files changed, 29 insertions(+), 3 deletions(-)

Patch hide | download patch | download mbox

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 4a30a73..fc88e58 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2772,6 +2772,8 @@  void i915_gem_vma_destroy(struct i915_vma *vma);
 #define PIN_OFFSET_BIAS	(1<<3)
 #define PIN_USER	(1<<4)
 #define PIN_UPDATE	(1<<5)
+#define PIN_ZONE_4G	(1<<6)
+#define PIN_HIGH	(1<<7)
 #define PIN_OFFSET_MASK (~4095)
 int __must_check
 i915_gem_object_pin(struct drm_i915_gem_object *obj,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index ebfb789..b13900d 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3720,6 +3720,8 @@  i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	u32 fence_alignment, unfenced_alignment;
 	u64 size, fence_size;
+	u32 search_flag = DRM_MM_SEARCH_DEFAULT;
+	u32 alloc_flag = DRM_MM_CREATE_DEFAULT;
 	u64 start =
 		flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0;
 	u64 end =
@@ -3761,6 +3763,14 @@  i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
 						   obj->tiling_mode,
 						   false);
 		size = flags & PIN_MAPPABLE ? fence_size : obj->base.size;
+
+		if (flags & PIN_HIGH) {
+			search_flag = DRM_MM_SEARCH_BELOW;
+			alloc_flag = DRM_MM_CREATE_TOP;
+		}
+
+		if (flags & PIN_ZONE_4G)
+			end = (1ULL << 32);
 	}
 
 	if (alignment == 0)
@@ -3803,8 +3813,8 @@  search_free:
 						  size, alignment,
 						  obj->cache_level,
 						  start, end,
-						  DRM_MM_SEARCH_DEFAULT,
-						  DRM_MM_CREATE_DEFAULT);
+						  search_flag,
+						  alloc_flag);
 	if (ret) {
 		ret = i915_gem_evict_something(dev, vm, size, alignment,
 					       obj->cache_level,
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 83577c6..f2b43a2 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -588,11 +588,20 @@  i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
 	if (entry->flags & EXEC_OBJECT_NEEDS_GTT)
 		flags |= PIN_GLOBAL;
 
+	/* Wa32bitGeneralStateOffset & Wa32bitInstructionBaseOffset,
+	 * limit address to the first 4GBs for unflagged objects.
+	 */
+	flags |= PIN_ZONE_4G;
+	if (entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS)
+		flags &= ~PIN_ZONE_4G;
+
 	if (!drm_mm_node_allocated(&vma->node)) {
 		if (entry->flags & __EXEC_OBJECT_NEEDS_MAP)
 			flags |= PIN_GLOBAL | PIN_MAPPABLE;
 		if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS)
 			flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;
+		if ((flags & PIN_MAPPABLE) == 0)
+			flags |= PIN_HIGH;
 	}
 
 	ret = i915_gem_object_pin(obj, vma->vm, entry->alignment, flags);
@@ -670,6 +679,10 @@  eb_vma_misplaced(struct i915_vma *vma)
 	if (entry->flags & __EXEC_OBJECT_NEEDS_MAP && !obj->map_and_fenceable)
 		return !only_mappable_for_reloc(entry->flags);
 
+	if (!(entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) &&
+	    (vma->node.start + vma->node.size) >= (1ULL << 32))
+		return true;
+
 	return false;
 }
 
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index e7c29f1..e4471e8 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -686,7 +686,8 @@  struct drm_i915_gem_exec_object2 {
 #define EXEC_OBJECT_NEEDS_FENCE (1<<0)
 #define EXEC_OBJECT_NEEDS_GTT	(1<<1)
 #define EXEC_OBJECT_WRITE	(1<<2)
-#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_WRITE<<1)
+#define EXEC_OBJECT_SUPPORTS_48B_ADDRESS (1<<3)
+#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_SUPPORTS_48B_ADDRESS<<1)
 	__u64 flags;
 
 	__u64 rsvd1;

Comments

On 7/7/2015 4:15 PM, Michel Thierry wrote:
> There are some allocations that must be only referenced by 32-bit
> offsets. To limit the chances of having the first 4GB already full,
> objects not requiring this workaround use DRM_MM_SEARCH_BELOW/
> DRM_MM_CREATE_TOP flags
>
> In specific, any resource used with flat/heapless (0x00000000-0xfffff000)
> General State Heap (GSH) or Intructions State Heap (ISH) must be in a
> 32-bit range, because the General State Offset and Instruction State
> Offset are limited to 32-bits.
>
> Objects must have EXEC_OBJECT_SUPPORTS_48B_ADDRESS flag to indicate if
> they can be allocated above the 32-bit address range. To limit the
> chances of having the first 4GB already full, objects will use
> DRM_MM_SEARCH_BELOW + DRM_MM_CREATE_TOP flags when possible.
>
> v2: Changed flag logic from neeeds_32b, to supports_48b.
> v3: Moved 48-bit support flag back to exec_object. (Chris, Daniel)
> v4: Split pin flags into PIN_ZONE_4G and PIN_HIGH; update PIN_OFFSET_MASK
> to use last PIN_ defined instead of hard-coded value; use correct limit
> check in eb_vma_misplaced. (Chris)
> v5: Don't touch PIN_OFFSET_MASK and update workaround comment (Chris)
>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> (v4)
> Signed-off-by: Michel Thierry <michel.thierry@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_drv.h            |  2 ++
>   drivers/gpu/drm/i915/i915_gem.c            | 14 ++++++++++++--
>   drivers/gpu/drm/i915/i915_gem_execbuffer.c | 13 +++++++++++++
>   include/uapi/drm/i915_drm.h                |  3 ++-
>   4 files changed, 29 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 4a30a73..fc88e58 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2772,6 +2772,8 @@ void i915_gem_vma_destroy(struct i915_vma *vma);
>   #define PIN_OFFSET_BIAS        (1<<3)
>   #define PIN_USER       (1<<4)
>   #define PIN_UPDATE     (1<<5)
> +#define PIN_ZONE_4G    (1<<6)
> +#define PIN_HIGH       (1<<7)
>   #define PIN_OFFSET_MASK (~4095)
>   int __must_check
>   i915_gem_object_pin(struct drm_i915_gem_object *obj,
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index ebfb789..b13900d 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -3720,6 +3720,8 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
>          struct drm_i915_private *dev_priv = dev->dev_private;
>          u32 fence_alignment, unfenced_alignment;
>          u64 size, fence_size;
> +       u32 search_flag = DRM_MM_SEARCH_DEFAULT;
> +       u32 alloc_flag = DRM_MM_CREATE_DEFAULT;
>          u64 start =
>                  flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0;
>          u64 end =
> @@ -3761,6 +3763,14 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
>                                                     obj->tiling_mode,
>                                                     false);
>                  size = flags & PIN_MAPPABLE ? fence_size : obj->base.size;
> +
> +               if (flags & PIN_HIGH) {
> +                       search_flag = DRM_MM_SEARCH_BELOW;
> +                       alloc_flag = DRM_MM_CREATE_TOP;
> +               }
> +
> +               if (flags & PIN_ZONE_4G)
> +                       end = (1ULL << 32);
Hi Chris,
second thoughts on this... would PIN_HIGH & PIN_ZONE_4G be a problem if 
someone mixes a 64-bit kernel with 32-bit userland?
Maybe it's safer to set end = (1ULL << 32) - PAGE_SIZE.

>          }
>
>          if (alignment == 0)
On Thu, Jul 09, 2015 at 05:19:27PM +0100, Michel Thierry wrote:
> On 7/7/2015 4:15 PM, Michel Thierry wrote:
> >There are some allocations that must be only referenced by 32-bit
> >offsets. To limit the chances of having the first 4GB already full,
> >objects not requiring this workaround use DRM_MM_SEARCH_BELOW/
> >DRM_MM_CREATE_TOP flags
> >
> >In specific, any resource used with flat/heapless (0x00000000-0xfffff000)
> >General State Heap (GSH) or Intructions State Heap (ISH) must be in a
> >32-bit range, because the General State Offset and Instruction State
> >Offset are limited to 32-bits.
> >
> >Objects must have EXEC_OBJECT_SUPPORTS_48B_ADDRESS flag to indicate if
> >they can be allocated above the 32-bit address range. To limit the
> >chances of having the first 4GB already full, objects will use
> >DRM_MM_SEARCH_BELOW + DRM_MM_CREATE_TOP flags when possible.
> >
> >v2: Changed flag logic from neeeds_32b, to supports_48b.
> >v3: Moved 48-bit support flag back to exec_object. (Chris, Daniel)
> >v4: Split pin flags into PIN_ZONE_4G and PIN_HIGH; update PIN_OFFSET_MASK
> >to use last PIN_ defined instead of hard-coded value; use correct limit
> >check in eb_vma_misplaced. (Chris)
> >v5: Don't touch PIN_OFFSET_MASK and update workaround comment (Chris)
> >
> >Cc: Chris Wilson <chris@chris-wilson.co.uk>
> >Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> (v4)
> >Signed-off-by: Michel Thierry <michel.thierry@intel.com>
> >---
> >  drivers/gpu/drm/i915/i915_drv.h            |  2 ++
> >  drivers/gpu/drm/i915/i915_gem.c            | 14 ++++++++++++--
> >  drivers/gpu/drm/i915/i915_gem_execbuffer.c | 13 +++++++++++++
> >  include/uapi/drm/i915_drm.h                |  3 ++-
> >  4 files changed, 29 insertions(+), 3 deletions(-)
> >
> >diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> >index 4a30a73..fc88e58 100644
> >--- a/drivers/gpu/drm/i915/i915_drv.h
> >+++ b/drivers/gpu/drm/i915/i915_drv.h
> >@@ -2772,6 +2772,8 @@ void i915_gem_vma_destroy(struct i915_vma *vma);
> >  #define PIN_OFFSET_BIAS        (1<<3)
> >  #define PIN_USER       (1<<4)
> >  #define PIN_UPDATE     (1<<5)
> >+#define PIN_ZONE_4G    (1<<6)
> >+#define PIN_HIGH       (1<<7)
> >  #define PIN_OFFSET_MASK (~4095)
> >  int __must_check
> >  i915_gem_object_pin(struct drm_i915_gem_object *obj,
> >diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> >index ebfb789..b13900d 100644
> >--- a/drivers/gpu/drm/i915/i915_gem.c
> >+++ b/drivers/gpu/drm/i915/i915_gem.c
> >@@ -3720,6 +3720,8 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
> >         struct drm_i915_private *dev_priv = dev->dev_private;
> >         u32 fence_alignment, unfenced_alignment;
> >         u64 size, fence_size;
> >+       u32 search_flag = DRM_MM_SEARCH_DEFAULT;
> >+       u32 alloc_flag = DRM_MM_CREATE_DEFAULT;
> >         u64 start =
> >                 flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0;
> >         u64 end =
> >@@ -3761,6 +3763,14 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
> >                                                    obj->tiling_mode,
> >                                                    false);
> >                 size = flags & PIN_MAPPABLE ? fence_size : obj->base.size;
> >+
> >+               if (flags & PIN_HIGH) {
> >+                       search_flag = DRM_MM_SEARCH_BELOW;
> >+                       alloc_flag = DRM_MM_CREATE_TOP;
> >+               }
> >+
> >+               if (flags & PIN_ZONE_4G)
> >+                       end = (1ULL << 32);
> Hi Chris,
> second thoughts on this... would PIN_HIGH & PIN_ZONE_4G be a problem
> if someone mixes a 64-bit kernel with 32-bit userland?
> Maybe it's safer to set end = (1ULL << 32) - PAGE_SIZE.

No, the uapi is and always has been u64. To be paranoid, you would have
to presume anything above INT_MAX is going to be trouble.
-Chris
On Tue, Jul 07, 2015 at 04:15:02PM +0100, Michel Thierry wrote:
> @@ -3761,6 +3763,14 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
>  						   obj->tiling_mode,
>  						   false);
>  		size = flags & PIN_MAPPABLE ? fence_size : obj->base.size;
> +
> +		if (flags & PIN_HIGH) {
> +			search_flag = DRM_MM_SEARCH_BELOW;
> +			alloc_flag = DRM_MM_CREATE_TOP;
> +		}
> +
> +		if (flags & PIN_ZONE_4G)
> +			end = (1ULL << 32);
>  	}

This should be applied to both paths, as PIN_HIGH is applicable to
the ggtt (keeping bits and bobs out of mappable is the plan). And with
PIN_ZONE_4G there is also no need to arbitrarily impose restrictions on
applicability (though we may never have a GGTT larger than 4G!).
-Chris