[2/6] drm/i915: Align GGTT sizes to a fence tile row

Submitted by Chris Wilson on Jan. 9, 2017, 4:16 p.m.

Details

Message ID 20170109161613.11881-2-chris@chris-wilson.co.uk
State Accepted
Commit 5b30694b474d00f8588fa367f9562d8f2e4c7075
Headers show
Series "Series without cover letter" ( rev: 1 ) in Intel GFX

Not browsing as part of any series.

Commit Message

Chris Wilson Jan. 9, 2017, 4:16 p.m.
Ensure the view occupies the full tile row so that reads/writes into the
VMA do not escape (via fenced detiling) into neighbouring objects - we
will pad the object with scratch pages to satisfy the fence. This
applies the lazy-tiling we employed on gen2/3 to gen4+.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h        |  5 +++--
 drivers/gpu/drm/i915/i915_gem.c        | 27 +++++++++++++++++++--------
 drivers/gpu/drm/i915/i915_gem_tiling.c | 18 +++++++++---------
 drivers/gpu/drm/i915/i915_vma.c        | 10 ++++++++--
 4 files changed, 39 insertions(+), 21 deletions(-)

Patch hide | download patch | download mbox

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 52d01be956cc..8185229f370f 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3361,9 +3361,10 @@  int i915_gem_open(struct drm_device *dev, struct drm_file *file);
 void i915_gem_release(struct drm_device *dev, struct drm_file *file);
 
 u64 i915_gem_get_ggtt_size(struct drm_i915_private *dev_priv, u64 size,
-			   int tiling_mode);
+			   int tiling_mode, unsigned int stride);
 u64 i915_gem_get_ggtt_alignment(struct drm_i915_private *dev_priv, u64 size,
-				int tiling_mode, bool fenced);
+				int tiling_mode, unsigned int stride,
+				bool fenced);
 
 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
 				    enum i915_cache_level cache_level);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index df4acbefc4a1..d6fee91a5fa9 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2021,21 +2021,29 @@  void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv)
  * @dev_priv: i915 device
  * @size: object size
  * @tiling_mode: tiling mode
+ * @stride: tiling stride
  *
  * Return the required global GTT size for an object, taking into account
  * potential fence register mapping.
  */
 u64 i915_gem_get_ggtt_size(struct drm_i915_private *dev_priv,
-			   u64 size, int tiling_mode)
+			   u64 size, int tiling_mode, unsigned int stride)
 {
 	u64 ggtt_size;
 
-	GEM_BUG_ON(size == 0);
+	GEM_BUG_ON(!size);
 
-	if (INTEL_GEN(dev_priv) >= 4 ||
-	    tiling_mode == I915_TILING_NONE)
+	if (tiling_mode == I915_TILING_NONE)
 		return size;
 
+	GEM_BUG_ON(!stride);
+
+	if (INTEL_GEN(dev_priv) >= 4) {
+		stride *= i915_gem_tile_height(tiling_mode);
+		GEM_BUG_ON(stride & 4095);
+		return roundup(size, stride);
+	}
+
 	/* Previous chips need a power-of-two fence region when tiling */
 	if (IS_GEN3(dev_priv))
 		ggtt_size = 1024*1024;
@@ -2053,15 +2061,17 @@  u64 i915_gem_get_ggtt_size(struct drm_i915_private *dev_priv,
  * @dev_priv: i915 device
  * @size: object size
  * @tiling_mode: tiling mode
+ * @stride: tiling stride
  * @fenced: is fenced alignment required or not
  *
  * Return the required global GTT alignment for an object, taking into account
  * potential fence register mapping.
  */
 u64 i915_gem_get_ggtt_alignment(struct drm_i915_private *dev_priv, u64 size,
-				int tiling_mode, bool fenced)
+				int tiling_mode, unsigned int stride,
+				bool fenced)
 {
-	GEM_BUG_ON(size == 0);
+	GEM_BUG_ON(!size);
 
 	/*
 	 * Minimum alignment is 4k (GTT page size), but might be greater
@@ -2076,7 +2086,7 @@  u64 i915_gem_get_ggtt_alignment(struct drm_i915_private *dev_priv, u64 size,
 	 * Previous chips need to be aligned to the size of the smallest
 	 * fence register that can contain the object.
 	 */
-	return i915_gem_get_ggtt_size(dev_priv, size, tiling_mode);
+	return i915_gem_get_ggtt_size(dev_priv, size, tiling_mode, stride);
 }
 
 static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj)
@@ -3687,7 +3697,8 @@  i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
 			u32 fence_size;
 
 			fence_size = i915_gem_get_ggtt_size(dev_priv, vma->size,
-							    i915_gem_object_get_tiling(obj));
+							    i915_gem_object_get_tiling(obj),
+							    i915_gem_object_get_stride(obj));
 			/* If the required space is larger than the available
 			 * aperture, we will not able to find a slot for the
 			 * object and unbinding the object now will be in
diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
index 62ad375de6ca..51b8d71876b7 100644
--- a/drivers/gpu/drm/i915/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
@@ -117,7 +117,8 @@  i915_tiling_ok(struct drm_i915_private *dev_priv,
 	return true;
 }
 
-static bool i915_vma_fence_prepare(struct i915_vma *vma, int tiling_mode)
+static bool i915_vma_fence_prepare(struct i915_vma *vma,
+				   int tiling_mode, unsigned int stride)
 {
 	struct drm_i915_private *dev_priv = vma->vm->i915;
 	u32 size;
@@ -133,7 +134,7 @@  static bool i915_vma_fence_prepare(struct i915_vma *vma, int tiling_mode)
 			return false;
 	}
 
-	size = i915_gem_get_ggtt_size(dev_priv, vma->size, tiling_mode);
+	size = i915_gem_get_ggtt_size(dev_priv, vma->size, tiling_mode, stride);
 	if (vma->node.size < size)
 		return false;
 
@@ -145,20 +146,17 @@  static bool i915_vma_fence_prepare(struct i915_vma *vma, int tiling_mode)
 
 /* Make the current GTT allocation valid for the change in tiling. */
 static int
-i915_gem_object_fence_prepare(struct drm_i915_gem_object *obj, int tiling_mode)
+i915_gem_object_fence_prepare(struct drm_i915_gem_object *obj,
+			      int tiling_mode, unsigned int stride)
 {
-	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
 	struct i915_vma *vma;
 	int ret;
 
 	if (tiling_mode == I915_TILING_NONE)
 		return 0;
 
-	if (INTEL_GEN(dev_priv) >= 4)
-		return 0;
-
 	list_for_each_entry(vma, &obj->vma_list, obj_link) {
-		if (i915_vma_fence_prepare(vma, tiling_mode))
+		if (i915_vma_fence_prepare(vma, tiling_mode, stride))
 			continue;
 
 		ret = i915_vma_unbind(vma);
@@ -255,7 +253,9 @@  i915_gem_set_tiling(struct drm_device *dev, void *data,
 		 * whilst executing a fenced command for an untiled object.
 		 */
 
-		err = i915_gem_object_fence_prepare(obj, args->tiling_mode);
+		err = i915_gem_object_fence_prepare(obj,
+						    args->tiling_mode,
+						    args->stride);
 		if (!err) {
 			struct i915_vma *vma;
 
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 58f2483362ad..734f77b7697f 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -284,11 +284,14 @@  void __i915_vma_set_map_and_fenceable(struct i915_vma *vma)
 
 	fence_size = i915_gem_get_ggtt_size(dev_priv,
 					    vma->size,
-					    i915_gem_object_get_tiling(obj));
+					    i915_gem_object_get_tiling(obj),
+					    i915_gem_object_get_stride(obj));
 	fence_alignment = i915_gem_get_ggtt_alignment(dev_priv,
 						      vma->size,
 						      i915_gem_object_get_tiling(obj),
+						      i915_gem_object_get_stride(obj),
 						      true);
+	GEM_BUG_ON(!is_power_of_2(fence_alignment));
 
 	fenceable = (vma->node.size == fence_size &&
 		     (vma->node.start & (fence_alignment - 1)) == 0);
@@ -370,12 +373,15 @@  i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 	size = max(size, vma->size);
 	if (flags & PIN_MAPPABLE)
 		size = i915_gem_get_ggtt_size(dev_priv, size,
-					      i915_gem_object_get_tiling(obj));
+					      i915_gem_object_get_tiling(obj),
+					      i915_gem_object_get_stride(obj));
 
 	alignment = max(max(alignment, vma->display_alignment),
 			i915_gem_get_ggtt_alignment(dev_priv, size,
 						    i915_gem_object_get_tiling(obj),
+						    i915_gem_object_get_stride(obj),
 						    flags & PIN_MAPPABLE));
+	GEM_BUG_ON(!is_power_of_2(alignment));
 
 	start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0;