[11/17] drm/i915: Fix up the vma aliasing ppgtt binding

Submitted by Daniel Vetter on April 14, 2015, 3:35 p.m.

Details

Message ID 1429025727-15380-12-git-send-email-daniel.vetter@ffwll.ch
State New
Headers show

Not browsing as part of any series.

Commit Message

Daniel Vetter April 14, 2015, 3:35 p.m.
Currently we have the problem that the decision whether ptes need to
be (re)written is splattered all over the codebase. Move all that into
i915_vma_bind. This needs a few changes:
- Just reuse the PIN_* flags for i915_vma_bind and do the conversion
  to vma->bound in there to avoid duplicating the conversion code all
  over.
- We need to make binding for EXECBUF (i.e. pick aliasing ppgtt if
  around) explicit, add PIN_EXECBUF for that.
- Two callers want to update ptes, give them a PIN_UPDATE for that.

Of course we still want to avoid double-binding, but that should be
taken care of:
- A ppgtt vma will only ever see PIN_EXECBUF, so no issue with
  double-binding.
- A ggtt vma with aliasing ppgtt needs both types of binding, and we
  track that properly now.
- A ggtt vma without aliasing ppgtt could be bound twice. In the
  lower-level ->bind_vma functions hence unconditionally set
  GLOBAL_BIND when writing the ggtt ptes.

There's still a bit room for cleanup, but that's for follow-up
patches.

v2: Fixup fumbles.

Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h            | 11 +++--
 drivers/gpu/drm/i915/i915_gem.c            | 11 ++---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  7 ++--
 drivers/gpu/drm/i915/i915_gem_gtt.c        | 65 ++++++++++++------------------
 4 files changed, 40 insertions(+), 54 deletions(-)

Patch hide | download patch | download mbox

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 47be4a57e6a9..eaeae041fed9 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2640,10 +2640,13 @@  void i915_init_vm(struct drm_i915_private *dev_priv,
 void i915_gem_free_object(struct drm_gem_object *obj);
 void i915_gem_vma_destroy(struct i915_vma *vma);
 
-#define PIN_MAPPABLE 0x1
-#define PIN_NONBLOCK 0x2
-#define PIN_GLOBAL 0x4
-#define PIN_OFFSET_BIAS 0x8
+/* Flags used by pin/bind&friends. */
+#define PIN_MAPPABLE	(1<<0)
+#define PIN_NONBLOCK	(1<<1)
+#define PIN_GLOBAL	(1<<2)
+#define PIN_OFFSET_BIAS	(1<<3)
+#define PIN_EXECBUF	(1<<4)
+#define PIN_UPDATE	(1<<5)
 #define PIN_OFFSET_MASK (~4095)
 int __must_check
 i915_gem_object_pin(struct drm_i915_gem_object *obj,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 10e873c8957f..047629b08697 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3557,8 +3557,7 @@  search_free:
 		goto err_remove_node;
 
 	trace_i915_vma_bind(vma, flags);
-	ret = i915_vma_bind(vma, obj->cache_level,
-			    flags & PIN_GLOBAL ? GLOBAL_BIND : 0);
+	ret = i915_vma_bind(vma, obj->cache_level, flags);
 	if (ret)
 		goto err_finish_gtt;
 
@@ -3784,7 +3783,7 @@  int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
 		list_for_each_entry(vma, &obj->vma_list, vma_link)
 			if (drm_mm_node_allocated(&vma->node)) {
 				ret = i915_vma_bind(vma, cache_level,
-						    vma->bound & GLOBAL_BIND);
+						    PIN_UPDATE);
 				if (ret)
 					return ret;
 			}
@@ -4187,10 +4186,8 @@  i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
 						 flags);
 		if (IS_ERR(vma))
 			return PTR_ERR(vma);
-	}
-
-	if (flags & PIN_GLOBAL && !(vma->bound & GLOBAL_BIND)) {
-		ret = i915_vma_bind(vma, obj->cache_level, GLOBAL_BIND);
+	} else {
+		ret = i915_vma_bind(vma, obj->cache_level, flags);
 		if (ret)
 			return ret;
 	}
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 0b06c6de27de..f005f3151147 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -400,10 +400,9 @@  i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
 	 * pipe_control writes because the gpu doesn't properly redirect them
 	 * through the ppgtt for non_secure batchbuffers. */
 	if (unlikely(IS_GEN6(dev) &&
-	    reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION &&
-	    !(target_vma->bound & GLOBAL_BIND))) {
+	    reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION)) {
 		ret = i915_vma_bind(target_vma, target_i915_obj->cache_level,
-				    GLOBAL_BIND);
+				    PIN_GLOBAL);
 		if (WARN_ONCE(ret, "Unexpected failure to bind target VMA!"))
 			return ret;
 	}
@@ -585,7 +584,7 @@  i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
 	uint64_t flags;
 	int ret;
 
-	flags = 0;
+	flags = PIN_EXECBUF;
 	if (!drm_mm_node_allocated(&vma->node)) {
 		if (entry->flags & __EXEC_OBJECT_NEEDS_MAP)
 			flags |= PIN_GLOBAL | PIN_MAPPABLE;
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 4e2caef83772..44827090a13f 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -1748,15 +1748,7 @@  void i915_gem_restore_gtt_mappings(struct drm_device *dev)
 			continue;
 
 		i915_gem_clflush_object(obj, obj->pin_display);
-		/* The bind_vma code tries to be smart about tracking mappings.
-		 * Unfortunately above, we've just wiped out the mappings
-		 * without telling our object about it. So we need to fake it.
-		 *
-		 * Bind is not expected to fail since this is only called on
-		 * resume and assumption is all requirements exist already.
-		 */
-		vma->bound &= ~GLOBAL_BIND;
-		WARN_ON(i915_vma_bind(vma, obj->cache_level, GLOBAL_BIND));
+		WARN_ON(i915_vma_bind(vma, obj->cache_level, PIN_UPDATE));
 	}
 
 
@@ -1957,7 +1949,8 @@  static void i915_ggtt_bind_vma(struct i915_vma *vma,
 
 	BUG_ON(!i915_is_ggtt(vma->vm));
 	intel_gtt_insert_sg_entries(vma->ggtt_view.pages, entry, flags);
-	vma->bound = GLOBAL_BIND;
+
+	vma->bound |= GLOBAL_BIND;
 }
 
 static void i915_ggtt_clear_range(struct i915_address_space *vm,
@@ -1976,7 +1969,6 @@  static void i915_ggtt_unbind_vma(struct i915_vma *vma)
 	const unsigned int size = vma->obj->base.size >> PAGE_SHIFT;
 
 	BUG_ON(!i915_is_ggtt(vma->vm));
-	vma->bound = 0;
 	intel_gtt_clear_range(first, size);
 }
 
@@ -1997,35 +1989,19 @@  static void ggtt_bind_vma(struct i915_vma *vma,
 	if (i915_is_ggtt(vma->vm))
 		pages = vma->ggtt_view.pages;
 
-	/* If there is no aliasing PPGTT, or the caller needs a global mapping,
-	 * or we have a global mapping already but the cacheability flags have
-	 * changed, set the global PTEs.
-	 *
-	 * If there is an aliasing PPGTT it is anecdotally faster, so use that
-	 * instead if none of the above hold true.
-	 *
-	 * NB: A global mapping should only be needed for special regions like
-	 * "gtt mappable", SNB errata, or if specified via special execbuf
-	 * flags. At all other times, the GPU will use the aliasing PPGTT.
-	 */
 	if (!dev_priv->mm.aliasing_ppgtt || flags & GLOBAL_BIND) {
-		if (!(vma->bound & GLOBAL_BIND) ||
-		    (cache_level != obj->cache_level)) {
-			vma->vm->insert_entries(vma->vm, pages,
-						vma->node.start,
-						cache_level, pte_flags);
-			vma->bound |= GLOBAL_BIND;
-		}
+		vma->vm->insert_entries(vma->vm, pages,
+					vma->node.start,
+					cache_level, pte_flags);
+
+		vma->bound |= GLOBAL_BIND;
 	}
 
-	if (dev_priv->mm.aliasing_ppgtt &&
-	    (!(vma->bound & LOCAL_BIND) ||
-	     (cache_level != obj->cache_level))) {
+	if (dev_priv->mm.aliasing_ppgtt && flags & LOCAL_BIND) {
 		struct i915_hw_ppgtt *appgtt = dev_priv->mm.aliasing_ppgtt;
 		appgtt->base.insert_entries(&appgtt->base, pages,
 					    vma->node.start,
 					    cache_level, pte_flags);
-		vma->bound |= LOCAL_BIND;
 	}
 }
 
@@ -2040,16 +2016,14 @@  static void ggtt_unbind_vma(struct i915_vma *vma)
 				     vma->node.start,
 				     obj->base.size,
 				     true);
-		vma->bound &= ~GLOBAL_BIND;
 	}
 
-	if (vma->bound & LOCAL_BIND) {
+	if (dev_priv->mm.aliasing_ppgtt && vma->bound & LOCAL_BIND) {
 		struct i915_hw_ppgtt *appgtt = dev_priv->mm.aliasing_ppgtt;
 		appgtt->base.clear_range(&appgtt->base,
 					 vma->node.start,
 					 obj->base.size,
 					 true);
-		vma->bound &= ~LOCAL_BIND;
 	}
 }
 
@@ -2839,6 +2813,7 @@  i915_get_ggtt_vma_pages(struct i915_vma *vma)
 int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
 		  u32 flags)
 {
+	u32 bind_flags = 0;
 	int ret;
 
 	if (vma->vm->allocate_va_range) {
@@ -2855,12 +2830,24 @@  int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
 
 	if (i915_is_ggtt(vma->vm)) {
 		ret = i915_get_ggtt_vma_pages(vma);
-
 		if (ret)
-			return ret;
+			return 0;
 	}
 
-	vma->vm->bind_vma(vma, cache_level, flags);
+	if (flags & PIN_GLOBAL)
+		bind_flags |= GLOBAL_BIND;
+	if (flags & PIN_EXECBUF)
+		bind_flags |= LOCAL_BIND;
+
+	if (flags & PIN_UPDATE)
+		bind_flags |= vma->bound;
+	else
+		bind_flags &= ~vma->bound;
+
+	if (bind_flags)
+		vma->vm->bind_vma(vma, cache_level, bind_flags);
+
+	vma->bound |= bind_flags;
 
 	return 0;
 }

Comments

On Tue, Apr 14, 2015 at 05:35:21PM +0200, Daniel Vetter wrote:
> Currently we have the problem that the decision whether ptes need to
> be (re)written is splattered all over the codebase. Move all that into
> i915_vma_bind. This needs a few changes:
> - Just reuse the PIN_* flags for i915_vma_bind and do the conversion
>   to vma->bound in there to avoid duplicating the conversion code all
>   over.
> - We need to make binding for EXECBUF (i.e. pick aliasing ppgtt if
>   around) explicit, add PIN_EXECBUF for that.

I am in favour of making the PIN_GLOBAL | PIN_LOCAL explicit, but
PIN_EXECBUF doesn't seem descriptive of what happens, nor why it should
be execbuf specific. Just use PIN_LOCAL with the execbuf oring in
PIN_GLOBAL as it needs for workarounds + relocations.
-chris
On Wed, Apr 15, 2015 at 11:47:02AM +0100, Chris Wilson wrote:
> On Tue, Apr 14, 2015 at 05:35:21PM +0200, Daniel Vetter wrote:
> > Currently we have the problem that the decision whether ptes need to
> > be (re)written is splattered all over the codebase. Move all that into
> > i915_vma_bind. This needs a few changes:
> > - Just reuse the PIN_* flags for i915_vma_bind and do the conversion
> >   to vma->bound in there to avoid duplicating the conversion code all
> >   over.
> > - We need to make binding for EXECBUF (i.e. pick aliasing ppgtt if
> >   around) explicit, add PIN_EXECBUF for that.
> 
> I am in favour of making the PIN_GLOBAL | PIN_LOCAL explicit, but
> PIN_EXECBUF doesn't seem descriptive of what happens, nor why it should
> be execbuf specific. Just use PIN_LOCAL with the execbuf oring in
> PIN_GLOBAL as it needs for workarounds + relocations.

Well I wasnt' too happy with LOCAL_BIND tbh since that sounds awfully
close to gen1 local memory ;-) My idea behind PIN_EXECBUF (and hiding the
ggtt vs. aliasing ppgtt binding in the low-level code) is that imo
conceptually global vs. aliasing ppgtt is just a pte bit with crazy
storage. We might as well have 2 bits to control access for priviledged
clients and for unpriviledge GT access, and in that case it would make
sense to hide that in the lower levels. ggtt+aliasing ppgtt isnt' anything
else, except for a rather peculariar storage format of these two bits.

Essentially we have:
- PIN_GLOBAL: Please set up pte so that system agent, display and
  priviledged GT can access it.
- PIN_EXECBUF: Please set up pte so that unpriveledged GT access is
  possible.

Maybe PIN_UNPRIV_GT or something else would be better? Pushing the
decision of how that access control is down up into the execbuf code is
imo a layering violation and exactly the kind of trouble that imo has lead
to rebinding or too-much-binding bugs we've had. With my approach it's all
together in the low-level gtt binding code.
-Daniel
On Thu, Apr 16, 2015 at 10:01:31AM +0200, Daniel Vetter wrote:
> On Wed, Apr 15, 2015 at 11:47:02AM +0100, Chris Wilson wrote:
> > On Tue, Apr 14, 2015 at 05:35:21PM +0200, Daniel Vetter wrote:
> > > Currently we have the problem that the decision whether ptes need to
> > > be (re)written is splattered all over the codebase. Move all that into
> > > i915_vma_bind. This needs a few changes:
> > > - Just reuse the PIN_* flags for i915_vma_bind and do the conversion
> > >   to vma->bound in there to avoid duplicating the conversion code all
> > >   over.
> > > - We need to make binding for EXECBUF (i.e. pick aliasing ppgtt if
> > >   around) explicit, add PIN_EXECBUF for that.
> > 
> > I am in favour of making the PIN_GLOBAL | PIN_LOCAL explicit, but
> > PIN_EXECBUF doesn't seem descriptive of what happens, nor why it should
> > be execbuf specific. Just use PIN_LOCAL with the execbuf oring in
> > PIN_GLOBAL as it needs for workarounds + relocations.
> 
> Well I wasnt' too happy with LOCAL_BIND tbh since that sounds awfully
> close to gen1 local memory ;-) My idea behind PIN_EXECBUF (and hiding the
> ggtt vs. aliasing ppgtt binding in the low-level code) is that imo
> conceptually global vs. aliasing ppgtt is just a pte bit with crazy
> storage. We might as well have 2 bits to control access for priviledged
> clients and for unpriviledge GT access, and in that case it would make
> sense to hide that in the lower levels. ggtt+aliasing ppgtt isnt' anything
> else, except for a rather peculariar storage format of these two bits.
> 
> Essentially we have:
> - PIN_GLOBAL: Please set up pte so that system agent, display and
>   priviledged GT can access it.
> - PIN_EXECBUF: Please set up pte so that unpriveledged GT access is
>   possible.
> 
> Maybe PIN_UNPRIV_GT or something else would be better? Pushing the
> decision of how that access control is down up into the execbuf code is
> imo a layering violation and exactly the kind of trouble that imo has lead
> to rebinding or too-much-binding bugs we've had. With my approach it's all
> together in the low-level gtt binding code.

Yeah, I'm just arguing that EXECBUF doesn't tell me anything at all
about what is happening. It is just not descriptive enough and is liable
to change meanings and end up in confusion.

How about PIN_GLOBAL (aka PIN_SYSTEM) and PIN_USER?
-Chris
On Thu, Apr 16, 2015 at 09:07:57AM +0100, Chris Wilson wrote:
> On Thu, Apr 16, 2015 at 10:01:31AM +0200, Daniel Vetter wrote:
> > On Wed, Apr 15, 2015 at 11:47:02AM +0100, Chris Wilson wrote:
> > > On Tue, Apr 14, 2015 at 05:35:21PM +0200, Daniel Vetter wrote:
> > > > Currently we have the problem that the decision whether ptes need to
> > > > be (re)written is splattered all over the codebase. Move all that into
> > > > i915_vma_bind. This needs a few changes:
> > > > - Just reuse the PIN_* flags for i915_vma_bind and do the conversion
> > > >   to vma->bound in there to avoid duplicating the conversion code all
> > > >   over.
> > > > - We need to make binding for EXECBUF (i.e. pick aliasing ppgtt if
> > > >   around) explicit, add PIN_EXECBUF for that.
> > > 
> > > I am in favour of making the PIN_GLOBAL | PIN_LOCAL explicit, but
> > > PIN_EXECBUF doesn't seem descriptive of what happens, nor why it should
> > > be execbuf specific. Just use PIN_LOCAL with the execbuf oring in
> > > PIN_GLOBAL as it needs for workarounds + relocations.
> > 
> > Well I wasnt' too happy with LOCAL_BIND tbh since that sounds awfully
> > close to gen1 local memory ;-) My idea behind PIN_EXECBUF (and hiding the
> > ggtt vs. aliasing ppgtt binding in the low-level code) is that imo
> > conceptually global vs. aliasing ppgtt is just a pte bit with crazy
> > storage. We might as well have 2 bits to control access for priviledged
> > clients and for unpriviledge GT access, and in that case it would make
> > sense to hide that in the lower levels. ggtt+aliasing ppgtt isnt' anything
> > else, except for a rather peculariar storage format of these two bits.
> > 
> > Essentially we have:
> > - PIN_GLOBAL: Please set up pte so that system agent, display and
> >   priviledged GT can access it.
> > - PIN_EXECBUF: Please set up pte so that unpriveledged GT access is
> >   possible.
> > 
> > Maybe PIN_UNPRIV_GT or something else would be better? Pushing the
> > decision of how that access control is down up into the execbuf code is
> > imo a layering violation and exactly the kind of trouble that imo has lead
> > to rebinding or too-much-binding bugs we've had. With my approach it's all
> > together in the low-level gtt binding code.
> 
> Yeah, I'm just arguing that EXECBUF doesn't tell me anything at all
> about what is happening. It is just not descriptive enough and is liable
> to change meanings and end up in confusion.
> 
> How about PIN_GLOBAL (aka PIN_SYSTEM) and PIN_USER?

PIN_USER sounds really good indeed. Will change.
-Daniel