[18/41] drm/i915: Move object backing storage manipulation to its own locking

Submitted by Chris Wilson on Oct. 14, 2016, 12:18 p.m.

Details

Message ID 20161014121833.439-19-chris@chris-wilson.co.uk
State New
Headers show
Series "Series without cover letter" ( rev: 1 ) in Intel GFX

Browsing this patch as part of:
"Series without cover letter" rev 1 in Intel GFX
<< prev patch [18/41] next patch >>

Commit Message

Chris Wilson Oct. 14, 2016, 12:18 p.m.
Break the allocation of the backing storage away from struct_mutex into
a per-object lock. This allows parallel page allocation, provided we can
do so outside of struct_mutex (i.e. set-domain-ioctl, pwrite, GTT
fault), i.e. before execbuf! The increased cost of the atomic counters
are hidden behind i915_vma_pin() for the typical case of execbuf, i.e.
as the object is typically bound between execbufs, the page_pin_count is
static. The cost will be felt around set-domain and pwrite, but offset
by the improvement from reduced struct_mutex contention.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h          | 34 ++++--------
 drivers/gpu/drm/i915/i915_gem.c          | 93 +++++++++++++++++++++-----------
 drivers/gpu/drm/i915/i915_gem_shrinker.c | 51 ++++++++++++------
 drivers/gpu/drm/i915/i915_gem_tiling.c   |  2 +
 drivers/gpu/drm/i915/i915_gem_userptr.c  | 10 ++--
 5 files changed, 112 insertions(+), 78 deletions(-)

Patch hide | download patch | download mbox

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index ebadb5e55476..e066284aace9 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2261,7 +2261,8 @@  struct drm_i915_gem_object {
 	unsigned int pin_display;
 
 	struct {
-		unsigned int pages_pin_count;
+		struct mutex lock;
+		atomic_t pages_pin_count;
 
 		struct sg_table *pages;
 		void *mapping;
@@ -2374,13 +2375,6 @@  i915_gem_object_is_dead(const struct drm_i915_gem_object *obj)
 	return atomic_read(&obj->base.refcount.refcount) == 0;
 }
 
-#if IS_ENABLED(CONFIG_LOCKDEP)
-#define lockdep_assert_held_unless(lock, cond) \
-	GEM_BUG_ON(debug_locks && !lockdep_is_held(lock) && !(cond))
-#else
-#define lockdep_assert_held_unless(lock, cond)
-#endif
-
 static inline bool
 i915_gem_object_has_struct_page(const struct drm_i915_gem_object *obj)
 {
@@ -3214,9 +3208,7 @@  int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj);
 static inline int __must_check
 i915_gem_object_pin_pages(struct drm_i915_gem_object *obj)
 {
-	lockdep_assert_held(&obj->base.dev->struct_mutex);
-
-	if (obj->mm.pages_pin_count++)
+	if (atomic_inc_not_zero(&obj->mm.pages_pin_count))
 		return 0;
 
 	return __i915_gem_object_get_pages(obj);
@@ -3225,31 +3217,28 @@  i915_gem_object_pin_pages(struct drm_i915_gem_object *obj)
 static inline void
 __i915_gem_object_pin_pages(struct drm_i915_gem_object *obj)
 {
-	lockdep_assert_held_unless(&obj->base.dev->struct_mutex,
-				   i915_gem_object_is_dead(obj));
 	GEM_BUG_ON(!obj->mm.pages);
 
-	obj->mm.pages_pin_count++;
+	atomic_inc(&obj->mm.pages_pin_count);
 }
 
 static inline bool
 i915_gem_object_has_pinned_pages(struct drm_i915_gem_object *obj)
 {
-	return obj->mm.pages_pin_count;
+	return atomic_read(&obj->mm.pages_pin_count);
 }
 
 static inline void
 __i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
 {
-	lockdep_assert_held_unless(&obj->base.dev->struct_mutex,
-				   i915_gem_object_is_dead(obj));
 	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
 	GEM_BUG_ON(!obj->mm.pages);
 
-	obj->mm.pages_pin_count--;
+	atomic_dec(&obj->mm.pages_pin_count);
 }
 
-static inline void i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
+static inline void
+i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
 {
 	__i915_gem_object_unpin_pages(obj);
 }
@@ -3272,8 +3261,8 @@  enum i915_map_type {
  * the kernel address space. Based on the @type of mapping, the PTE will be
  * set to either WriteBack or WriteCombine (via pgprot_t).
  *
- * The caller must hold the struct_mutex, and is responsible for calling
- * i915_gem_object_unpin_map() when the mapping is no longer required.
+ * The caller is responsible for calling i915_gem_object_unpin_map() when the
+ * mapping is no longer required.
  *
  * Returns the pointer through which to access the mapped object, or an
  * ERR_PTR() on error.
@@ -3289,12 +3278,9 @@  void *__must_check i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
  * with your access, call i915_gem_object_unpin_map() to release the pin
  * upon the mapping. Once the pin count reaches zero, that mapping may be
  * removed.
- *
- * The caller must hold the struct_mutex.
  */
 static inline void i915_gem_object_unpin_map(struct drm_i915_gem_object *obj)
 {
-	lockdep_assert_held(&obj->base.dev->struct_mutex);
 	i915_gem_object_unpin_pages(obj);
 }
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 4faef4bd135b..13aa50e38bf1 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2272,6 +2272,9 @@  void __i915_gem_object_invalidate(struct drm_i915_gem_object *obj)
 {
 	struct address_space *mapping;
 
+	lockdep_assert_held(&obj->mm.lock);
+	GEM_BUG_ON(obj->mm.pages);
+
 	switch (obj->mm.madv) {
 	case I915_MADV_DONTNEED:
 		i915_gem_object_truncate(obj);
@@ -2328,12 +2331,17 @@  void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
 {
 	struct sg_table *pages;
 
-	lockdep_assert_held(&obj->base.dev->struct_mutex);
-
 	if (i915_gem_object_has_pinned_pages(obj))
 		return;
 
 	GEM_BUG_ON(obj->bind_count);
+	if (!READ_ONCE(obj->mm.pages))
+		return;
+
+	/* May be called by shrinker from within get_pages() (on another bo) */
+	mutex_lock_nested(&obj->mm.lock, SINGLE_DEPTH_NESTING);
+	if (unlikely(atomic_read(&obj->mm.pages_pin_count)))
+		goto unlock;
 
 	/* ->put_pages might need to allocate memory for the bit17 swizzle
 	 * array, hence protect them from being reaped by removing them from gtt
@@ -2356,6 +2364,8 @@  void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
 	__i915_gem_object_reset_page_iter(obj);
 
 	obj->ops->put_pages(obj, pages);
+unlock:
+	mutex_unlock(&obj->mm.lock);
 }
 
 static unsigned long swiotlb_max_size(void)
@@ -2489,7 +2499,7 @@  err_pages:
 void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
 				 struct sg_table *pages)
 {
-	lockdep_assert_held(&obj->base.dev->struct_mutex);
+	lockdep_assert_held(&obj->mm.lock);
 
 	obj->mm.get_page.sg_pos = pages->sgl;
 	obj->mm.get_page.sg_idx = 0;
@@ -2515,9 +2525,9 @@  static int ____i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
 }
 
 /* Ensure that the associated pages are gathered from the backing storage
- * and pinned into our object. i915_gem_object_get_pages() may be called
+ * and pinned into our object. i915_gem_object_pin_pages() may be called
  * multiple times before they are released by a single call to
- * i915_gem_object_put_pages() - once the pages are no longer referenced
+ * i915_gem_object_unpin_pages() - once the pages are no longer referenced
  * either as a result of memory pressure (reaping pages under the shrinker)
  * or as the object is itself released.
  */
@@ -2525,15 +2535,23 @@  int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
 {
 	int err;
 
-	lockdep_assert_held(&obj->base.dev->struct_mutex);
+	err = mutex_lock_interruptible(&obj->mm.lock);
+	if (err)
+		return err;
 
-	if (obj->mm.pages)
-		return 0;
+	if (likely(obj->mm.pages)) {
+		__i915_gem_object_pin_pages(obj);
+		goto unlock;
+	}
+
+	GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj));
 
 	err = ____i915_gem_object_get_pages(obj);
-	if (err)
-		__i915_gem_object_unpin_pages(obj);
+	if (!err)
+		atomic_set_release(&obj->mm.pages_pin_count, 1);
 
+unlock:
+	mutex_unlock(&obj->mm.lock);
 	return err;
 }
 
@@ -2593,20 +2611,29 @@  void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
 	void *ptr;
 	int ret;
 
-	lockdep_assert_held(&obj->base.dev->struct_mutex);
 	GEM_BUG_ON(!i915_gem_object_has_struct_page(obj));
 
-	ret = i915_gem_object_pin_pages(obj);
+	ret = mutex_lock_interruptible(&obj->mm.lock);
 	if (ret)
 		return ERR_PTR(ret);
 
-	pinned = obj->mm.pages_pin_count > 1;
+	pinned = true;
+	if (!atomic_inc_not_zero(&obj->mm.pages_pin_count)) {
+		ret = ____i915_gem_object_get_pages(obj);
+		if (ret)
+			goto err_unlock;
+
+		GEM_BUG_ON(atomic_read(&obj->mm.pages_pin_count));
+		atomic_set_release(&obj->mm.pages_pin_count, 1);
+		pinned = false;
+	}
+	GEM_BUG_ON(!obj->mm.pages);
 
 	ptr = ptr_unpack_bits(obj->mm.mapping, has_type);
 	if (ptr && has_type != type) {
 		if (pinned) {
 			ret = -EBUSY;
-			goto err;
+			goto err_unpin;
 		}
 
 		if (is_vmalloc_addr(ptr))
@@ -2621,17 +2648,21 @@  void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
 		ptr = i915_gem_object_map(obj, type);
 		if (!ptr) {
 			ret = -ENOMEM;
-			goto err;
+			goto err_unpin;
 		}
 
 		obj->mm.mapping = ptr_pack_bits(ptr, type);
 	}
 
+out_unlock:
+	mutex_unlock(&obj->mm.lock);
 	return ptr;
 
-err:
-	i915_gem_object_unpin_pages(obj);
-	return ERR_PTR(ret);
+err_unpin:
+	atomic_dec(&obj->mm.pages_pin_count);
+err_unlock:
+	ptr = ERR_PTR(ret);
+	goto out_unlock;
 }
 
 static void
@@ -4269,7 +4300,7 @@  i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	struct drm_i915_gem_madvise *args = data;
 	struct drm_i915_gem_object *obj;
-	int ret;
+	int err;
 
 	switch (args->madv) {
 	case I915_MADV_DONTNEED:
@@ -4279,15 +4310,13 @@  i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
 	    return -EINVAL;
 	}
 
-	ret = i915_mutex_lock_interruptible(dev);
-	if (ret)
-		return ret;
-
 	obj = i915_gem_object_lookup(file_priv, args->handle);
-	if (!obj) {
-		ret = -ENOENT;
-		goto unlock;
-	}
+	if (!obj)
+		return -ENOENT;
+
+	err = mutex_lock_interruptible(&obj->mm.lock);
+	if (err)
+		goto out;
 
 	if (obj->mm.pages &&
 	    i915_gem_object_is_tiled(obj) &&
@@ -4306,11 +4335,11 @@  i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
 		i915_gem_object_truncate(obj);
 
 	args->retained = obj->mm.madv != __I915_MADV_PURGED;
+	mutex_unlock(&obj->mm.lock);
 
+out:
 	i915_gem_object_put(obj);
-unlock:
-	mutex_unlock(&dev->struct_mutex);
-	return ret;
+	return err;
 }
 
 void i915_gem_object_init(struct drm_i915_gem_object *obj,
@@ -4318,6 +4347,8 @@  void i915_gem_object_init(struct drm_i915_gem_object *obj,
 {
 	int i;
 
+	mutex_init(&obj->mm.lock);
+
 	INIT_LIST_HEAD(&obj->global_list);
 	INIT_LIST_HEAD(&obj->userfault_link);
 	for (i = 0; i < I915_NUM_ENGINES; i++)
@@ -4465,7 +4496,7 @@  void i915_gem_free_object(struct drm_gem_object *gem_obj)
 		obj->ops->release(obj);
 
 	if (WARN_ON(i915_gem_object_has_pinned_pages(obj)))
-		obj->mm.pages_pin_count = 0;
+		atomic_set(&obj->mm.pages_pin_count, 0);
 	if (discard_backing_storage(obj))
 		obj->mm.madv = I915_MADV_DONTNEED;
 	__i915_gem_object_put_pages(obj);
diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
index f95061faeae6..c8a4c40ec2c2 100644
--- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
@@ -48,6 +48,20 @@  static bool mutex_is_locked_by(struct mutex *mutex, struct task_struct *task)
 #endif
 }
 
+static bool i915_gem_shrinker_lock(struct drm_device *dev, bool *unlock)
+{
+	if (!mutex_trylock(&dev->struct_mutex)) {
+		if (!mutex_is_locked_by(&dev->struct_mutex, current))
+			return false;
+
+		*unlock = false;
+	} else {
+		*unlock = true;
+	}
+
+	return true;
+}
+
 static bool any_vma_pinned(struct drm_i915_gem_object *obj)
 {
 	struct i915_vma *vma;
@@ -66,6 +80,9 @@  static bool swap_available(void)
 
 static bool can_release_pages(struct drm_i915_gem_object *obj)
 {
+	if (!obj->mm.pages)
+		return false;
+
 	/* Only shmemfs objects are backed by swap */
 	if (!obj->base.filp)
 		return false;
@@ -78,7 +95,7 @@  static bool can_release_pages(struct drm_i915_gem_object *obj)
 	 * to the GPU, simply unbinding from the GPU is not going to succeed
 	 * in releasing our pin count on the pages themselves.
 	 */
-	if (obj->mm.pages_pin_count > obj->bind_count)
+	if (atomic_read(&obj->mm.pages_pin_count) > obj->bind_count)
 		return false;
 
 	if (any_vma_pinned(obj))
@@ -95,7 +112,7 @@  static bool unsafe_drop_pages(struct drm_i915_gem_object *obj)
 {
 	if (i915_gem_object_unbind(obj) == 0)
 		__i915_gem_object_put_pages(obj);
-	return !obj->mm.pages;
+	return !READ_ONCE(obj->mm.pages);
 }
 
 /**
@@ -135,6 +152,10 @@  i915_gem_shrink(struct drm_i915_private *dev_priv,
 		{ NULL, 0 },
 	}, *phase;
 	unsigned long count = 0;
+	bool unlock;
+
+	if (!i915_gem_shrinker_lock(&dev_priv->drm, &unlock))
+		return 0;
 
 	trace_i915_gem_shrink(dev_priv, target, flags);
 	i915_gem_retire_requests(dev_priv);
@@ -199,8 +220,14 @@  i915_gem_shrink(struct drm_i915_private *dev_priv,
 
 			i915_gem_object_get(obj);
 
-			if (unsafe_drop_pages(obj))
-				count += obj->base.size >> PAGE_SHIFT;
+			if (unsafe_drop_pages(obj)) {
+				mutex_lock(&obj->mm.lock);
+				if (!obj->mm.pages) {
+					__i915_gem_object_invalidate(obj);
+					count += obj->base.size >> PAGE_SHIFT;
+				}
+				mutex_unlock(&obj->mm.lock);
+			}
 
 			i915_gem_object_put(obj);
 		}
@@ -211,6 +238,9 @@  i915_gem_shrink(struct drm_i915_private *dev_priv,
 		intel_runtime_pm_put(dev_priv);
 
 	i915_gem_retire_requests(dev_priv);
+	if (unlock)
+		mutex_unlock(&dev_priv->drm.struct_mutex);
+
 	/* expedite the RCU grace period to free some request slabs */
 	synchronize_rcu_expedited();
 
@@ -244,19 +274,6 @@  unsigned long i915_gem_shrink_all(struct drm_i915_private *dev_priv)
 	return freed;
 }
 
-static bool i915_gem_shrinker_lock(struct drm_device *dev, bool *unlock)
-{
-	if (!mutex_trylock(&dev->struct_mutex)) {
-		if (!mutex_is_locked_by(&dev->struct_mutex, current))
-			return false;
-
-		*unlock = false;
-	} else
-		*unlock = true;
-
-	return true;
-}
-
 static unsigned long
 i915_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc)
 {
diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
index 34d5ada49720..6608799ee1f9 100644
--- a/drivers/gpu/drm/i915/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
@@ -259,6 +259,7 @@  i915_gem_set_tiling(struct drm_device *dev, void *data,
 		if (!err) {
 			struct i915_vma *vma;
 
+			mutex_lock(&obj->mm.lock);
 			if (obj->mm.pages &&
 			    obj->mm.madv == I915_MADV_WILLNEED &&
 			    dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
@@ -267,6 +268,7 @@  i915_gem_set_tiling(struct drm_device *dev, void *data,
 				if (!i915_gem_object_is_tiled(obj))
 					__i915_gem_object_pin_pages(obj);
 			}
+			mutex_unlock(&obj->mm.lock);
 
 			list_for_each_entry(vma, &obj->vma_list, obj_link) {
 				if (!vma->fence)
diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
index e97c7b589439..136c493b15b2 100644
--- a/drivers/gpu/drm/i915/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -79,7 +79,7 @@  static void cancel_userptr(struct work_struct *work)
 	WARN_ONCE(obj->mm.pages,
 		  "Failed to release pages: bind_count=%d, pages_pin_count=%d, pin_display=%d\n",
 		  obj->bind_count,
-		  obj->mm.pages_pin_count,
+		  atomic_read(&obj->mm.pages_pin_count),
 		  obj->pin_display);
 
 	i915_gem_object_put(obj);
@@ -491,7 +491,6 @@  __i915_gem_userptr_get_pages_worker(struct work_struct *_work)
 {
 	struct get_pages_work *work = container_of(_work, typeof(*work), work);
 	struct drm_i915_gem_object *obj = work->obj;
-	struct drm_device *dev = obj->base.dev;
 	const int npages = obj->base.size >> PAGE_SHIFT;
 	struct page **pvec;
 	int pinned, ret;
@@ -523,7 +522,7 @@  __i915_gem_userptr_get_pages_worker(struct work_struct *_work)
 		}
 	}
 
-	mutex_lock(&dev->struct_mutex);
+	mutex_lock(&obj->mm.lock);
 	if (obj->userptr.work == &work->work) {
 		struct sg_table *pages = ERR_PTR(ret);
 
@@ -538,13 +537,12 @@  __i915_gem_userptr_get_pages_worker(struct work_struct *_work)
 
 		obj->userptr.work = ERR_CAST(pages);
 	}
-
-	i915_gem_object_put(obj);
-	mutex_unlock(&dev->struct_mutex);
+	mutex_unlock(&obj->mm.lock);
 
 	release_pages(pvec, pinned, 0);
 	drm_free_large(pvec);
 
+	i915_gem_object_put_unlocked(obj);
 	put_task_struct(work->task);
 	kfree(work);
 }