drm/i915: Switch obj->mm.lock lockdep annotations on its head

Submitted by Daniel Vetter on Aug. 16, 2019, 6:23 p.m.

Details

Message ID 20190816182336.27305-1-daniel.vetter@ffwll.ch
State New
Headers show
Series "drm/i915: Switch obj->mm.lock lockdep annotations on its head" ( rev: 1 ) in Intel GFX

Not browsing as part of any series.

Commit Message

Daniel Vetter Aug. 16, 2019, 6:23 p.m.
The trouble with having a plain nesting flag for locks which do not
naturally nest (unlike block devices and their partitions, which is
the original motivation for nesting levels) is that lockdep will
never spot a true deadlock if you screw up.

This patch is an attempt at trying better, by highlighting a bit more
the actual nature of the nesting that's going on. Essentially we have
two kinds of objects:

- objects without pages allocated, which cannot be on any lru and are
  hence inaccessible to the shrinker.

- objects which have pages allocated, which are on an lru, and which
  the shrinker can decide to throw out.

For the former type of object, memory allocations while holding
obj->mm.lock are permissible. For the latter they are not. And
get/put_pages transitions between the two types of objects.

This is still not entirely fool-proof since the rules might change.
But as long as we ever run such code at runtime lockdep should be
able to observe the inconsistency and complain (like with any other
lockdep class that we've split up in multiple classes). But there are
a few clear benefits:

- We can drop the nesting flag parameter from
  __i915_gem_object_put_pages, because that function by definition is
  never going to allocate memory, and calling it on an object which
  doesn't have its pages allocated would be a bug.

- We strictly catch more bugs, since there's not only one place in the
  entire tree which is annotated with the special class. All the
  other places that had explicit lockdep nesting annotations we're now
  going to leave up to lockdep again.

- Specifically this catches stuff like calling get_pages from
  put_pages (which isn't really a good idea, if we can call put_pages
  so could the shrinker). I've seen patches do exactly that.

Of course I fully expect CI will show me for the fool I am with this
one here :-)

v2: There can only be one (lockdep only has a cache for the first
subclass, not for deeper ones, and we don't want to make these locks
even slower). Still separate enums for better documentation.

Real fix: don't forget about phys objs and pin_map(), and fix the
shrinker to have the right annotations ... silly me.

v3: Forgot userptr too ...

v4: Improve comment for pages_pin_count, drop the IMPORTANT comment
and instead prime lockdep (Chris).

Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: "Tang, CQ" <cq.tang@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_object.c       | 13 ++++++++++++-
 drivers/gpu/drm/i915/gem/i915_gem_object.h       | 16 +++++++++++++---
 drivers/gpu/drm/i915/gem/i915_gem_object_types.h |  6 +++++-
 drivers/gpu/drm/i915/gem/i915_gem_pages.c        |  9 ++++-----
 drivers/gpu/drm/i915/gem/i915_gem_phys.c         |  2 +-
 drivers/gpu/drm/i915/gem/i915_gem_shrinker.c     |  5 ++---
 drivers/gpu/drm/i915/gem/i915_gem_userptr.c      |  4 ++--
 drivers/gpu/drm/i915/gem/selftests/huge_pages.c  | 12 ++++++------
 8 files changed, 45 insertions(+), 22 deletions(-)

Patch hide | download patch | download mbox

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index 3929c3a6b281..d01258b175f5 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -22,6 +22,8 @@ 
  *
  */
 
+#include <linux/sched/mm.h>
+
 #include "display/intel_frontbuffer.h"
 #include "gt/intel_gt.h"
 #include "i915_drv.h"
@@ -61,6 +63,15 @@  void i915_gem_object_init(struct drm_i915_gem_object *obj,
 {
 	mutex_init(&obj->mm.lock);
 
+	if (IS_ENABLED(CONFIG_LOCKDEP)) {
+		mutex_lock_nested(&obj->mm.lock, I915_MM_GET_PAGES);
+		fs_reclaim_acquire(GFP_KERNEL);
+		might_lock(&obj->mm.lock);
+		fs_reclaim_release(GFP_KERNEL);
+		mutex_unlock(&obj->mm.lock);
+	}
+
+
 	spin_lock_init(&obj->vma.lock);
 	INIT_LIST_HEAD(&obj->vma.list);
 
@@ -191,7 +202,7 @@  static void __i915_gem_free_objects(struct drm_i915_private *i915,
 		GEM_BUG_ON(!list_empty(&obj->lut_list));
 
 		atomic_set(&obj->mm.pages_pin_count, 0);
-		__i915_gem_object_put_pages(obj, I915_MM_NORMAL);
+		__i915_gem_object_put_pages(obj);
 		GEM_BUG_ON(i915_gem_object_has_pages(obj));
 		bitmap_free(obj->bit_17);
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index 3714cf234d64..5ce511ca7fa8 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -281,11 +281,21 @@  i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
 
 enum i915_mm_subclass { /* lockdep subclass for obj->mm.lock/struct_mutex */
 	I915_MM_NORMAL = 0,
-	I915_MM_SHRINKER /* called "recursively" from direct-reclaim-esque */
+	/*
+	 * Only used by struct_mutex, when called "recursively" from
+	 * direct-reclaim-esque. Safe because there is only every one
+	 * struct_mutex in the entire system. */
+	I915_MM_SHRINKER = 1,
+	/*
+	 * Used for obj->mm.lock when allocating pages. Safe because the object
+	 * isn't yet on any LRU, and therefore the shrinker can't deadlock on
+	 * it. As soon as the object has pages, obj->mm.lock nests within
+	 * fs_reclaim.
+	 */
+	I915_MM_GET_PAGES = 1,
 };
 
-int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
-				enum i915_mm_subclass subclass);
+int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj);
 void i915_gem_object_truncate(struct drm_i915_gem_object *obj);
 void i915_gem_object_writeback(struct drm_i915_gem_object *obj);
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index d474c6ac4100..42d114f27d1a 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -157,7 +157,11 @@  struct drm_i915_gem_object {
 	unsigned int pin_global;
 
 	struct {
-		struct mutex lock; /* protects the pages and their use */
+		/*
+		 * Protects the pages and their use. Do not use directly, but
+		 * instead go through the pin/unpin interfaces.
+		 */
+		struct mutex lock;
 		atomic_t pages_pin_count;
 
 		struct sg_table *pages;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
index 18f0ce0135c1..202526e8910f 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
@@ -101,7 +101,7 @@  int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
 {
 	int err;
 
-	err = mutex_lock_interruptible(&obj->mm.lock);
+	err = mutex_lock_interruptible_nested(&obj->mm.lock, I915_MM_GET_PAGES);
 	if (err)
 		return err;
 
@@ -179,8 +179,7 @@  __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj)
 	return pages;
 }
 
-int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
-				enum i915_mm_subclass subclass)
+int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
 {
 	struct sg_table *pages;
 	int err;
@@ -191,7 +190,7 @@  int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
 	GEM_BUG_ON(atomic_read(&obj->bind_count));
 
 	/* May be called by shrinker from within get_pages() (on another bo) */
-	mutex_lock_nested(&obj->mm.lock, subclass);
+	mutex_lock(&obj->mm.lock);
 	if (unlikely(atomic_read(&obj->mm.pages_pin_count))) {
 		err = -EBUSY;
 		goto unlock;
@@ -285,7 +284,7 @@  void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
 	if (unlikely(!i915_gem_object_has_struct_page(obj)))
 		return ERR_PTR(-ENXIO);
 
-	err = mutex_lock_interruptible(&obj->mm.lock);
+	err = mutex_lock_interruptible_nested(&obj->mm.lock, I915_MM_GET_PAGES);
 	if (err)
 		return ERR_PTR(err);
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_phys.c b/drivers/gpu/drm/i915/gem/i915_gem_phys.c
index 102fd7a23d3d..209925be8a76 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_phys.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_phys.c
@@ -156,7 +156,7 @@  int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align)
 	if (err)
 		return err;
 
-	mutex_lock(&obj->mm.lock);
+	mutex_lock_nested(&obj->mm.lock, I915_MM_GET_PAGES);
 
 	if (obj->mm.madv != I915_MADV_WILLNEED) {
 		err = -EFAULT;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
index edd21d14e64f..0b0d6e27b996 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
@@ -98,7 +98,7 @@  static bool unsafe_drop_pages(struct drm_i915_gem_object *obj,
 		flags = I915_GEM_OBJECT_UNBIND_ACTIVE;
 
 	if (i915_gem_object_unbind(obj, flags) == 0)
-		__i915_gem_object_put_pages(obj, I915_MM_SHRINKER);
+		__i915_gem_object_put_pages(obj);
 
 	return !i915_gem_object_has_pages(obj);
 }
@@ -254,8 +254,7 @@  i915_gem_shrink(struct drm_i915_private *i915,
 
 			if (unsafe_drop_pages(obj, shrink)) {
 				/* May arrive from get_pages on another bo */
-				mutex_lock_nested(&obj->mm.lock,
-						  I915_MM_SHRINKER);
+				mutex_lock(&obj->mm.lock);
 				if (!i915_gem_object_has_pages(obj)) {
 					try_to_writeback(obj, shrink);
 					count += obj->base.size >> PAGE_SHIFT;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
index 70dc506a5426..f3b3bc7c32cb 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
@@ -158,7 +158,7 @@  userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
 		ret = i915_gem_object_unbind(obj,
 					     I915_GEM_OBJECT_UNBIND_ACTIVE);
 		if (ret == 0)
-			ret = __i915_gem_object_put_pages(obj, I915_MM_SHRINKER);
+			ret = __i915_gem_object_put_pages(obj);
 		i915_gem_object_put(obj);
 		if (ret)
 			goto unlock;
@@ -514,7 +514,7 @@  __i915_gem_userptr_get_pages_worker(struct work_struct *_work)
 		}
 	}
 
-	mutex_lock(&obj->mm.lock);
+	mutex_lock_nested(&obj->mm.lock, I915_MM_GET_PAGES);
 	if (obj->userptr.work == &work->work) {
 		struct sg_table *pages = ERR_PTR(ret);
 
diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
index 6cbd4a668c9a..df586035c33e 100644
--- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
+++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
@@ -562,7 +562,7 @@  static int igt_mock_ppgtt_misaligned_dma(void *arg)
 		i915_vma_close(vma);
 
 		i915_gem_object_unpin_pages(obj);
-		__i915_gem_object_put_pages(obj, I915_MM_NORMAL);
+		__i915_gem_object_put_pages(obj);
 		i915_gem_object_put(obj);
 	}
 
@@ -590,7 +590,7 @@  static void close_object_list(struct list_head *objects,
 
 		list_del(&obj->st_link);
 		i915_gem_object_unpin_pages(obj);
-		__i915_gem_object_put_pages(obj, I915_MM_NORMAL);
+		__i915_gem_object_put_pages(obj);
 		i915_gem_object_put(obj);
 	}
 }
@@ -860,7 +860,7 @@  static int igt_mock_ppgtt_64K(void *arg)
 			i915_vma_close(vma);
 
 			i915_gem_object_unpin_pages(obj);
-			__i915_gem_object_put_pages(obj, I915_MM_NORMAL);
+			__i915_gem_object_put_pages(obj);
 			i915_gem_object_put(obj);
 		}
 	}
@@ -1268,7 +1268,7 @@  static int igt_ppgtt_exhaust_huge(void *arg)
 			}
 
 			i915_gem_object_unpin_pages(obj);
-			__i915_gem_object_put_pages(obj, I915_MM_NORMAL);
+			__i915_gem_object_put_pages(obj);
 			i915_gem_object_put(obj);
 		}
 	}
@@ -1330,7 +1330,7 @@  static int igt_ppgtt_internal_huge(void *arg)
 		}
 
 		i915_gem_object_unpin_pages(obj);
-		__i915_gem_object_put_pages(obj, I915_MM_NORMAL);
+		__i915_gem_object_put_pages(obj);
 		i915_gem_object_put(obj);
 	}
 
@@ -1399,7 +1399,7 @@  static int igt_ppgtt_gemfs_huge(void *arg)
 		}
 
 		i915_gem_object_unpin_pages(obj);
-		__i915_gem_object_put_pages(obj, I915_MM_NORMAL);
+		__i915_gem_object_put_pages(obj);
 		i915_gem_object_put(obj);
 	}
 

Comments

Quoting Daniel Vetter (2019-08-16 19:23:36)
> The trouble with having a plain nesting flag for locks which do not
> naturally nest (unlike block devices and their partitions, which is
> the original motivation for nesting levels) is that lockdep will
> never spot a true deadlock if you screw up.
> 
> This patch is an attempt at trying better, by highlighting a bit more
> the actual nature of the nesting that's going on. Essentially we have
> two kinds of objects:
> 
> - objects without pages allocated, which cannot be on any lru and are
>   hence inaccessible to the shrinker.
> 
> - objects which have pages allocated, which are on an lru, and which
>   the shrinker can decide to throw out.
> 
> For the former type of object, memory allcoations while holding
> obj->mm.lock are permissible. For the latter they are not. And
> get/put_pages transitions between the two types of objects.
> 
> This is still not entirely fool-proof since the rules might chance.
> But as long as we run such a code ever at runtime lockdep should be
> able to observe the inconsistency and complain (like with any other
> lockdep class that we've split up in multiple classes). But there are
> a few clear benefits:
> 
> - We can drop the nesting flag parameter from
>   __i915_gem_object_put_pages, because that function by definition is
>   never going allocate memory, and calling it on an object which
>   doesn't have its pages allocated would be a bug.
> 
> - We strictly catch more bugs, since there's not only one place in the
>   entire tree which is annotated with the special class. All the
>   other places that had explicit lockdep nesting annotations we're now
>   going to leave up to lockdep again.
> 
> - Specifically this catches stuff like calling get_pages from
>   put_pages (which isn't really a good idea, if we can call put_pages
>   so could the shrinker). I've seen patches do exactly that.
> 
> Of course I fully expect CI will show me for the fool I am with this
> one here :-)
> 
> v2: There can only be one (lockdep only has a cache for the first
> subclass, not for deeper ones, and we don't want to make these locks
> even slower). Still separate enums for better documentation.
> 
> Real fix: don forget about phys objs and pin_map(), and fix the
> shrinker to have the right annotations ... silly me.
> 
> v3: Forgot usertptr too ...
> 
> v4: Improve comment for pages_pin_count, drop the IMPORTANT comment
> and instead prime lockdep (Chris).
> 
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: "Tang, CQ" <cq.tang@intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
> ---
>  drivers/gpu/drm/i915/gem/i915_gem_object.c       | 13 ++++++++++++-
>  drivers/gpu/drm/i915/gem/i915_gem_object.h       | 16 +++++++++++++---
>  drivers/gpu/drm/i915/gem/i915_gem_object_types.h |  6 +++++-
>  drivers/gpu/drm/i915/gem/i915_gem_pages.c        |  9 ++++-----
>  drivers/gpu/drm/i915/gem/i915_gem_phys.c         |  2 +-
>  drivers/gpu/drm/i915/gem/i915_gem_shrinker.c     |  5 ++---
>  drivers/gpu/drm/i915/gem/i915_gem_userptr.c      |  4 ++--
>  drivers/gpu/drm/i915/gem/selftests/huge_pages.c  | 12 ++++++------
>  8 files changed, 45 insertions(+), 22 deletions(-)

static inline int __must_check
i915_gem_object_pin_pages(struct drm_i915_gem_object *obj)
{
        might_lock(&obj->mm.lock);

        if (atomic_inc_not_zero(&obj->mm.pages_pin_count))
                return 0;

        return __i915_gem_object_get_pages(obj);
}

is now testing the wrong lock class.

> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c
> index 3929c3a6b281..d01258b175f5 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
> @@ -22,6 +22,8 @@
>   *
>   */
>  
> +#include <linux/sched/mm.h>
> +
>  #include "display/intel_frontbuffer.h"
>  #include "gt/intel_gt.h"
>  #include "i915_drv.h"
> @@ -61,6 +63,15 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
>  {
>         mutex_init(&obj->mm.lock);
>  
> +       if (IS_ENABLED(CONFIG_LOCKDEP)) {
> +               mutex_lock_nested(&obj->mm.lock, I915_MM_GET_PAGES);
> +               fs_reclaim_acquire(GFP_KERNEL);
> +               might_lock(&obj->mm.lock);
> +               fs_reclaim_release(GFP_KERNEL);
> +               mutex_unlock(&obj->mm.lock);
> +       }

This is very powerful and sells a lot of churn.
-Chris
> -----Original Message-----
> From: Daniel Vetter [mailto:daniel.vetter@ffwll.ch]
> Sent: Friday, August 16, 2019 11:24 AM
> To: Intel Graphics Development <intel-gfx@lists.freedesktop.org>
> Cc: Daniel Vetter <daniel.vetter@ffwll.ch>; Chris Wilson <chris@chris-
> wilson.co.uk>; Tang, CQ <cq.tang@intel.com>; Ursulin, Tvrtko
> <tvrtko.ursulin@intel.com>; Joonas Lahtinen
> <joonas.lahtinen@linux.intel.com>; Vetter, Daniel <daniel.vetter@intel.com>
> Subject: [PATCH] drm/i915: Switch obj->mm.lock lockdep annotations on its
> head
> 
> The trouble with having a plain nesting flag for locks which do not naturally
> nest (unlike block devices and their partitions, which is the original motivation
> for nesting levels) is that lockdep will never spot a true deadlock if you screw
> up.
> 
> This patch is an attempt at trying better, by highlighting a bit more the actual
> nature of the nesting that's going on. Essentially we have two kinds of
> objects:
> 
> - objects without pages allocated, which cannot be on any lru and are
>   hence inaccessible to the shrinker.
> 
> - objects which have pages allocated, which are on an lru, and which
>   the shrinker can decide to throw out.
> 
> For the former type of object, memory allcoations while holding
> obj->mm.lock are permissible. For the latter they are not. And
> get/put_pages transitions between the two types of objects.
> 
> This is still not entirely fool-proof since the rules might chance.
> But as long as we run such a code ever at runtime lockdep should be able to
> observe the inconsistency and complain (like with any other lockdep class
> that we've split up in multiple classes). But there are a few clear benefits:
> 
> - We can drop the nesting flag parameter from
>   __i915_gem_object_put_pages, because that function by definition is
>   never going allocate memory, and calling it on an object which
>   doesn't have its pages allocated would be a bug.
> 
> - We strictly catch more bugs, since there's not only one place in the
>   entire tree which is annotated with the special class. All the
>   other places that had explicit lockdep nesting annotations we're now
>   going to leave up to lockdep again.
> 
> - Specifically this catches stuff like calling get_pages from
>   put_pages (which isn't really a good idea, if we can call put_pages
>   so could the shrinker). I've seen patches do exactly that.
> 
> Of course I fully expect CI will show me for the fool I am with this one here :-)
> 
> v2: There can only be one (lockdep only has a cache for the first subclass, not
> for deeper ones, and we don't want to make these locks even slower). Still
> separate enums for better documentation.
> 
> Real fix: don forget about phys objs and pin_map(), and fix the shrinker to
> have the right annotations ... silly me.
> 
> v3: Forgot usertptr too ...
> 
> v4: Improve comment for pages_pin_count, drop the IMPORTANT comment
> and instead prime lockdep (Chris).
> 
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: "Tang, CQ" <cq.tang@intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
> ---
>  drivers/gpu/drm/i915/gem/i915_gem_object.c       | 13 ++++++++++++-
>  drivers/gpu/drm/i915/gem/i915_gem_object.h       | 16 +++++++++++++---
>  drivers/gpu/drm/i915/gem/i915_gem_object_types.h |  6 +++++-
>  drivers/gpu/drm/i915/gem/i915_gem_pages.c        |  9 ++++-----
>  drivers/gpu/drm/i915/gem/i915_gem_phys.c         |  2 +-
>  drivers/gpu/drm/i915/gem/i915_gem_shrinker.c     |  5 ++---
>  drivers/gpu/drm/i915/gem/i915_gem_userptr.c      |  4 ++--
>  drivers/gpu/drm/i915/gem/selftests/huge_pages.c  | 12 ++++++------
>  8 files changed, 45 insertions(+), 22 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c
> b/drivers/gpu/drm/i915/gem/i915_gem_object.c
> index 3929c3a6b281..d01258b175f5 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
> @@ -22,6 +22,8 @@
>   *
>   */
> 
> +#include <linux/sched/mm.h>
> +
>  #include "display/intel_frontbuffer.h"
>  #include "gt/intel_gt.h"
>  #include "i915_drv.h"
> @@ -61,6 +63,15 @@ void i915_gem_object_init(struct
> drm_i915_gem_object *obj,  {
>  	mutex_init(&obj->mm.lock);
> 
> +	if (IS_ENABLED(CONFIG_LOCKDEP)) {
> +		mutex_lock_nested(&obj->mm.lock,
> I915_MM_GET_PAGES);
> +		fs_reclaim_acquire(GFP_KERNEL);
> +		might_lock(&obj->mm.lock);
> +		fs_reclaim_release(GFP_KERNEL);
> +		mutex_unlock(&obj->mm.lock);
> +	}
> +
> +
>  	spin_lock_init(&obj->vma.lock);
>  	INIT_LIST_HEAD(&obj->vma.list);
> 
> @@ -191,7 +202,7 @@ static void __i915_gem_free_objects(struct
> drm_i915_private *i915,
>  		GEM_BUG_ON(!list_empty(&obj->lut_list));
> 
>  		atomic_set(&obj->mm.pages_pin_count, 0);
> -		__i915_gem_object_put_pages(obj, I915_MM_NORMAL);
> +		__i915_gem_object_put_pages(obj);
>  		GEM_BUG_ON(i915_gem_object_has_pages(obj));
>  		bitmap_free(obj->bit_17);
> 
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h
> b/drivers/gpu/drm/i915/gem/i915_gem_object.h
> index 3714cf234d64..5ce511ca7fa8 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
> @@ -281,11 +281,21 @@ i915_gem_object_unpin_pages(struct
> drm_i915_gem_object *obj)
> 
>  enum i915_mm_subclass { /* lockdep subclass for obj-
> >mm.lock/struct_mutex */
>  	I915_MM_NORMAL = 0,
> -	I915_MM_SHRINKER /* called "recursively" from direct-reclaim-
> esque */
> +	/*
> +	 * Only used by struct_mutex, when called "recursively" from
> +	 * direct-reclaim-esque. Safe because there is only every one
> +	 * struct_mutex in the entire system. */
> +	I915_MM_SHRINKER = 1,
> +	/*
> +	 * Used for obj->mm.lock when allocating pages. Safe because the
> object
> +	 * isn't yet on any LRU, and therefore the shrinker can't deadlock on
> +	 * it. As soon as the object has pages, obj->mm.lock nests within
> +	 * fs_reclaim.
> +	 */
> +	I915_MM_GET_PAGES = 1,

If both have the same value, why bother to use two names? Can we use a single generic name?

--CQ

>  };
> 
> -int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
> -				enum i915_mm_subclass subclass);
> +int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj);
>  void i915_gem_object_truncate(struct drm_i915_gem_object *obj);  void
> i915_gem_object_writeback(struct drm_i915_gem_object *obj);
> 
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> index d474c6ac4100..42d114f27d1a 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> @@ -157,7 +157,11 @@ struct drm_i915_gem_object {
>  	unsigned int pin_global;
> 
>  	struct {
> -		struct mutex lock; /* protects the pages and their use */
> +		/*
> +		 * Protects the pages and their use. Do not use directly, but
> +		 * instead go through the pin/unpin interfaces.
> +		 */
> +		struct mutex lock;
>  		atomic_t pages_pin_count;
> 
>  		struct sg_table *pages;
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c
> b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
> index 18f0ce0135c1..202526e8910f 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
> @@ -101,7 +101,7 @@ int __i915_gem_object_get_pages(struct
> drm_i915_gem_object *obj)  {
>  	int err;
> 
> -	err = mutex_lock_interruptible(&obj->mm.lock);
> +	err = mutex_lock_interruptible_nested(&obj->mm.lock,
> +I915_MM_GET_PAGES);
>  	if (err)
>  		return err;
> 
> @@ -179,8 +179,7 @@ __i915_gem_object_unset_pages(struct
> drm_i915_gem_object *obj)
>  	return pages;
>  }
> 
> -int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
> -				enum i915_mm_subclass subclass)
> +int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
>  {
>  	struct sg_table *pages;
>  	int err;
> @@ -191,7 +190,7 @@ int __i915_gem_object_put_pages(struct
> drm_i915_gem_object *obj,
>  	GEM_BUG_ON(atomic_read(&obj->bind_count));
> 
>  	/* May be called by shrinker from within get_pages() (on another bo)
> */
> -	mutex_lock_nested(&obj->mm.lock, subclass);
> +	mutex_lock(&obj->mm.lock);
>  	if (unlikely(atomic_read(&obj->mm.pages_pin_count))) {
>  		err = -EBUSY;
>  		goto unlock;
> @@ -285,7 +284,7 @@ void *i915_gem_object_pin_map(struct
> drm_i915_gem_object *obj,
>  	if (unlikely(!i915_gem_object_has_struct_page(obj)))
>  		return ERR_PTR(-ENXIO);
> 
> -	err = mutex_lock_interruptible(&obj->mm.lock);
> +	err = mutex_lock_interruptible_nested(&obj->mm.lock,
> +I915_MM_GET_PAGES);
>  	if (err)
>  		return ERR_PTR(err);
> 
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_phys.c
> b/drivers/gpu/drm/i915/gem/i915_gem_phys.c
> index 102fd7a23d3d..209925be8a76 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_phys.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_phys.c
> @@ -156,7 +156,7 @@ int i915_gem_object_attach_phys(struct
> drm_i915_gem_object *obj, int align)
>  	if (err)
>  		return err;
> 
> -	mutex_lock(&obj->mm.lock);
> +	mutex_lock_nested(&obj->mm.lock, I915_MM_GET_PAGES);
> 
>  	if (obj->mm.madv != I915_MADV_WILLNEED) {
>  		err = -EFAULT;
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
> b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
> index edd21d14e64f..0b0d6e27b996 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
> @@ -98,7 +98,7 @@ static bool unsafe_drop_pages(struct
> drm_i915_gem_object *obj,
>  		flags = I915_GEM_OBJECT_UNBIND_ACTIVE;
> 
>  	if (i915_gem_object_unbind(obj, flags) == 0)
> -		__i915_gem_object_put_pages(obj, I915_MM_SHRINKER);
> +		__i915_gem_object_put_pages(obj);
> 
>  	return !i915_gem_object_has_pages(obj);  } @@ -254,8 +254,7 @@
> i915_gem_shrink(struct drm_i915_private *i915,
> 
>  			if (unsafe_drop_pages(obj, shrink)) {
>  				/* May arrive from get_pages on another bo
> */
> -				mutex_lock_nested(&obj->mm.lock,
> -						  I915_MM_SHRINKER);
> +				mutex_lock(&obj->mm.lock);
>  				if (!i915_gem_object_has_pages(obj)) {
>  					try_to_writeback(obj, shrink);
>  					count += obj->base.size >>
> PAGE_SHIFT; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
> b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
> index 70dc506a5426..f3b3bc7c32cb 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
> @@ -158,7 +158,7 @@ userptr_mn_invalidate_range_start(struct
> mmu_notifier *_mn,
>  		ret = i915_gem_object_unbind(obj,
> 
> I915_GEM_OBJECT_UNBIND_ACTIVE);
>  		if (ret == 0)
> -			ret = __i915_gem_object_put_pages(obj,
> I915_MM_SHRINKER);
> +			ret = __i915_gem_object_put_pages(obj);
>  		i915_gem_object_put(obj);
>  		if (ret)
>  			goto unlock;
> @@ -514,7 +514,7 @@ __i915_gem_userptr_get_pages_worker(struct
> work_struct *_work)
>  		}
>  	}
> 
> -	mutex_lock(&obj->mm.lock);
> +	mutex_lock_nested(&obj->mm.lock, I915_MM_GET_PAGES);
>  	if (obj->userptr.work == &work->work) {
>  		struct sg_table *pages = ERR_PTR(ret);
> 
> diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
> b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
> index 6cbd4a668c9a..df586035c33e 100644
> --- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
> +++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
> @@ -562,7 +562,7 @@ static int igt_mock_ppgtt_misaligned_dma(void *arg)
>  		i915_vma_close(vma);
> 
>  		i915_gem_object_unpin_pages(obj);
> -		__i915_gem_object_put_pages(obj, I915_MM_NORMAL);
> +		__i915_gem_object_put_pages(obj);
>  		i915_gem_object_put(obj);
>  	}
> 
> @@ -590,7 +590,7 @@ static void close_object_list(struct list_head *objects,
> 
>  		list_del(&obj->st_link);
>  		i915_gem_object_unpin_pages(obj);
> -		__i915_gem_object_put_pages(obj, I915_MM_NORMAL);
> +		__i915_gem_object_put_pages(obj);
>  		i915_gem_object_put(obj);
>  	}
>  }
> @@ -860,7 +860,7 @@ static int igt_mock_ppgtt_64K(void *arg)
>  			i915_vma_close(vma);
> 
>  			i915_gem_object_unpin_pages(obj);
> -			__i915_gem_object_put_pages(obj,
> I915_MM_NORMAL);
> +			__i915_gem_object_put_pages(obj);
>  			i915_gem_object_put(obj);
>  		}
>  	}
> @@ -1268,7 +1268,7 @@ static int igt_ppgtt_exhaust_huge(void *arg)
>  			}
> 
>  			i915_gem_object_unpin_pages(obj);
> -			__i915_gem_object_put_pages(obj,
> I915_MM_NORMAL);
> +			__i915_gem_object_put_pages(obj);
>  			i915_gem_object_put(obj);
>  		}
>  	}
> @@ -1330,7 +1330,7 @@ static int igt_ppgtt_internal_huge(void *arg)
>  		}
> 
>  		i915_gem_object_unpin_pages(obj);
> -		__i915_gem_object_put_pages(obj, I915_MM_NORMAL);
> +		__i915_gem_object_put_pages(obj);
>  		i915_gem_object_put(obj);
>  	}
> 
> @@ -1399,7 +1399,7 @@ static int igt_ppgtt_gemfs_huge(void *arg)
>  		}
> 
>  		i915_gem_object_unpin_pages(obj);
> -		__i915_gem_object_put_pages(obj, I915_MM_NORMAL);
> +		__i915_gem_object_put_pages(obj);
>  		i915_gem_object_put(obj);
>  	}
> 
> --
> 2.23.0.rc1
On Fri, Aug 16, 2019 at 8:46 PM Chris Wilson <chris@chris-wilson.co.uk> wrote:
>
> Quoting Daniel Vetter (2019-08-16 19:23:36)
> > The trouble with having a plain nesting flag for locks which do not
> > naturally nest (unlike block devices and their partitions, which is
> > the original motivation for nesting levels) is that lockdep will
> > never spot a true deadlock if you screw up.
> >
> > This patch is an attempt at trying better, by highlighting a bit more
> > the actual nature of the nesting that's going on. Essentially we have
> > two kinds of objects:
> >
> > - objects without pages allocated, which cannot be on any lru and are
> >   hence inaccessible to the shrinker.
> >
> > - objects which have pages allocated, which are on an lru, and which
> >   the shrinker can decide to throw out.
> >
> > For the former type of object, memory allcoations while holding
> > obj->mm.lock are permissible. For the latter they are not. And
> > get/put_pages transitions between the two types of objects.
> >
> > This is still not entirely fool-proof since the rules might chance.
> > But as long as we run such a code ever at runtime lockdep should be
> > able to observe the inconsistency and complain (like with any other
> > lockdep class that we've split up in multiple classes). But there are
> > a few clear benefits:
> >
> > - We can drop the nesting flag parameter from
> >   __i915_gem_object_put_pages, because that function by definition is
> >   never going allocate memory, and calling it on an object which
> >   doesn't have its pages allocated would be a bug.
> >
> > - We strictly catch more bugs, since there's not only one place in the
> >   entire tree which is annotated with the special class. All the
> >   other places that had explicit lockdep nesting annotations we're now
> >   going to leave up to lockdep again.
> >
> > - Specifically this catches stuff like calling get_pages from
> >   put_pages (which isn't really a good idea, if we can call put_pages
> >   so could the shrinker). I've seen patches do exactly that.
> >
> > Of course I fully expect CI will show me for the fool I am with this
> > one here :-)
> >
> > v2: There can only be one (lockdep only has a cache for the first
> > subclass, not for deeper ones, and we don't want to make these locks
> > even slower). Still separate enums for better documentation.
> >
> > Real fix: don't forget about phys objs and pin_map(), and fix the
> > shrinker to have the right annotations ... silly me.
> >
> > v3: Forgot userptr too ...
> >
> > v4: Improve comment for pages_pin_count, drop the IMPORTANT comment
> > and instead prime lockdep (Chris).
> >
> > Cc: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: "Tang, CQ" <cq.tang@intel.com>
> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> > Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
> > ---
> >  drivers/gpu/drm/i915/gem/i915_gem_object.c       | 13 ++++++++++++-
> >  drivers/gpu/drm/i915/gem/i915_gem_object.h       | 16 +++++++++++++---
> >  drivers/gpu/drm/i915/gem/i915_gem_object_types.h |  6 +++++-
> >  drivers/gpu/drm/i915/gem/i915_gem_pages.c        |  9 ++++-----
> >  drivers/gpu/drm/i915/gem/i915_gem_phys.c         |  2 +-
> >  drivers/gpu/drm/i915/gem/i915_gem_shrinker.c     |  5 ++---
> >  drivers/gpu/drm/i915/gem/i915_gem_userptr.c      |  4 ++--
> >  drivers/gpu/drm/i915/gem/selftests/huge_pages.c  | 12 ++++++------
> >  8 files changed, 45 insertions(+), 22 deletions(-)
>
> static inline int __must_check
> i915_gem_object_pin_pages(struct drm_i915_gem_object *obj)
> {
>         might_lock(&obj->mm.lock);
>
>         if (atomic_inc_not_zero(&obj->mm.pages_pin_count))
>                 return 0;
>
>         return __i915_gem_object_get_pages(obj);
> }
>
> is now testing the wrong lock class.

Unfortunately there's no might_lock_nested.

But then, this is the best kind of wrong, because of the nesting we have:

obj->mm.lock#I915_MM_GET_PAGES -> fs_reclaim -> obj->mm.lock

So the might_lock we have actually checks for way more than just the
"more correct" annotation. I think I'll just add the above as a
comment and leave the code as-is. Thoughts?

> > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c
> > index 3929c3a6b281..d01258b175f5 100644
> > --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
> > +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
> > @@ -22,6 +22,8 @@
> >   *
> >   */
> >
> > +#include <linux/sched/mm.h>
> > +
> >  #include "display/intel_frontbuffer.h"
> >  #include "gt/intel_gt.h"
> >  #include "i915_drv.h"
> > @@ -61,6 +63,15 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
> >  {
> >         mutex_init(&obj->mm.lock);
> >
> > +       if (IS_ENABLED(CONFIG_LOCKDEP)) {
> > +               mutex_lock_nested(&obj->mm.lock, I915_MM_GET_PAGES);
> > +               fs_reclaim_acquire(GFP_KERNEL);
> > +               might_lock(&obj->mm.lock);
> > +               fs_reclaim_release(GFP_KERNEL);
> > +               mutex_unlock(&obj->mm.lock);
> > +       }
>
> This is very powerful and sells a lot of churn.

Yeah that was the idea here. Plus I hope it's the easier to understand
the annotations and lock nesting rules for obj->mm.lock this way - I
freaked out quite a bit about the current one until you convinced me
(which took its sweet time) that it's all fine. Maybe explicitly
annotating get_pages and its special rule will help others (I can't
play guinea pig twice unfortunately, so we can't test that theory).
-Daniel
On Fri, Aug 16, 2019 at 9:23 PM Tang, CQ <cq.tang@intel.com> wrote:
>
>
>
> > -----Original Message-----
> > From: Daniel Vetter [mailto:daniel.vetter@ffwll.ch]
> > Sent: Friday, August 16, 2019 11:24 AM
> > To: Intel Graphics Development <intel-gfx@lists.freedesktop.org>
> > Cc: Daniel Vetter <daniel.vetter@ffwll.ch>; Chris Wilson <chris@chris-
> > wilson.co.uk>; Tang, CQ <cq.tang@intel.com>; Ursulin, Tvrtko
> > <tvrtko.ursulin@intel.com>; Joonas Lahtinen
> > <joonas.lahtinen@linux.intel.com>; Vetter, Daniel <daniel.vetter@intel.com>
> > Subject: [PATCH] drm/i915: Switch obj->mm.lock lockdep annotations on its
> > head
> >
> > The trouble with having a plain nesting flag for locks which do not naturally
> > nest (unlike block devices and their partitions, which is the original motivation
> > for nesting levels) is that lockdep will never spot a true deadlock if you screw
> > up.
> >
> > This patch is an attempt at trying better, by highlighting a bit more the actual
> > nature of the nesting that's going on. Essentially we have two kinds of
> > objects:
> >
> > - objects without pages allocated, which cannot be on any lru and are
> >   hence inaccessible to the shrinker.
> >
> > - objects which have pages allocated, which are on an lru, and which
> >   the shrinker can decide to throw out.
> >
> > For the former type of object, memory allocations while holding
> > obj->mm.lock are permissible. For the latter they are not. And
> > get/put_pages transitions between the two types of objects.
> >
> > This is still not entirely fool-proof since the rules might change.
> > But as long as we run such a code ever at runtime lockdep should be able to
> > observe the inconsistency and complain (like with any other lockdep class
> > that we've split up in multiple classes). But there are a few clear benefits:
> >
> > - We can drop the nesting flag parameter from
> >   __i915_gem_object_put_pages, because that function by definition is
> >   never going to allocate memory, and calling it on an object which
> >   doesn't have its pages allocated would be a bug.
> >
> > - We strictly catch more bugs, since there's not only one place in the
> >   entire tree which is annotated with the special class. All the
> >   other places that had explicit lockdep nesting annotations we're now
> >   going to leave up to lockdep again.
> >
> > - Specifically this catches stuff like calling get_pages from
> >   put_pages (which isn't really a good idea, if we can call put_pages
> >   so could the shrinker). I've seen patches do exactly that.
> >
> > Of course I fully expect CI will show me for the fool I am with this one here :-)
> >
> > v2: There can only be one (lockdep only has a cache for the first subclass, not
> > for deeper ones, and we don't want to make these locks even slower). Still
> > separate enums for better documentation.
> >
> > Real fix: don't forget about phys objs and pin_map(), and fix the shrinker to
> > have the right annotations ... silly me.
> >
> > v3: Forgot userptr too ...
> >
> > v4: Improve comment for pages_pin_count, drop the IMPORTANT comment
> > and instead prime lockdep (Chris).
> >
> > Cc: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: "Tang, CQ" <cq.tang@intel.com>
> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> > Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
> > ---
> >  drivers/gpu/drm/i915/gem/i915_gem_object.c       | 13 ++++++++++++-
> >  drivers/gpu/drm/i915/gem/i915_gem_object.h       | 16 +++++++++++++---
> >  drivers/gpu/drm/i915/gem/i915_gem_object_types.h |  6 +++++-
> >  drivers/gpu/drm/i915/gem/i915_gem_pages.c        |  9 ++++-----
> >  drivers/gpu/drm/i915/gem/i915_gem_phys.c         |  2 +-
> >  drivers/gpu/drm/i915/gem/i915_gem_shrinker.c     |  5 ++---
> >  drivers/gpu/drm/i915/gem/i915_gem_userptr.c      |  4 ++--
> >  drivers/gpu/drm/i915/gem/selftests/huge_pages.c  | 12 ++++++------
> >  8 files changed, 45 insertions(+), 22 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c
> > b/drivers/gpu/drm/i915/gem/i915_gem_object.c
> > index 3929c3a6b281..d01258b175f5 100644
> > --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
> > +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
> > @@ -22,6 +22,8 @@
> >   *
> >   */
> >
> > +#include <linux/sched/mm.h>
> > +
> >  #include "display/intel_frontbuffer.h"
> >  #include "gt/intel_gt.h"
> >  #include "i915_drv.h"
> > @@ -61,6 +63,15 @@ void i915_gem_object_init(struct
> > drm_i915_gem_object *obj,  {
> >       mutex_init(&obj->mm.lock);
> >
> > +     if (IS_ENABLED(CONFIG_LOCKDEP)) {
> > +             mutex_lock_nested(&obj->mm.lock,
> > I915_MM_GET_PAGES);
> > +             fs_reclaim_acquire(GFP_KERNEL);
> > +             might_lock(&obj->mm.lock);
> > +             fs_reclaim_release(GFP_KERNEL);
> > +             mutex_unlock(&obj->mm.lock);
> > +     }
> > +
> > +
> >       spin_lock_init(&obj->vma.lock);
> >       INIT_LIST_HEAD(&obj->vma.list);
> >
> > @@ -191,7 +202,7 @@ static void __i915_gem_free_objects(struct
> > drm_i915_private *i915,
> >               GEM_BUG_ON(!list_empty(&obj->lut_list));
> >
> >               atomic_set(&obj->mm.pages_pin_count, 0);
> > -             __i915_gem_object_put_pages(obj, I915_MM_NORMAL);
> > +             __i915_gem_object_put_pages(obj);
> >               GEM_BUG_ON(i915_gem_object_has_pages(obj));
> >               bitmap_free(obj->bit_17);
> >
> > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h
> > b/drivers/gpu/drm/i915/gem/i915_gem_object.h
> > index 3714cf234d64..5ce511ca7fa8 100644
> > --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
> > +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
> > @@ -281,11 +281,21 @@ i915_gem_object_unpin_pages(struct
> > drm_i915_gem_object *obj)
> >
> >  enum i915_mm_subclass { /* lockdep subclass for obj-
> > >mm.lock/struct_mutex */
> >       I915_MM_NORMAL = 0,
> > -     I915_MM_SHRINKER /* called "recursively" from direct-reclaim-
> > esque */
> > +     /*
> > +      * Only used by struct_mutex, when called "recursively" from
> > +      * direct-reclaim-esque. Safe because there is only ever one
> > +      * struct_mutex in the entire system. */
> > +     I915_MM_SHRINKER = 1,
> > +     /*
> > +      * Used for obj->mm.lock when allocating pages. Safe because the
> > object
> > +      * isn't yet on any LRU, and therefore the shrinker can't deadlock on
> > +      * it. As soon as the object has pages, obj->mm.lock nests within
> > +      * fs_reclaim.
> > +      */
> > +     I915_MM_GET_PAGES = 1,
>
> If both have the same value, why bother to use two names? Can we use a single generic name?

They're two totally different things. The commit message explains why
I've picked the same value for both.

I mean you're essentially arguing (taken to the extreme conclusion
at least) that every #define SOMETHING 1 should be replaced by the
same define. That defeats the point of having meaningful names for
values ...

Cheers, Daniel

>
> --CQ
>
> >  };
> >
> > -int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
> > -                             enum i915_mm_subclass subclass);
> > +int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj);
> >  void i915_gem_object_truncate(struct drm_i915_gem_object *obj);  void
> > i915_gem_object_writeback(struct drm_i915_gem_object *obj);
> >
> > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> > b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> > index d474c6ac4100..42d114f27d1a 100644
> > --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> > +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> > @@ -157,7 +157,11 @@ struct drm_i915_gem_object {
> >       unsigned int pin_global;
> >
> >       struct {
> > -             struct mutex lock; /* protects the pages and their use */
> > +             /*
> > +              * Protects the pages and their use. Do not use directly, but
> > +              * instead go through the pin/unpin interfaces.
> > +              */
> > +             struct mutex lock;
> >               atomic_t pages_pin_count;
> >
> >               struct sg_table *pages;
> > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c
> > b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
> > index 18f0ce0135c1..202526e8910f 100644
> > --- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c
> > +++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
> > @@ -101,7 +101,7 @@ int __i915_gem_object_get_pages(struct
> > drm_i915_gem_object *obj)  {
> >       int err;
> >
> > -     err = mutex_lock_interruptible(&obj->mm.lock);
> > +     err = mutex_lock_interruptible_nested(&obj->mm.lock,
> > +I915_MM_GET_PAGES);
> >       if (err)
> >               return err;
> >
> > @@ -179,8 +179,7 @@ __i915_gem_object_unset_pages(struct
> > drm_i915_gem_object *obj)
> >       return pages;
> >  }
> >
> > -int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
> > -                             enum i915_mm_subclass subclass)
> > +int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
> >  {
> >       struct sg_table *pages;
> >       int err;
> > @@ -191,7 +190,7 @@ int __i915_gem_object_put_pages(struct
> > drm_i915_gem_object *obj,
> >       GEM_BUG_ON(atomic_read(&obj->bind_count));
> >
> >       /* May be called by shrinker from within get_pages() (on another bo)
> > */
> > -     mutex_lock_nested(&obj->mm.lock, subclass);
> > +     mutex_lock(&obj->mm.lock);
> >       if (unlikely(atomic_read(&obj->mm.pages_pin_count))) {
> >               err = -EBUSY;
> >               goto unlock;
> > @@ -285,7 +284,7 @@ void *i915_gem_object_pin_map(struct
> > drm_i915_gem_object *obj,
> >       if (unlikely(!i915_gem_object_has_struct_page(obj)))
> >               return ERR_PTR(-ENXIO);
> >
> > -     err = mutex_lock_interruptible(&obj->mm.lock);
> > +     err = mutex_lock_interruptible_nested(&obj->mm.lock,
> > +I915_MM_GET_PAGES);
> >       if (err)
> >               return ERR_PTR(err);
> >
> > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_phys.c
> > b/drivers/gpu/drm/i915/gem/i915_gem_phys.c
> > index 102fd7a23d3d..209925be8a76 100644
> > --- a/drivers/gpu/drm/i915/gem/i915_gem_phys.c
> > +++ b/drivers/gpu/drm/i915/gem/i915_gem_phys.c
> > @@ -156,7 +156,7 @@ int i915_gem_object_attach_phys(struct
> > drm_i915_gem_object *obj, int align)
> >       if (err)
> >               return err;
> >
> > -     mutex_lock(&obj->mm.lock);
> > +     mutex_lock_nested(&obj->mm.lock, I915_MM_GET_PAGES);
> >
> >       if (obj->mm.madv != I915_MADV_WILLNEED) {
> >               err = -EFAULT;
> > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
> > b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
> > index edd21d14e64f..0b0d6e27b996 100644
> > --- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
> > +++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
> > @@ -98,7 +98,7 @@ static bool unsafe_drop_pages(struct
> > drm_i915_gem_object *obj,
> >               flags = I915_GEM_OBJECT_UNBIND_ACTIVE;
> >
> >       if (i915_gem_object_unbind(obj, flags) == 0)
> > -             __i915_gem_object_put_pages(obj, I915_MM_SHRINKER);
> > +             __i915_gem_object_put_pages(obj);
> >
> >       return !i915_gem_object_has_pages(obj);  } @@ -254,8 +254,7 @@
> > i915_gem_shrink(struct drm_i915_private *i915,
> >
> >                       if (unsafe_drop_pages(obj, shrink)) {
> >                               /* May arrive from get_pages on another bo
> > */
> > -                             mutex_lock_nested(&obj->mm.lock,
> > -                                               I915_MM_SHRINKER);
> > +                             mutex_lock(&obj->mm.lock);
> >                               if (!i915_gem_object_has_pages(obj)) {
> >                                       try_to_writeback(obj, shrink);
> >                                       count += obj->base.size >>
> > PAGE_SHIFT; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
> > b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
> > index 70dc506a5426..f3b3bc7c32cb 100644
> > --- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
> > +++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
> > @@ -158,7 +158,7 @@ userptr_mn_invalidate_range_start(struct
> > mmu_notifier *_mn,
> >               ret = i915_gem_object_unbind(obj,
> >
> > I915_GEM_OBJECT_UNBIND_ACTIVE);
> >               if (ret == 0)
> > -                     ret = __i915_gem_object_put_pages(obj,
> > I915_MM_SHRINKER);
> > +                     ret = __i915_gem_object_put_pages(obj);
> >               i915_gem_object_put(obj);
> >               if (ret)
> >                       goto unlock;
> > @@ -514,7 +514,7 @@ __i915_gem_userptr_get_pages_worker(struct
> > work_struct *_work)
> >               }
> >       }
> >
> > -     mutex_lock(&obj->mm.lock);
> > +     mutex_lock_nested(&obj->mm.lock, I915_MM_GET_PAGES);
> >       if (obj->userptr.work == &work->work) {
> >               struct sg_table *pages = ERR_PTR(ret);
> >
> > diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
> > b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
> > index 6cbd4a668c9a..df586035c33e 100644
> > --- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
> > +++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
> > @@ -562,7 +562,7 @@ static int igt_mock_ppgtt_misaligned_dma(void *arg)
> >               i915_vma_close(vma);
> >
> >               i915_gem_object_unpin_pages(obj);
> > -             __i915_gem_object_put_pages(obj, I915_MM_NORMAL);
> > +             __i915_gem_object_put_pages(obj);
> >               i915_gem_object_put(obj);
> >       }
> >
> > @@ -590,7 +590,7 @@ static void close_object_list(struct list_head *objects,
> >
> >               list_del(&obj->st_link);
> >               i915_gem_object_unpin_pages(obj);
> > -             __i915_gem_object_put_pages(obj, I915_MM_NORMAL);
> > +             __i915_gem_object_put_pages(obj);
> >               i915_gem_object_put(obj);
> >       }
> >  }
> > @@ -860,7 +860,7 @@ static int igt_mock_ppgtt_64K(void *arg)
> >                       i915_vma_close(vma);
> >
> >                       i915_gem_object_unpin_pages(obj);
> > -                     __i915_gem_object_put_pages(obj,
> > I915_MM_NORMAL);
> > +                     __i915_gem_object_put_pages(obj);
> >                       i915_gem_object_put(obj);
> >               }
> >       }
> > @@ -1268,7 +1268,7 @@ static int igt_ppgtt_exhaust_huge(void *arg)
> >                       }
> >
> >                       i915_gem_object_unpin_pages(obj);
> > -                     __i915_gem_object_put_pages(obj,
> > I915_MM_NORMAL);
> > +                     __i915_gem_object_put_pages(obj);
> >                       i915_gem_object_put(obj);
> >               }
> >       }
> > @@ -1330,7 +1330,7 @@ static int igt_ppgtt_internal_huge(void *arg)
> >               }
> >
> >               i915_gem_object_unpin_pages(obj);
> > -             __i915_gem_object_put_pages(obj, I915_MM_NORMAL);
> > +             __i915_gem_object_put_pages(obj);
> >               i915_gem_object_put(obj);
> >       }
> >
> > @@ -1399,7 +1399,7 @@ static int igt_ppgtt_gemfs_huge(void *arg)
> >               }
> >
> >               i915_gem_object_unpin_pages(obj);
> > -             __i915_gem_object_put_pages(obj, I915_MM_NORMAL);
> > +             __i915_gem_object_put_pages(obj);
> >               i915_gem_object_put(obj);
> >       }
> >
> > --
> > 2.23.0.rc1
>
> -----Original Message-----

> From: Daniel Vetter [mailto:daniel.vetter@ffwll.ch]

> Sent: Friday, August 16, 2019 3:03 PM

> To: Chris Wilson <chris@chris-wilson.co.uk>

> Cc: Intel Graphics Development <intel-gfx@lists.freedesktop.org>; Tang, CQ

> <cq.tang@intel.com>; Ursulin, Tvrtko <tvrtko.ursulin@intel.com>; Joonas

> Lahtinen <joonas.lahtinen@linux.intel.com>; Vetter, Daniel

> <daniel.vetter@intel.com>

> Subject: Re: [PATCH] drm/i915: Switch obj->mm.lock lockdep annotations on

> its head

> 

> On Fri, Aug 16, 2019 at 8:46 PM Chris Wilson <chris@chris-wilson.co.uk>

> wrote:

> >

> > Quoting Daniel Vetter (2019-08-16 19:23:36)

> > > The trouble with having a plain nesting flag for locks which do not

> > > naturally nest (unlike block devices and their partitions, which is

> > > the original motivation for nesting levels) is that lockdep will

> > > never spot a true deadlock if you screw up.

> > >

> > > This patch is an attempt at trying better, by highlighting a bit

> > > more the actual nature of the nesting that's going on. Essentially

> > > we have two kinds of objects:

> > >

> > > - objects without pages allocated, which cannot be on any lru and are

> > >   hence inaccessible to the shrinker.

> > >

> > > - objects which have pages allocated, which are on an lru, and which

> > >   the shrinker can decide to throw out.

> > >

> > > For the former type of object, memory allocations while holding

> > > obj->mm.lock are permissible. For the latter they are not. And

> > > get/put_pages transitions between the two types of objects.

> > >

> > > This is still not entirely fool-proof since the rules might change.

> > > But as long as we run such a code ever at runtime lockdep should be

> > > able to observe the inconsistency and complain (like with any other

> > > lockdep class that we've split up in multiple classes). But there

> > > are a few clear benefits:

> > >

> > > - We can drop the nesting flag parameter from

> > >   __i915_gem_object_put_pages, because that function by definition is

> > >   never going to allocate memory, and calling it on an object which

> > >   doesn't have its pages allocated would be a bug.

> > >

> > > - We strictly catch more bugs, since there's not only one place in the

> > >   entire tree which is annotated with the special class. All the

> > >   other places that had explicit lockdep nesting annotations we're now

> > >   going to leave up to lockdep again.

> > >

> > > - Specifically this catches stuff like calling get_pages from

> > >   put_pages (which isn't really a good idea, if we can call put_pages

> > >   so could the shrinker). I've seen patches do exactly that.

> > >

> > > Of course I fully expect CI will show me for the fool I am with this

> > > one here :-)

> > >

> > > v2: There can only be one (lockdep only has a cache for the first

> > > subclass, not for deeper ones, and we don't want to make these locks

> > > even slower). Still separate enums for better documentation.

> > >

> > > Real fix: don't forget about phys objs and pin_map(), and fix the

> > > shrinker to have the right annotations ... silly me.

> > >

> > > v3: Forgot userptr too ...

> > >

> > > v4: Improve comment for pages_pin_count, drop the IMPORTANT

> comment

> > > and instead prime lockdep (Chris).

> > >

> > > Cc: Chris Wilson <chris@chris-wilson.co.uk>

> > > Cc: "Tang, CQ" <cq.tang@intel.com>

> > > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

> > > Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

> > > Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>

> > > ---

> > >  drivers/gpu/drm/i915/gem/i915_gem_object.c       | 13 ++++++++++++-

> > >  drivers/gpu/drm/i915/gem/i915_gem_object.h       | 16

> +++++++++++++---

> > >  drivers/gpu/drm/i915/gem/i915_gem_object_types.h |  6 +++++-

> > >  drivers/gpu/drm/i915/gem/i915_gem_pages.c        |  9 ++++-----

> > >  drivers/gpu/drm/i915/gem/i915_gem_phys.c         |  2 +-

> > >  drivers/gpu/drm/i915/gem/i915_gem_shrinker.c     |  5 ++---

> > >  drivers/gpu/drm/i915/gem/i915_gem_userptr.c      |  4 ++--

> > >  drivers/gpu/drm/i915/gem/selftests/huge_pages.c  | 12 ++++++------

> > >  8 files changed, 45 insertions(+), 22 deletions(-)

> >

> > static inline int __must_check

> > i915_gem_object_pin_pages(struct drm_i915_gem_object *obj) {

> >         might_lock(&obj->mm.lock);

> >

> >         if (atomic_inc_not_zero(&obj->mm.pages_pin_count))

> >                 return 0;

> >

> >         return __i915_gem_object_get_pages(obj); }

> >

> > is now testing the wrong lock class.

> 

> Unfortunately there's no might_lock_nested.

> 

> But then, this is the best kind of wrong, because of the nesting we have:

> 

> obj->mm.lock#I915_MM_GET_PAGES -> fs_reclaim -> obj->mm.lock

> 

> So the might_lock we have actually checks for way more than just the "more

> correct" annotation. I think I'll just add the above as a comment and leave

> the code as-is. Thoughts?


I believe we should allow recursive calls to i915_gem_object_pin_pages(): if the object is already pinned, the next call just bumps up the pin count and returns. Otherwise, you only allow paired calls:
	i915_gem_object_pin_pages(obj);
	i915_gem_object_unpin_pages(obj);

Sometimes we need to do this:
	i915_gem_object_pin_pages(obj);
	.....
	i915_gem_object_pin_pages(obj);
	i915_gem_object_unpin_pages(obj);
	.....
	i915_gem_object_unpin_pages(obj);

The nested call is deep in the calling stack.  For example, we pin an object when doing put_pages(),  in put_pages() if we do swapping out, the blitter copying function will pin this object again, even though it is already pinned.

--CQ

> 

> > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c

> > > b/drivers/gpu/drm/i915/gem/i915_gem_object.c

> > > index 3929c3a6b281..d01258b175f5 100644

> > > --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c

> > > +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c

> > > @@ -22,6 +22,8 @@

> > >   *

> > >   */

> > >

> > > +#include <linux/sched/mm.h>

> > > +

> > >  #include "display/intel_frontbuffer.h"

> > >  #include "gt/intel_gt.h"

> > >  #include "i915_drv.h"

> > > @@ -61,6 +63,15 @@ void i915_gem_object_init(struct

> > > drm_i915_gem_object *obj,  {

> > >         mutex_init(&obj->mm.lock);

> > >

> > > +       if (IS_ENABLED(CONFIG_LOCKDEP)) {

> > > +               mutex_lock_nested(&obj->mm.lock, I915_MM_GET_PAGES);

> > > +               fs_reclaim_acquire(GFP_KERNEL);

> > > +               might_lock(&obj->mm.lock);

> > > +               fs_reclaim_release(GFP_KERNEL);

> > > +               mutex_unlock(&obj->mm.lock);

> > > +       }

> >

> > This is very powerful and sells a lot of churn.

> 

> Yeah that was the idea here. Plus I hope it's the easier to understand the

> annotations and lock nesting rules for obj->mm.lock this way - I freaked out

> quite a bit about the current one until you convinced me (which took its

> sweet time) that it's all fine. Maybe explicitly annotating get_pages and its

> special rule will help others (I can't play guinea pig twice unfortunately, so we

> can't test that theory).

> -Daniel

> --

> Daniel Vetter

> Software Engineer, Intel Corporation

> +41 (0) 79 365 57 48 - http://blog.ffwll.ch
> -----Original Message-----

> From: Daniel Vetter [mailto:daniel.vetter@ffwll.ch]

> Sent: Friday, August 16, 2019 3:08 PM

> To: Tang, CQ <cq.tang@intel.com>

> Cc: Intel Graphics Development <intel-gfx@lists.freedesktop.org>; Chris

> Wilson <chris@chris-wilson.co.uk>; Ursulin, Tvrtko

> <tvrtko.ursulin@intel.com>; Joonas Lahtinen

> <joonas.lahtinen@linux.intel.com>; Vetter, Daniel <daniel.vetter@intel.com>

> Subject: Re: [PATCH] drm/i915: Switch obj->mm.lock lockdep annotations on

> its head

> 

> On Fri, Aug 16, 2019 at 9:23 PM Tang, CQ <cq.tang@intel.com> wrote:

> >

> >

> >

> > > -----Original Message-----

> > > From: Daniel Vetter [mailto:daniel.vetter@ffwll.ch]

> > > Sent: Friday, August 16, 2019 11:24 AM

> > > To: Intel Graphics Development <intel-gfx@lists.freedesktop.org>

> > > Cc: Daniel Vetter <daniel.vetter@ffwll.ch>; Chris Wilson

> > > <chris@chris- wilson.co.uk>; Tang, CQ <cq.tang@intel.com>; Ursulin,

> > > Tvrtko <tvrtko.ursulin@intel.com>; Joonas Lahtinen

> > > <joonas.lahtinen@linux.intel.com>; Vetter, Daniel

> > > <daniel.vetter@intel.com>

> > > Subject: [PATCH] drm/i915: Switch obj->mm.lock lockdep annotations

> > > on its head

> > >

> > > The trouble with having a plain nesting flag for locks which do not

> > > naturally nest (unlike block devices and their partitions, which is

> > > the original motivation for nesting levels) is that lockdep will

> > > never spot a true deadlock if you screw up.

> > >

> > > This patch is an attempt at trying better, by highlighting a bit

> > > more the actual nature of the nesting that's going on. Essentially

> > > we have two kinds of

> > > objects:

> > >

> > > - objects without pages allocated, which cannot be on any lru and are

> > >   hence inaccessible to the shrinker.

> > >

> > > - objects which have pages allocated, which are on an lru, and which

> > >   the shrinker can decide to throw out.

> > >

> > > For the former type of object, memory allocations while holding

> > > obj->mm.lock are permissible. For the latter they are not. And

> > > get/put_pages transitions between the two types of objects.

> > >

> > > This is still not entirely fool-proof since the rules might change.

> > > But as long as we run such a code ever at runtime lockdep should be

> > > able to observe the inconsistency and complain (like with any other

> > > lockdep class that we've split up in multiple classes). But there are a few

> clear benefits:

> > >

> > > - We can drop the nesting flag parameter from

> > >   __i915_gem_object_put_pages, because that function by definition is

> > >   never going to allocate memory, and calling it on an object which

> > >   doesn't have its pages allocated would be a bug.

> > >

> > > - We strictly catch more bugs, since there's not only one place in the

> > >   entire tree which is annotated with the special class. All the

> > >   other places that had explicit lockdep nesting annotations we're now

> > >   going to leave up to lockdep again.

> > >

> > > - Specifically this catches stuff like calling get_pages from

> > >   put_pages (which isn't really a good idea, if we can call put_pages

> > >   so could the shrinker). I've seen patches do exactly that.

> > >

> > > Of course I fully expect CI will show me for the fool I am with this

> > > one here :-)

> > >

> > > v2: There can only be one (lockdep only has a cache for the first

> > > subclass, not for deeper ones, and we don't want to make these locks

> > > even slower). Still separate enums for better documentation.

> > >

> > > Real fix: don't forget about phys objs and pin_map(), and fix the

> > > shrinker to have the right annotations ... silly me.

> > >

> > > v3: Forgot userptr too ...

> > >

> > > v4: Improve comment for pages_pin_count, drop the IMPORTANT

> comment

> > > and instead prime lockdep (Chris).

> > >

> > > Cc: Chris Wilson <chris@chris-wilson.co.uk>

> > > Cc: "Tang, CQ" <cq.tang@intel.com>

> > > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

> > > Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

> > > Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>

> > > ---

> > >  drivers/gpu/drm/i915/gem/i915_gem_object.c       | 13 ++++++++++++-

> > >  drivers/gpu/drm/i915/gem/i915_gem_object.h       | 16

> +++++++++++++---

> > >  drivers/gpu/drm/i915/gem/i915_gem_object_types.h |  6 +++++-

> > >  drivers/gpu/drm/i915/gem/i915_gem_pages.c        |  9 ++++-----

> > >  drivers/gpu/drm/i915/gem/i915_gem_phys.c         |  2 +-

> > >  drivers/gpu/drm/i915/gem/i915_gem_shrinker.c     |  5 ++---

> > >  drivers/gpu/drm/i915/gem/i915_gem_userptr.c      |  4 ++--

> > >  drivers/gpu/drm/i915/gem/selftests/huge_pages.c  | 12 ++++++------

> > >  8 files changed, 45 insertions(+), 22 deletions(-)

> > >

> > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c

> > > b/drivers/gpu/drm/i915/gem/i915_gem_object.c

> > > index 3929c3a6b281..d01258b175f5 100644

> > > --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c

> > > +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c

> > > @@ -22,6 +22,8 @@

> > >   *

> > >   */

> > >

> > > +#include <linux/sched/mm.h>

> > > +

> > >  #include "display/intel_frontbuffer.h"

> > >  #include "gt/intel_gt.h"

> > >  #include "i915_drv.h"

> > > @@ -61,6 +63,15 @@ void i915_gem_object_init(struct

> > > drm_i915_gem_object *obj,  {

> > >       mutex_init(&obj->mm.lock);

> > >

> > > +     if (IS_ENABLED(CONFIG_LOCKDEP)) {

> > > +             mutex_lock_nested(&obj->mm.lock,

> > > I915_MM_GET_PAGES);

> > > +             fs_reclaim_acquire(GFP_KERNEL);

> > > +             might_lock(&obj->mm.lock);

> > > +             fs_reclaim_release(GFP_KERNEL);

> > > +             mutex_unlock(&obj->mm.lock);

> > > +     }

> > > +

> > > +

> > >       spin_lock_init(&obj->vma.lock);

> > >       INIT_LIST_HEAD(&obj->vma.list);

> > >

> > > @@ -191,7 +202,7 @@ static void __i915_gem_free_objects(struct

> > > drm_i915_private *i915,

> > >               GEM_BUG_ON(!list_empty(&obj->lut_list));

> > >

> > >               atomic_set(&obj->mm.pages_pin_count, 0);

> > > -             __i915_gem_object_put_pages(obj, I915_MM_NORMAL);

> > > +             __i915_gem_object_put_pages(obj);

> > >               GEM_BUG_ON(i915_gem_object_has_pages(obj));

> > >               bitmap_free(obj->bit_17);

> > >

> > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h

> > > b/drivers/gpu/drm/i915/gem/i915_gem_object.h

> > > index 3714cf234d64..5ce511ca7fa8 100644

> > > --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h

> > > +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h

> > > @@ -281,11 +281,21 @@ i915_gem_object_unpin_pages(struct

> > > drm_i915_gem_object *obj)

> > >

> > >  enum i915_mm_subclass { /* lockdep subclass for obj-

> > > >mm.lock/struct_mutex */

> > >       I915_MM_NORMAL = 0,

> > > -     I915_MM_SHRINKER /* called "recursively" from direct-reclaim-

> > > esque */

> > > +     /*

> > > +      * Only used by struct_mutex, when called "recursively" from

> > > +      * direct-reclaim-esque. Safe because there is only ever one

> > > +      * struct_mutex in the entire system. */

> > > +     I915_MM_SHRINKER = 1,

> > > +     /*

> > > +      * Used for obj->mm.lock when allocating pages. Safe because

> > > + the

> > > object

> > > +      * isn't yet on any LRU, and therefore the shrinker can't deadlock on

> > > +      * it. As soon as the object has pages, obj->mm.lock nests within

> > > +      * fs_reclaim.

> > > +      */

> > > +     I915_MM_GET_PAGES = 1,

> >

> > If both have the same value, why bother to use two names? Can we use a

> single generic name?

> 

> They're two totally different things. The commit message explains why I've

> picked the same value for both.

> 

> I mean you're essentially arguing (taken to the extreme conclusion at least)

> that every #define SOMETHING 1 should be replaced by the same define.

> That defeats the point of having meaningful names for values ...


It makes some sense, but isn't it better to define two sets of enums — one for struct_mutex and one for obj->mm.lock? Or do the two still have some connection?

--CQ

> 

> Cheers, Daniel

> 

> >

> > --CQ

> >

> > >  };

> > >

> > > -int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,

> > > -                             enum i915_mm_subclass subclass);

> > > +int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj);

> > >  void i915_gem_object_truncate(struct drm_i915_gem_object *obj);

> > > void i915_gem_object_writeback(struct drm_i915_gem_object *obj);

> > >

> > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h

> > > b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h

> > > index d474c6ac4100..42d114f27d1a 100644

> > > --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h

> > > +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h

> > > @@ -157,7 +157,11 @@ struct drm_i915_gem_object {

> > >       unsigned int pin_global;

> > >

> > >       struct {

> > > -             struct mutex lock; /* protects the pages and their use */

> > > +             /*

> > > +              * Protects the pages and their use. Do not use directly, but

> > > +              * instead go through the pin/unpin interfaces.

> > > +              */

> > > +             struct mutex lock;

> > >               atomic_t pages_pin_count;

> > >

> > >               struct sg_table *pages; diff --git

> > > a/drivers/gpu/drm/i915/gem/i915_gem_pages.c

> > > b/drivers/gpu/drm/i915/gem/i915_gem_pages.c

> > > index 18f0ce0135c1..202526e8910f 100644

> > > --- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c

> > > +++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c

> > > @@ -101,7 +101,7 @@ int __i915_gem_object_get_pages(struct

> > > drm_i915_gem_object *obj)  {

> > >       int err;

> > >

> > > -     err = mutex_lock_interruptible(&obj->mm.lock);

> > > +     err = mutex_lock_interruptible_nested(&obj->mm.lock,

> > > +I915_MM_GET_PAGES);

> > >       if (err)

> > >               return err;

> > >

> > > @@ -179,8 +179,7 @@ __i915_gem_object_unset_pages(struct

> > > drm_i915_gem_object *obj)

> > >       return pages;

> > >  }

> > >

> > > -int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,

> > > -                             enum i915_mm_subclass subclass)

> > > +int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj)

> > >  {

> > >       struct sg_table *pages;

> > >       int err;

> > > @@ -191,7 +190,7 @@ int __i915_gem_object_put_pages(struct

> > > drm_i915_gem_object *obj,

> > >       GEM_BUG_ON(atomic_read(&obj->bind_count));

> > >

> > >       /* May be called by shrinker from within get_pages() (on

> > > another bo) */

> > > -     mutex_lock_nested(&obj->mm.lock, subclass);

> > > +     mutex_lock(&obj->mm.lock);

> > >       if (unlikely(atomic_read(&obj->mm.pages_pin_count))) {

> > >               err = -EBUSY;

> > >               goto unlock;

> > > @@ -285,7 +284,7 @@ void *i915_gem_object_pin_map(struct

> > > drm_i915_gem_object *obj,

> > >       if (unlikely(!i915_gem_object_has_struct_page(obj)))

> > >               return ERR_PTR(-ENXIO);

> > >

> > > -     err = mutex_lock_interruptible(&obj->mm.lock);

> > > +     err = mutex_lock_interruptible_nested(&obj->mm.lock,

> > > +I915_MM_GET_PAGES);

> > >       if (err)

> > >               return ERR_PTR(err);

> > >

> > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_phys.c

> > > b/drivers/gpu/drm/i915/gem/i915_gem_phys.c

> > > index 102fd7a23d3d..209925be8a76 100644

> > > --- a/drivers/gpu/drm/i915/gem/i915_gem_phys.c

> > > +++ b/drivers/gpu/drm/i915/gem/i915_gem_phys.c

> > > @@ -156,7 +156,7 @@ int i915_gem_object_attach_phys(struct

> > > drm_i915_gem_object *obj, int align)

> > >       if (err)

> > >               return err;

> > >

> > > -     mutex_lock(&obj->mm.lock);

> > > +     mutex_lock_nested(&obj->mm.lock, I915_MM_GET_PAGES);

> > >

> > >       if (obj->mm.madv != I915_MADV_WILLNEED) {

> > >               err = -EFAULT;

> > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c

> > > b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c

> > > index edd21d14e64f..0b0d6e27b996 100644

> > > --- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c

> > > +++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c

> > > @@ -98,7 +98,7 @@ static bool unsafe_drop_pages(struct

> > > drm_i915_gem_object *obj,

> > >               flags = I915_GEM_OBJECT_UNBIND_ACTIVE;

> > >

> > >       if (i915_gem_object_unbind(obj, flags) == 0)

> > > -             __i915_gem_object_put_pages(obj, I915_MM_SHRINKER);

> > > +             __i915_gem_object_put_pages(obj);

> > >

> > >       return !i915_gem_object_has_pages(obj);  } @@ -254,8 +254,7 @@

> > > i915_gem_shrink(struct drm_i915_private *i915,

> > >

> > >                       if (unsafe_drop_pages(obj, shrink)) {

> > >                               /* May arrive from get_pages on

> > > another bo */

> > > -                             mutex_lock_nested(&obj->mm.lock,

> > > -                                               I915_MM_SHRINKER);

> > > +                             mutex_lock(&obj->mm.lock);

> > >                               if (!i915_gem_object_has_pages(obj)) {

> > >                                       try_to_writeback(obj, shrink);

> > >                                       count += obj->base.size >>

> > > PAGE_SHIFT; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c

> > > b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c

> > > index 70dc506a5426..f3b3bc7c32cb 100644

> > > --- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c

> > > +++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c

> > > @@ -158,7 +158,7 @@ userptr_mn_invalidate_range_start(struct

> > > mmu_notifier *_mn,

> > >               ret = i915_gem_object_unbind(obj,

> > >

> > > I915_GEM_OBJECT_UNBIND_ACTIVE);

> > >               if (ret == 0)

> > > -                     ret = __i915_gem_object_put_pages(obj,

> > > I915_MM_SHRINKER);

> > > +                     ret = __i915_gem_object_put_pages(obj);

> > >               i915_gem_object_put(obj);

> > >               if (ret)

> > >                       goto unlock;

> > > @@ -514,7 +514,7 @@ __i915_gem_userptr_get_pages_worker(struct

> > > work_struct *_work)

> > >               }

> > >       }

> > >

> > > -     mutex_lock(&obj->mm.lock);

> > > +     mutex_lock_nested(&obj->mm.lock, I915_MM_GET_PAGES);

> > >       if (obj->userptr.work == &work->work) {

> > >               struct sg_table *pages = ERR_PTR(ret);

> > >

> > > diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c

> > > b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c

> > > index 6cbd4a668c9a..df586035c33e 100644

> > > --- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c

> > > +++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c

> > > @@ -562,7 +562,7 @@ static int igt_mock_ppgtt_misaligned_dma(void

> *arg)

> > >               i915_vma_close(vma);

> > >

> > >               i915_gem_object_unpin_pages(obj);

> > > -             __i915_gem_object_put_pages(obj, I915_MM_NORMAL);

> > > +             __i915_gem_object_put_pages(obj);

> > >               i915_gem_object_put(obj);

> > >       }

> > >

> > > @@ -590,7 +590,7 @@ static void close_object_list(struct list_head

> > > *objects,

> > >

> > >               list_del(&obj->st_link);

> > >               i915_gem_object_unpin_pages(obj);

> > > -             __i915_gem_object_put_pages(obj, I915_MM_NORMAL);

> > > +             __i915_gem_object_put_pages(obj);

> > >               i915_gem_object_put(obj);

> > >       }

> > >  }

> > > @@ -860,7 +860,7 @@ static int igt_mock_ppgtt_64K(void *arg)

> > >                       i915_vma_close(vma);

> > >

> > >                       i915_gem_object_unpin_pages(obj);

> > > -                     __i915_gem_object_put_pages(obj,

> > > I915_MM_NORMAL);

> > > +                     __i915_gem_object_put_pages(obj);

> > >                       i915_gem_object_put(obj);

> > >               }

> > >       }

> > > @@ -1268,7 +1268,7 @@ static int igt_ppgtt_exhaust_huge(void *arg)

> > >                       }

> > >

> > >                       i915_gem_object_unpin_pages(obj);

> > > -                     __i915_gem_object_put_pages(obj,

> > > I915_MM_NORMAL);

> > > +                     __i915_gem_object_put_pages(obj);

> > >                       i915_gem_object_put(obj);

> > >               }

> > >       }

> > > @@ -1330,7 +1330,7 @@ static int igt_ppgtt_internal_huge(void *arg)

> > >               }

> > >

> > >               i915_gem_object_unpin_pages(obj);

> > > -             __i915_gem_object_put_pages(obj, I915_MM_NORMAL);

> > > +             __i915_gem_object_put_pages(obj);

> > >               i915_gem_object_put(obj);

> > >       }

> > >

> > > @@ -1399,7 +1399,7 @@ static int igt_ppgtt_gemfs_huge(void *arg)

> > >               }

> > >

> > >               i915_gem_object_unpin_pages(obj);

> > > -             __i915_gem_object_put_pages(obj, I915_MM_NORMAL);

> > > +             __i915_gem_object_put_pages(obj);

> > >               i915_gem_object_put(obj);

> > >       }

> > >

> > > --

> > > 2.23.0.rc1

> >

> 

> 

> --

> Daniel Vetter

> Software Engineer, Intel Corporation

> +41 (0) 79 365 57 48 - http://blog.ffwll.ch