[3/3] drm/amdgpu: replace get_user_pages with HMM address mirror helpers v6

Submitted by Yang, Philip on Feb. 4, 2019, 6:23 p.m.

Details

Message ID 20190204182237.2641-4-Philip.Yang@amd.com
State New
Headers show
Series "Use HMM to replace get_user_pages" ( rev: 2 ) in AMD X.Org drivers

Not browsing as part of any series.

Commit Message

Yang, Philip Feb. 4, 2019, 6:23 p.m.
Use HMM helper function hmm_vma_fault() to get physical pages backing
userptr and start CPU page table update track of those pages. Then use
hmm_vma_range_done() to check if those pages are updated before
amdgpu_cs_submit for gfx or before user queues are resumed for kfd.

If userptr pages are updated, for gfx, amdgpu_cs_ioctl will restart
from scratch, for kfd, restore worker is rescheduled to retry.

HMM simplify the CPU page table concurrent update check, so remove
guptasklock, mmu_invalidations, last_set_pages fields from
amdgpu_ttm_tt struct.

HMM does not pin the page (increase page ref count), so remove related
operations like release_pages(), put_page(), mark_page_dirty().

Change-Id: I2e8c0c6f0d8c21e5596a32d7fc91762778bc9e67
Signed-off-by: Philip Yang <Philip.Yang@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h    |   1 -
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  |  95 +++-------
 drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h   |   2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c        | 158 +++++++---------
 drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c       |  14 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c        |  25 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h        |   4 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c       | 173 ++++++++----------
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h       |   3 +-
 9 files changed, 198 insertions(+), 277 deletions(-)

Patch hide | download patch | download mbox

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 0b31a1859023..0e1711a75b68 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -61,7 +61,6 @@  struct kgd_mem {
 
 	atomic_t invalid;
 	struct amdkfd_process_info *process_info;
-	struct page **user_pages;
 
 	struct amdgpu_sync sync;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index d7b10d79f1de..ae2d838d31ea 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -582,28 +582,12 @@  static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm,
 		goto out;
 	}
 
-	/* If no restore worker is running concurrently, user_pages
-	 * should not be allocated
-	 */
-	WARN(mem->user_pages, "Leaking user_pages array");
-
-	mem->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages,
-					   sizeof(struct page *),
-					   GFP_KERNEL | __GFP_ZERO);
-	if (!mem->user_pages) {
-		pr_err("%s: Failed to allocate pages array\n", __func__);
-		ret = -ENOMEM;
-		goto unregister_out;
-	}
-
-	ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, mem->user_pages);
+	ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, bo->tbo.ttm->pages);
 	if (ret) {
 		pr_err("%s: Failed to get user pages: %d\n", __func__, ret);
-		goto free_out;
+		goto unregister_out;
 	}
 
-	amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, mem->user_pages);
-
 	ret = amdgpu_bo_reserve(bo, true);
 	if (ret) {
 		pr_err("%s: Failed to reserve BO\n", __func__);
@@ -616,11 +600,7 @@  static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm,
 	amdgpu_bo_unreserve(bo);
 
 release_out:
-	if (ret)
-		release_pages(mem->user_pages, bo->tbo.ttm->num_pages);
-free_out:
-	kvfree(mem->user_pages);
-	mem->user_pages = NULL;
+	amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
 unregister_out:
 	if (ret)
 		amdgpu_mn_unregister(bo);
@@ -679,7 +659,6 @@  static int reserve_bo_and_vm(struct kgd_mem *mem,
 	ctx->kfd_bo.priority = 0;
 	ctx->kfd_bo.tv.bo = &bo->tbo;
 	ctx->kfd_bo.tv.num_shared = 1;
-	ctx->kfd_bo.user_pages = NULL;
 	list_add(&ctx->kfd_bo.tv.head, &ctx->list);
 
 	amdgpu_vm_get_pd_bo(vm, &ctx->list, &ctx->vm_pd[0]);
@@ -743,7 +722,6 @@  static int reserve_bo_and_cond_vms(struct kgd_mem *mem,
 	ctx->kfd_bo.priority = 0;
 	ctx->kfd_bo.tv.bo = &bo->tbo;
 	ctx->kfd_bo.tv.num_shared = 1;
-	ctx->kfd_bo.user_pages = NULL;
 	list_add(&ctx->kfd_bo.tv.head, &ctx->list);
 
 	i = 0;
@@ -1371,15 +1349,6 @@  int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
 	list_del(&bo_list_entry->head);
 	mutex_unlock(&process_info->lock);
 
-	/* Free user pages if necessary */
-	if (mem->user_pages) {
-		pr_debug("%s: Freeing user_pages array\n", __func__);
-		if (mem->user_pages[0])
-			release_pages(mem->user_pages,
-					mem->bo->tbo.ttm->num_pages);
-		kvfree(mem->user_pages);
-	}
-
 	ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx);
 	if (unlikely(ret))
 		return ret;
@@ -1855,25 +1824,11 @@  static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
 
 		bo = mem->bo;
 
-		if (!mem->user_pages) {
-			mem->user_pages =
-				kvmalloc_array(bo->tbo.ttm->num_pages,
-						 sizeof(struct page *),
-						 GFP_KERNEL | __GFP_ZERO);
-			if (!mem->user_pages) {
-				pr_err("%s: Failed to allocate pages array\n",
-				       __func__);
-				return -ENOMEM;
-			}
-		} else if (mem->user_pages[0]) {
-			release_pages(mem->user_pages, bo->tbo.ttm->num_pages);
-		}
-
 		/* Get updated user pages */
 		ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm,
-						   mem->user_pages);
+						   bo->tbo.ttm->pages);
 		if (ret) {
-			mem->user_pages[0] = NULL;
+			bo->tbo.ttm->pages[0] = NULL;
 			pr_info("%s: Failed to get user pages: %d\n",
 				__func__, ret);
 			/* Pretend it succeeded. It will fail later
@@ -1882,12 +1837,6 @@  static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
 			 * stalled user mode queues.
 			 */
 		}
-
-		/* Mark the BO as valid unless it was invalidated
-		 * again concurrently
-		 */
-		if (atomic_cmpxchg(&mem->invalid, invalid, 0) != invalid)
-			return -EAGAIN;
 	}
 
 	return 0;
@@ -1917,7 +1866,8 @@  static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
 				     GFP_KERNEL);
 	if (!pd_bo_list_entries) {
 		pr_err("%s: Failed to allocate PD BO list entries\n", __func__);
-		return -ENOMEM;
+		ret = -ENOMEM;
+		goto out_no_mem;
 	}
 
 	INIT_LIST_HEAD(&resv_list);
@@ -1941,7 +1891,7 @@  static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
 	ret = ttm_eu_reserve_buffers(&ticket, &resv_list, false, &duplicates);
 	WARN(!list_empty(&duplicates), "Duplicates should be empty");
 	if (ret)
-		goto out;
+		goto out_free;
 
 	amdgpu_sync_create(&sync);
 
@@ -1967,10 +1917,8 @@  static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
 
 		bo = mem->bo;
 
-		/* Copy pages array and validate the BO if we got user pages */
-		if (mem->user_pages[0]) {
-			amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm,
-						     mem->user_pages);
+		/* Validate the BO if we got user pages */
+		if (bo->tbo.ttm->pages[0]) {
 			amdgpu_bo_placement_from_domain(bo, mem->domain);
 			ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
 			if (ret) {
@@ -1979,16 +1927,16 @@  static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
 			}
 		}
 
-		/* Validate succeeded, now the BO owns the pages, free
-		 * our copy of the pointer array. Put this BO back on
-		 * the userptr_valid_list. If we need to revalidate
-		 * it, we need to start from scratch.
-		 */
-		kvfree(mem->user_pages);
-		mem->user_pages = NULL;
 		list_move_tail(&mem->validate_list.head,
 			       &process_info->userptr_valid_list);
 
+		/* Stop HMM track the userptr update. We dont check the return
+		 * value for concurrent CPU page table update because we will
+		 * reschedule the restore worker if process_info->evicted_bos
+		 * is updated.
+		 */
+		amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
+
 		/* Update mapping. If the BO was not validated
 		 * (because we couldn't get user pages), this will
 		 * clear the page table entries, which will result in
@@ -2022,8 +1970,15 @@  static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
 	ttm_eu_backoff_reservation(&ticket, &resv_list);
 	amdgpu_sync_wait(&sync, false);
 	amdgpu_sync_free(&sync);
-out:
+out_free:
 	kfree(pd_bo_list_entries);
+out_no_mem:
+	list_for_each_entry_safe(mem, tmp_mem,
+				 &process_info->userptr_inval_list,
+				 validate_list.head) {
+		bo = mem->bo;
+		amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
+	}
 
 	return ret;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
index 7c5f5d1601e6..a130e766cbdb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
@@ -36,7 +36,7 @@  struct amdgpu_bo_list_entry {
 	struct amdgpu_bo_va		*bo_va;
 	uint32_t			priority;
 	struct page			**user_pages;
-	int				user_invalidated;
+	bool				user_invalidated;
 };
 
 struct amdgpu_bo_list {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 1c49b8266d69..451a1fab17d6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -52,7 +52,6 @@  static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
 	p->uf_entry.tv.bo = &bo->tbo;
 	/* One for TTM and one for the CS job */
 	p->uf_entry.tv.num_shared = 2;
-	p->uf_entry.user_pages = NULL;
 
 	drm_gem_object_put_unlocked(gobj);
 
@@ -539,14 +538,14 @@  static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
 		if (usermm && usermm != current->mm)
 			return -EPERM;
 
-		/* Check if we have user pages and nobody bound the BO already */
-		if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm) &&
-		    lobj->user_pages) {
+		if (amdgpu_ttm_tt_is_userptr(bo->tbo.ttm) &&
+		    lobj->user_invalidated && lobj->user_pages) {
 			amdgpu_bo_placement_from_domain(bo,
 							AMDGPU_GEM_DOMAIN_CPU);
 			r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
 			if (r)
 				return r;
+
 			amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm,
 						     lobj->user_pages);
 			binding_userptr = true;
@@ -577,7 +576,6 @@  static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 	struct amdgpu_bo *gds;
 	struct amdgpu_bo *gws;
 	struct amdgpu_bo *oa;
-	unsigned tries = 10;
 	int r;
 
 	INIT_LIST_HEAD(&p->validated);
@@ -613,79 +611,45 @@  static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 	if (p->uf_entry.tv.bo && !ttm_to_amdgpu_bo(p->uf_entry.tv.bo)->parent)
 		list_add(&p->uf_entry.tv.head, &p->validated);
 
-	while (1) {
-		struct list_head need_pages;
-
-		r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,
-					   &duplicates);
-		if (unlikely(r != 0)) {
-			if (r != -ERESTARTSYS)
-				DRM_ERROR("ttm_eu_reserve_buffers failed.\n");
-			goto error_free_pages;
-		}
-
-		INIT_LIST_HEAD(&need_pages);
-		amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
-			struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
-
-			if (amdgpu_ttm_tt_userptr_invalidated(bo->tbo.ttm,
-				 &e->user_invalidated) && e->user_pages) {
-
-				/* We acquired a page array, but somebody
-				 * invalidated it. Free it and try again
-				 */
-				release_pages(e->user_pages,
-					      bo->tbo.ttm->num_pages);
-				kvfree(e->user_pages);
-				e->user_pages = NULL;
-			}
-
-			if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm) &&
-			    !e->user_pages) {
-				list_del(&e->tv.head);
-				list_add(&e->tv.head, &need_pages);
-
-				amdgpu_bo_unreserve(bo);
-			}
+	/* Get userptr backing pages. If pages are updated after registered
+	 * in amdgpu_gem_userptr_ioctl(), amdgpu_cs_list_validate() will do
+	 * amdgpu_ttm_backend_bind() to flush and invalidate new pages
+	 */
+	amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
+		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
+		bool userpage_invalidated = false;
+		int i;
+
+		e->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages,
+					sizeof(struct page *),
+					GFP_KERNEL | __GFP_ZERO);
+		if (!e->user_pages) {
+			DRM_ERROR("calloc failure\n");
+			return -ENOMEM;
 		}
 
-		if (list_empty(&need_pages))
-			break;
-
-		/* Unreserve everything again. */
-		ttm_eu_backoff_reservation(&p->ticket, &p->validated);
-
-		/* We tried too many times, just abort */
-		if (!--tries) {
-			r = -EDEADLK;
-			DRM_ERROR("deadlock in %s\n", __func__);
-			goto error_free_pages;
+		r = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, e->user_pages);
+		if (r) {
+			kvfree(e->user_pages);
+			e->user_pages = NULL;
+			return r;
 		}
 
-		/* Fill the page arrays for all userptrs. */
-		list_for_each_entry(e, &need_pages, tv.head) {
-			struct ttm_tt *ttm = e->tv.bo->ttm;
-
-			e->user_pages = kvmalloc_array(ttm->num_pages,
-							 sizeof(struct page*),
-							 GFP_KERNEL | __GFP_ZERO);
-			if (!e->user_pages) {
-				r = -ENOMEM;
-				DRM_ERROR("calloc failure in %s\n", __func__);
-				goto error_free_pages;
-			}
-
-			r = amdgpu_ttm_tt_get_user_pages(ttm, e->user_pages);
-			if (r) {
-				DRM_ERROR("amdgpu_ttm_tt_get_user_pages failed.\n");
-				kvfree(e->user_pages);
-				e->user_pages = NULL;
-				goto error_free_pages;
+		for (i = 0; i < bo->tbo.ttm->num_pages; i++) {
+			if (bo->tbo.ttm->pages[i] != e->user_pages[i]) {
+				userpage_invalidated = true;
+				break;
 			}
 		}
+		e->user_invalidated = userpage_invalidated;
+	}
 
-		/* And try again. */
-		list_splice(&need_pages, &p->validated);
+	r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,
+				   &duplicates);
+	if (unlikely(r != 0)) {
+		if (r != -ERESTARTSYS)
+			DRM_ERROR("ttm_eu_reserve_buffers failed.\n");
+		goto out;
 	}
 
 	amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold,
@@ -754,17 +718,7 @@  static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 error_validate:
 	if (r)
 		ttm_eu_backoff_reservation(&p->ticket, &p->validated);
-
-error_free_pages:
-
-	amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
-		if (!e->user_pages)
-			continue;
-
-		release_pages(e->user_pages, e->tv.bo->ttm->num_pages);
-		kvfree(e->user_pages);
-	}
-
+out:
 	return r;
 }
 
@@ -1213,8 +1167,7 @@  static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 	struct amdgpu_bo_list_entry *e;
 	struct amdgpu_job *job;
 	uint64_t seq;
-
-	int r;
+	int r = 0;
 
 	job = p->job;
 	p->job = NULL;
@@ -1223,15 +1176,21 @@  static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 	if (r)
 		goto error_unlock;
 
-	/* No memory allocation is allowed while holding the mn lock */
+	/* No memory allocation is allowed while holding the mn lock.
+	 * p->mn is hold until amdgpu_cs_submit is finished and fence is added
+	 * to BOs.
+	 */
 	amdgpu_mn_lock(p->mn);
+
+	/* If userptr are updated after amdgpu_cs_parser_bos(), restart cs */
 	amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
 		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
 
-		if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) {
-			r = -ERESTARTSYS;
-			goto error_abort;
-		}
+		r |= !amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
+	}
+	if (r) {
+		r = -ERESTARTSYS;
+		goto error_abort;
 	}
 
 	job->owner = p->filp;
@@ -1278,14 +1237,20 @@  static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 {
 	struct amdgpu_device *adev = dev->dev_private;
-	union drm_amdgpu_cs *cs = data;
-	struct amdgpu_cs_parser parser = {};
-	bool reserved_buffers = false;
+	union drm_amdgpu_cs *cs;
+	struct amdgpu_cs_parser parser;
+	bool reserved_buffers;
+	int tries = 10;
 	int i, r;
 
 	if (!adev->accel_working)
 		return -EBUSY;
 
+restart:
+	memset(&parser, 0, sizeof(parser));
+	cs = data;
+	reserved_buffers = false;
+
 	parser.adev = adev;
 	parser.filp = filp;
 
@@ -1327,6 +1292,15 @@  int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 
 out:
 	amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
+
+	if (r == -ERESTARTSYS) {
+		if (!--tries) {
+			DRM_ERROR("Possible deadlock? Retry too many times\n");
+			return -EDEADLK;
+		}
+		goto restart;
+	}
+
 	return r;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index d21dd2f369da..555285e329ed 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -329,26 +329,24 @@  int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
 
 		r = amdgpu_bo_reserve(bo, true);
 		if (r)
-			goto free_pages;
+			goto user_pages_done;
 
 		amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
 		r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
 		amdgpu_bo_unreserve(bo);
 		if (r)
-			goto free_pages;
+			goto user_pages_done;
 	}
 
 	r = drm_gem_handle_create(filp, gobj, &handle);
-	/* drop reference from allocate - handle holds it now */
-	drm_gem_object_put_unlocked(gobj);
 	if (r)
-		return r;
+		goto user_pages_done;
 
 	args->handle = handle;
-	return 0;
 
-free_pages:
-	release_pages(bo->tbo.ttm->pages, bo->tbo.ttm->num_pages);
+user_pages_done:
+	if (args->flags & AMDGPU_GEM_USERPTR_VALIDATE)
+		amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
 
 release_object:
 	drm_gem_object_put_unlocked(gobj);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
index e356867d2308..fa2516016c43 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
@@ -222,8 +222,6 @@  static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,
 			true, false, MAX_SCHEDULE_TIMEOUT);
 		if (r <= 0)
 			DRM_ERROR("(%ld) failed to wait for user bo\n", r);
-
-		amdgpu_ttm_tt_mark_user_pages(bo->tbo.ttm);
 	}
 }
 
@@ -504,3 +502,26 @@  void amdgpu_mn_unregister(struct amdgpu_bo *bo)
 	mutex_unlock(&adev->mn_lock);
 }
 
+/* flags used by HMM internal, not related to CPU/GPU PTE flags */
+static const uint64_t hmm_range_flags[HMM_PFN_FLAG_MAX] = {
+		(1 << 0), /* HMM_PFN_VALID */
+		(1 << 1), /* HMM_PFN_WRITE */
+		0 /* HMM_PFN_DEVICE_PRIVATE */
+};
+
+static const uint64_t hmm_range_values[HMM_PFN_VALUE_MAX] = {
+		0xfffffffffffffffeUL, /* HMM_PFN_ERROR */
+		0, /* HMM_PFN_NONE */
+		0xfffffffffffffffcUL /* HMM_PFN_SPECIAL */
+};
+
+void amdgpu_hmm_init_range(struct hmm_range *range)
+{
+	if (range) {
+		range->flags = hmm_range_flags;
+		range->values = hmm_range_values;
+		range->pfn_shift = PAGE_SHIFT;
+		range->pfns = NULL;
+		INIT_LIST_HEAD(&range->list);
+	}
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
index 0a51fd00021c..4803e216e174 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
@@ -25,9 +25,10 @@ 
 #define __AMDGPU_MN_H__
 
 /*
- * MMU Notifier
+ * HMM mirror
  */
 struct amdgpu_mn;
+struct hmm_range;
 
 enum amdgpu_mn_type {
 	AMDGPU_MN_TYPE_GFX,
@@ -41,6 +42,7 @@  struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
 				enum amdgpu_mn_type type);
 int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr);
 void amdgpu_mn_unregister(struct amdgpu_bo *bo);
+void amdgpu_hmm_init_range(struct hmm_range *range);
 #else
 static inline void amdgpu_mn_lock(struct amdgpu_mn *mn) {}
 static inline void amdgpu_mn_unlock(struct amdgpu_mn *mn) {}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 73e71e61dc99..3fd7851da0c0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -43,6 +43,7 @@ 
 #include <linux/pagemap.h>
 #include <linux/debugfs.h>
 #include <linux/iommu.h>
+#include <linux/hmm.h>
 #include "amdgpu.h"
 #include "amdgpu_object.h"
 #include "amdgpu_trace.h"
@@ -705,11 +706,6 @@  static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
 /*
  * TTM backend functions.
  */
-struct amdgpu_ttm_gup_task_list {
-	struct list_head	list;
-	struct task_struct	*task;
-};
-
 struct amdgpu_ttm_tt {
 	struct ttm_dma_tt	ttm;
 	u64			offset;
@@ -718,85 +714,96 @@  struct amdgpu_ttm_tt {
 	uint32_t		userflags;
 	spinlock_t              guptasklock;
 	struct list_head        guptasks;
-	atomic_t		mmu_invalidations;
-	uint32_t		last_set_pages;
+	struct hmm_range	range;
 };
 
 /**
- * amdgpu_ttm_tt_get_user_pages - Pin pages of memory pointed to by a USERPTR
- * pointer to memory
+ * amdgpu_ttm_tt_get_user_pages - get device accessible pages that back user
+ * memory and start HMM tracking CPU page table update
  *
- * Called by amdgpu_gem_userptr_ioctl() and amdgpu_cs_parser_bos().
- * This provides a wrapper around the get_user_pages() call to provide
- * device accessible pages that back user memory.
+ * Calling function must call amdgpu_ttm_tt_userptr_range_done() once and only
+ * once afterwards to stop HMM tracking
  */
 int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
 {
 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
 	struct mm_struct *mm = gtt->usertask->mm;
-	unsigned int flags = 0;
-	unsigned pinned = 0;
-	int r;
+	unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;
+	struct hmm_range *range = &gtt->range;
+	int r = 0, i;
 
 	if (!mm) /* Happens during process shutdown */
 		return -ESRCH;
 
-	if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY))
-		flags |= FOLL_WRITE;
+	amdgpu_hmm_init_range(range);
 
 	down_read(&mm->mmap_sem);
 
-	if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) {
-		/*
-		 * check that we only use anonymous memory to prevent problems
-		 * with writeback
-		 */
-		unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;
-		struct vm_area_struct *vma;
+	range->vma = find_vma(mm, gtt->userptr);
+	if (!range_in_vma(range->vma, gtt->userptr, end))
+		r = -EFAULT;
+	else if ((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) &&
+		range->vma->vm_file)
+		r = -EPERM;
+	if (r)
+		goto out;
 
-		vma = find_vma(mm, gtt->userptr);
-		if (!vma || vma->vm_file || vma->vm_end < end) {
-			up_read(&mm->mmap_sem);
-			return -EPERM;
-		}
+	range->pfns = kvmalloc_array(ttm->num_pages, sizeof(uint64_t),
+				     GFP_KERNEL);
+	if (range->pfns == NULL) {
+		r = -ENOMEM;
+		goto out;
 	}
+	range->start = gtt->userptr;
+	range->end = end;
 
-	/* loop enough times using contiguous pages of memory */
-	do {
-		unsigned num_pages = ttm->num_pages - pinned;
-		uint64_t userptr = gtt->userptr + pinned * PAGE_SIZE;
-		struct page **p = pages + pinned;
-		struct amdgpu_ttm_gup_task_list guptask;
+	range->pfns[0] = range->flags[HMM_PFN_VALID];
+	range->pfns[0] |= amdgpu_ttm_tt_is_readonly(ttm) ?
+				0 : range->flags[HMM_PFN_WRITE];
+	for (i = 1; i < ttm->num_pages; i++)
+		range->pfns[i] = range->pfns[0];
 
-		guptask.task = current;
-		spin_lock(&gtt->guptasklock);
-		list_add(&guptask.list, &gtt->guptasks);
-		spin_unlock(&gtt->guptasklock);
+	/* This may trigger page table update */
+	r = hmm_vma_fault(range, true);
+	if (r)
+		goto out_free_pfns;
 
-		if (mm == current->mm)
-			r = get_user_pages(userptr, num_pages, flags, p, NULL);
-		else
-			r = get_user_pages_remote(gtt->usertask,
-					mm, userptr, num_pages,
-					flags, p, NULL, NULL);
+	up_read(&mm->mmap_sem);
 
-		spin_lock(&gtt->guptasklock);
-		list_del(&guptask.list);
-		spin_unlock(&gtt->guptasklock);
+	for (i = 0; i < ttm->num_pages; i++)
+		pages[i] = hmm_pfn_to_page(range, range->pfns[i]);
 
-		if (r < 0)
-			goto release_pages;
+	return 0;
 
-		pinned += r;
+out_free_pfns:
+	kvfree(range->pfns);
+	range->pfns = NULL;
+out:
+	up_read(&mm->mmap_sem);
+	return r;
+}
 
-	} while (pinned < ttm->num_pages);
+/**
+ * amdgpu_ttm_tt_userptr_range_done - stop HMM track the CPU page table change
+ * Check if the pages backing this ttm range have been invalidated
+ *
+ * Returns: true if pages are still valid
+ */
+bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm)
+{
+	struct amdgpu_ttm_tt *gtt = (void *)ttm;
+	bool r = false;
 
-	up_read(&mm->mmap_sem);
-	return 0;
+	if (!gtt || !gtt->userptr)
+		return false;
+
+	WARN_ONCE(!gtt->range.pfns, "No user pages to check\n");
+	if (gtt->range.pfns) {
+		r = hmm_vma_range_done(&gtt->range);
+		kvfree(gtt->range.pfns);
+		gtt->range.pfns = NULL;
+	}
 
-release_pages:
-	release_pages(pages, pinned);
-	up_read(&mm->mmap_sem);
 	return r;
 }
 
@@ -809,16 +816,10 @@  int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
  */
 void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages)
 {
-	struct amdgpu_ttm_tt *gtt = (void *)ttm;
 	unsigned i;
 
-	gtt->last_set_pages = atomic_read(&gtt->mmu_invalidations);
-	for (i = 0; i < ttm->num_pages; ++i) {
-		if (ttm->pages[i])
-			put_page(ttm->pages[i]);
-
+	for (i = 0; i < ttm->num_pages; ++i)
 		ttm->pages[i] = pages ? pages[i] : NULL;
-	}
 }
 
 /**
@@ -903,10 +904,11 @@  static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm)
 	/* unmap the pages mapped to the device */
 	dma_unmap_sg(adev->dev, ttm->sg->sgl, ttm->sg->nents, direction);
 
-	/* mark the pages as dirty */
-	amdgpu_ttm_tt_mark_user_pages(ttm);
-
 	sg_free_table(ttm->sg);
+
+	if (gtt->range.pfns &&
+	    ttm->pages[0] == hmm_pfn_to_page(&gtt->range, gtt->range.pfns[0]))
+		WARN_ONCE(1, "Missing get_user_page_done\n");
 }
 
 int amdgpu_ttm_gart_bind(struct amdgpu_device *adev,
@@ -1258,8 +1260,6 @@  int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
 
 	spin_lock_init(&gtt->guptasklock);
 	INIT_LIST_HEAD(&gtt->guptasks);
-	atomic_set(&gtt->mmu_invalidations, 0);
-	gtt->last_set_pages = 0;
 
 	return 0;
 }
@@ -1289,7 +1289,6 @@  bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
 				  unsigned long end)
 {
 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
-	struct amdgpu_ttm_gup_task_list *entry;
 	unsigned long size;
 
 	if (gtt == NULL || !gtt->userptr)
@@ -1302,48 +1301,20 @@  bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
 	if (gtt->userptr > end || gtt->userptr + size <= start)
 		return false;
 
-	/* Search the lists of tasks that hold this mapping and see
-	 * if current is one of them.  If it is return false.
-	 */
-	spin_lock(&gtt->guptasklock);
-	list_for_each_entry(entry, &gtt->guptasks, list) {
-		if (entry->task == current) {
-			spin_unlock(&gtt->guptasklock);
-			return false;
-		}
-	}
-	spin_unlock(&gtt->guptasklock);
-
-	atomic_inc(&gtt->mmu_invalidations);
-
 	return true;
 }
 
 /**
- * amdgpu_ttm_tt_userptr_invalidated - Has the ttm_tt object been invalidated?
- */
-bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,
-				       int *last_invalidated)
-{
-	struct amdgpu_ttm_tt *gtt = (void *)ttm;
-	int prev_invalidated = *last_invalidated;
-
-	*last_invalidated = atomic_read(&gtt->mmu_invalidations);
-	return prev_invalidated != *last_invalidated;
-}
-
-/**
- * amdgpu_ttm_tt_userptr_needs_pages - Have the pages backing this ttm_tt object
- * been invalidated since the last time they've been set?
+ * amdgpu_ttm_tt_is_userptr - Have the pages backing by userptr?
  */
-bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm)
+bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm)
 {
 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
 
 	if (gtt == NULL || !gtt->userptr)
 		return false;
 
-	return atomic_read(&gtt->mmu_invalidations) != gtt->last_set_pages;
+	return true;
 }
 
 /**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index b5b2d101f7db..8988c87fff9d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -102,6 +102,7 @@  int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo);
 int amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo);
 
 int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages);
+bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm);
 void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages);
 void amdgpu_ttm_tt_mark_user_pages(struct ttm_tt *ttm);
 int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
@@ -112,7 +113,7 @@  bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
 				  unsigned long end);
 bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,
 				       int *last_invalidated);
-bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm);
+bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm);
 bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm);
 uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct ttm_mem_reg *mem);
 uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,

Comments

Am 04.02.19 um 19:23 schrieb Yang, Philip:
> Use HMM helper function hmm_vma_fault() to get physical pages backing
> userptr and start CPU page table update track of those pages. Then use
> hmm_vma_range_done() to check if those pages are updated before
> amdgpu_cs_submit for gfx or before user queues are resumed for kfd.
>
> If userptr pages are updated, for gfx, amdgpu_cs_ioctl will restart
> from scratch, for kfd, restore worker is rescheduled to retry.
>
> HMM simplify the CPU page table concurrent update check, so remove
> guptasklock, mmu_invalidations, last_set_pages fields from
> amdgpu_ttm_tt struct.
>
> HMM does not pin the page (increase page ref count), so remove related
> operations like release_pages(), put_page(), mark_page_dirty().
>
> Change-Id: I2e8c0c6f0d8c21e5596a32d7fc91762778bc9e67
> Signed-off-by: Philip Yang <Philip.Yang@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h    |   1 -
>   .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  |  95 +++-------
>   drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h   |   2 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c        | 158 +++++++---------
>   drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c       |  14 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c        |  25 ++-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h        |   4 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c       | 173 ++++++++----------
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h       |   3 +-
>   9 files changed, 198 insertions(+), 277 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> index 0b31a1859023..0e1711a75b68 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> @@ -61,7 +61,6 @@ struct kgd_mem {
>   
>   	atomic_t invalid;
>   	struct amdkfd_process_info *process_info;
> -	struct page **user_pages;
>   
>   	struct amdgpu_sync sync;
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> index d7b10d79f1de..ae2d838d31ea 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> @@ -582,28 +582,12 @@ static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm,
>   		goto out;
>   	}
>   
> -	/* If no restore worker is running concurrently, user_pages
> -	 * should not be allocated
> -	 */
> -	WARN(mem->user_pages, "Leaking user_pages array");
> -
> -	mem->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages,
> -					   sizeof(struct page *),
> -					   GFP_KERNEL | __GFP_ZERO);
> -	if (!mem->user_pages) {
> -		pr_err("%s: Failed to allocate pages array\n", __func__);
> -		ret = -ENOMEM;
> -		goto unregister_out;
> -	}
> -
> -	ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, mem->user_pages);
> +	ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, bo->tbo.ttm->pages);
>   	if (ret) {
>   		pr_err("%s: Failed to get user pages: %d\n", __func__, ret);
> -		goto free_out;
> +		goto unregister_out;
>   	}
>   
> -	amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, mem->user_pages);
> -
>   	ret = amdgpu_bo_reserve(bo, true);
>   	if (ret) {
>   		pr_err("%s: Failed to reserve BO\n", __func__);
> @@ -616,11 +600,7 @@ static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm,
>   	amdgpu_bo_unreserve(bo);
>   
>   release_out:
> -	if (ret)
> -		release_pages(mem->user_pages, bo->tbo.ttm->num_pages);
> -free_out:
> -	kvfree(mem->user_pages);
> -	mem->user_pages = NULL;
> +	amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
>   unregister_out:
>   	if (ret)
>   		amdgpu_mn_unregister(bo);
> @@ -679,7 +659,6 @@ static int reserve_bo_and_vm(struct kgd_mem *mem,
>   	ctx->kfd_bo.priority = 0;
>   	ctx->kfd_bo.tv.bo = &bo->tbo;
>   	ctx->kfd_bo.tv.num_shared = 1;
> -	ctx->kfd_bo.user_pages = NULL;
>   	list_add(&ctx->kfd_bo.tv.head, &ctx->list);
>   
>   	amdgpu_vm_get_pd_bo(vm, &ctx->list, &ctx->vm_pd[0]);
> @@ -743,7 +722,6 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem,
>   	ctx->kfd_bo.priority = 0;
>   	ctx->kfd_bo.tv.bo = &bo->tbo;
>   	ctx->kfd_bo.tv.num_shared = 1;
> -	ctx->kfd_bo.user_pages = NULL;
>   	list_add(&ctx->kfd_bo.tv.head, &ctx->list);
>   
>   	i = 0;
> @@ -1371,15 +1349,6 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
>   	list_del(&bo_list_entry->head);
>   	mutex_unlock(&process_info->lock);
>   
> -	/* Free user pages if necessary */
> -	if (mem->user_pages) {
> -		pr_debug("%s: Freeing user_pages array\n", __func__);
> -		if (mem->user_pages[0])
> -			release_pages(mem->user_pages,
> -					mem->bo->tbo.ttm->num_pages);
> -		kvfree(mem->user_pages);
> -	}
> -
>   	ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx);
>   	if (unlikely(ret))
>   		return ret;
> @@ -1855,25 +1824,11 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
>   
>   		bo = mem->bo;
>   
> -		if (!mem->user_pages) {
> -			mem->user_pages =
> -				kvmalloc_array(bo->tbo.ttm->num_pages,
> -						 sizeof(struct page *),
> -						 GFP_KERNEL | __GFP_ZERO);
> -			if (!mem->user_pages) {
> -				pr_err("%s: Failed to allocate pages array\n",
> -				       __func__);
> -				return -ENOMEM;
> -			}
> -		} else if (mem->user_pages[0]) {
> -			release_pages(mem->user_pages, bo->tbo.ttm->num_pages);
> -		}
> -
>   		/* Get updated user pages */
>   		ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm,
> -						   mem->user_pages);
> +						   bo->tbo.ttm->pages);
>   		if (ret) {
> -			mem->user_pages[0] = NULL;
> +			bo->tbo.ttm->pages[0] = NULL;
>   			pr_info("%s: Failed to get user pages: %d\n",
>   				__func__, ret);
>   			/* Pretend it succeeded. It will fail later
> @@ -1882,12 +1837,6 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
>   			 * stalled user mode queues.
>   			 */
>   		}
> -
> -		/* Mark the BO as valid unless it was invalidated
> -		 * again concurrently
> -		 */
> -		if (atomic_cmpxchg(&mem->invalid, invalid, 0) != invalid)
> -			return -EAGAIN;
>   	}
>   
>   	return 0;
> @@ -1917,7 +1866,8 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
>   				     GFP_KERNEL);
>   	if (!pd_bo_list_entries) {
>   		pr_err("%s: Failed to allocate PD BO list entries\n", __func__);
> -		return -ENOMEM;
> +		ret = -ENOMEM;
> +		goto out_no_mem;
>   	}
>   
>   	INIT_LIST_HEAD(&resv_list);
> @@ -1941,7 +1891,7 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
>   	ret = ttm_eu_reserve_buffers(&ticket, &resv_list, false, &duplicates);
>   	WARN(!list_empty(&duplicates), "Duplicates should be empty");
>   	if (ret)
> -		goto out;
> +		goto out_free;
>   
>   	amdgpu_sync_create(&sync);
>   
> @@ -1967,10 +1917,8 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
>   
>   		bo = mem->bo;
>   
> -		/* Copy pages array and validate the BO if we got user pages */
> -		if (mem->user_pages[0]) {
> -			amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm,
> -						     mem->user_pages);
> +		/* Validate the BO if we got user pages */
> +		if (bo->tbo.ttm->pages[0]) {
>   			amdgpu_bo_placement_from_domain(bo, mem->domain);
>   			ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
>   			if (ret) {
> @@ -1979,16 +1927,16 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
>   			}
>   		}
>   
> -		/* Validate succeeded, now the BO owns the pages, free
> -		 * our copy of the pointer array. Put this BO back on
> -		 * the userptr_valid_list. If we need to revalidate
> -		 * it, we need to start from scratch.
> -		 */
> -		kvfree(mem->user_pages);
> -		mem->user_pages = NULL;
>   		list_move_tail(&mem->validate_list.head,
>   			       &process_info->userptr_valid_list);
>   
> +		/* Stop HMM track the userptr update. We dont check the return
> +		 * value for concurrent CPU page table update because we will
> +		 * reschedule the restore worker if process_info->evicted_bos
> +		 * is updated.
> +		 */
> +		amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
> +
>   		/* Update mapping. If the BO was not validated
>   		 * (because we couldn't get user pages), this will
>   		 * clear the page table entries, which will result in
> @@ -2022,8 +1970,15 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
>   	ttm_eu_backoff_reservation(&ticket, &resv_list);
>   	amdgpu_sync_wait(&sync, false);
>   	amdgpu_sync_free(&sync);
> -out:
> +out_free:
>   	kfree(pd_bo_list_entries);
> +out_no_mem:
> +	list_for_each_entry_safe(mem, tmp_mem,
> +				 &process_info->userptr_inval_list,
> +				 validate_list.head) {
> +		bo = mem->bo;
> +		amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
> +	}
>   
>   	return ret;
>   }
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
> index 7c5f5d1601e6..a130e766cbdb 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
> @@ -36,7 +36,7 @@ struct amdgpu_bo_list_entry {
>   	struct amdgpu_bo_va		*bo_va;
>   	uint32_t			priority;
>   	struct page			**user_pages;
> -	int				user_invalidated;
> +	bool				user_invalidated;
>   };
>   
>   struct amdgpu_bo_list {
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index 1c49b8266d69..451a1fab17d6 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -52,7 +52,6 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
>   	p->uf_entry.tv.bo = &bo->tbo;
>   	/* One for TTM and one for the CS job */
>   	p->uf_entry.tv.num_shared = 2;
> -	p->uf_entry.user_pages = NULL;
>   
>   	drm_gem_object_put_unlocked(gobj);
>   
> @@ -539,14 +538,14 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
>   		if (usermm && usermm != current->mm)
>   			return -EPERM;
>   
> -		/* Check if we have user pages and nobody bound the BO already */
> -		if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm) &&
> -		    lobj->user_pages) {
> +		if (amdgpu_ttm_tt_is_userptr(bo->tbo.ttm) &&
> +		    lobj->user_invalidated && lobj->user_pages) {
>   			amdgpu_bo_placement_from_domain(bo,
>   							AMDGPU_GEM_DOMAIN_CPU);
>   			r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
>   			if (r)
>   				return r;
> +
>   			amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm,
>   						     lobj->user_pages);
>   			binding_userptr = true;
> @@ -577,7 +576,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
>   	struct amdgpu_bo *gds;
>   	struct amdgpu_bo *gws;
>   	struct amdgpu_bo *oa;
> -	unsigned tries = 10;
>   	int r;
>   
>   	INIT_LIST_HEAD(&p->validated);
> @@ -613,79 +611,45 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
>   	if (p->uf_entry.tv.bo && !ttm_to_amdgpu_bo(p->uf_entry.tv.bo)->parent)
>   		list_add(&p->uf_entry.tv.head, &p->validated);
>   
> -	while (1) {
> -		struct list_head need_pages;
> -
> -		r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,
> -					   &duplicates);
> -		if (unlikely(r != 0)) {
> -			if (r != -ERESTARTSYS)
> -				DRM_ERROR("ttm_eu_reserve_buffers failed.\n");
> -			goto error_free_pages;
> -		}
> -
> -		INIT_LIST_HEAD(&need_pages);
> -		amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
> -			struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
> -
> -			if (amdgpu_ttm_tt_userptr_invalidated(bo->tbo.ttm,
> -				 &e->user_invalidated) && e->user_pages) {
> -
> -				/* We acquired a page array, but somebody
> -				 * invalidated it. Free it and try again
> -				 */
> -				release_pages(e->user_pages,
> -					      bo->tbo.ttm->num_pages);
> -				kvfree(e->user_pages);
> -				e->user_pages = NULL;
> -			}
> -
> -			if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm) &&
> -			    !e->user_pages) {
> -				list_del(&e->tv.head);
> -				list_add(&e->tv.head, &need_pages);
> -
> -				amdgpu_bo_unreserve(bo);
> -			}
> +	/* Get userptr backing pages. If pages are updated after registered
> +	 * in amdgpu_gem_userptr_ioctl(), amdgpu_cs_list_validate() will do
> +	 * amdgpu_ttm_backend_bind() to flush and invalidate new pages
> +	 */
> +	amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
> +		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
> +		bool userpage_invalidated = false;
> +		int i;
> +
> +		e->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages,
> +					sizeof(struct page *),
> +					GFP_KERNEL | __GFP_ZERO);
> +		if (!e->user_pages) {
> +			DRM_ERROR("calloc failure\n");
> +			return -ENOMEM;
>   		}
>   
> -		if (list_empty(&need_pages))
> -			break;
> -
> -		/* Unreserve everything again. */
> -		ttm_eu_backoff_reservation(&p->ticket, &p->validated);
> -
> -		/* We tried too many times, just abort */
> -		if (!--tries) {
> -			r = -EDEADLK;
> -			DRM_ERROR("deadlock in %s\n", __func__);
> -			goto error_free_pages;
> +		r = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, e->user_pages);
> +		if (r) {
> +			kvfree(e->user_pages);
> +			e->user_pages = NULL;
> +			return r;
>   		}
>   
> -		/* Fill the page arrays for all userptrs. */
> -		list_for_each_entry(e, &need_pages, tv.head) {
> -			struct ttm_tt *ttm = e->tv.bo->ttm;
> -
> -			e->user_pages = kvmalloc_array(ttm->num_pages,
> -							 sizeof(struct page*),
> -							 GFP_KERNEL | __GFP_ZERO);
> -			if (!e->user_pages) {
> -				r = -ENOMEM;
> -				DRM_ERROR("calloc failure in %s\n", __func__);
> -				goto error_free_pages;
> -			}
> -
> -			r = amdgpu_ttm_tt_get_user_pages(ttm, e->user_pages);
> -			if (r) {
> -				DRM_ERROR("amdgpu_ttm_tt_get_user_pages failed.\n");
> -				kvfree(e->user_pages);
> -				e->user_pages = NULL;
> -				goto error_free_pages;
> +		for (i = 0; i < bo->tbo.ttm->num_pages; i++) {
> +			if (bo->tbo.ttm->pages[i] != e->user_pages[i]) {
> +				userpage_invalidated = true;
> +				break;
>   			}
>   		}
> +		e->user_invalidated = userpage_invalidated;
> +	}
>   
> -		/* And try again. */
> -		list_splice(&need_pages, &p->validated);
> +	r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,
> +				   &duplicates);
> +	if (unlikely(r != 0)) {
> +		if (r != -ERESTARTSYS)
> +			DRM_ERROR("ttm_eu_reserve_buffers failed.\n");
> +		goto out;
>   	}
>   
>   	amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold,
> @@ -754,17 +718,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
>   error_validate:
>   	if (r)
>   		ttm_eu_backoff_reservation(&p->ticket, &p->validated);
> -
> -error_free_pages:
> -
> -	amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
> -		if (!e->user_pages)
> -			continue;
> -
> -		release_pages(e->user_pages, e->tv.bo->ttm->num_pages);
> -		kvfree(e->user_pages);
> -	}
> -
> +out:
>   	return r;
>   }
>   
> @@ -1213,8 +1167,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
>   	struct amdgpu_bo_list_entry *e;
>   	struct amdgpu_job *job;
>   	uint64_t seq;
> -
> -	int r;
> +	int r = 0;
>   
>   	job = p->job;
>   	p->job = NULL;
> @@ -1223,15 +1176,21 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
>   	if (r)
>   		goto error_unlock;
>   
> -	/* No memory allocation is allowed while holding the mn lock */
> +	/* No memory allocation is allowed while holding the mn lock.
> +	 * p->mn is hold until amdgpu_cs_submit is finished and fence is added
> +	 * to BOs.
> +	 */
>   	amdgpu_mn_lock(p->mn);
> +
> +	/* If userptr are updated after amdgpu_cs_parser_bos(), restart cs */
>   	amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
>   		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
>   
> -		if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) {
> -			r = -ERESTARTSYS;
> -			goto error_abort;
> -		}
> +		r |= !amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
> +	}
> +	if (r) {
> +		r = -ERESTARTSYS;
> +		goto error_abort;
>   	}
>   
>   	job->owner = p->filp;
> @@ -1278,14 +1237,20 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
>   int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
>   {
>   	struct amdgpu_device *adev = dev->dev_private;
> -	union drm_amdgpu_cs *cs = data;
> -	struct amdgpu_cs_parser parser = {};
> -	bool reserved_buffers = false;
> +	union drm_amdgpu_cs *cs;
> +	struct amdgpu_cs_parser parser;
> +	bool reserved_buffers;
> +	int tries = 10;
>   	int i, r;
>   
>   	if (!adev->accel_working)
>   		return -EBUSY;
>   
> +restart:
> +	memset(&parser, 0, sizeof(parser));
> +	cs = data;
> +	reserved_buffers = false;
> +

Well that is a NAK, see below.

>   	parser.adev = adev;
>   	parser.filp = filp;
>   
> @@ -1327,6 +1292,15 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
>   
>   out:
>   	amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
> +
> +	if (r == -ERESTARTSYS) {
> +		if (!--tries) {
> +			DRM_ERROR("Possible deadlock? Retry too many times\n");
> +			return -EDEADLK;
> +		}
> +		goto restart;

You really need to restart the IOCTL to potentially handle signals.

But since the calling code correctly handles this already you can just 
drop this change as far as I can see.

> +	}
> +
>   	return r;
>   }
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> index d21dd2f369da..555285e329ed 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> @@ -329,26 +329,24 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
>   
>   		r = amdgpu_bo_reserve(bo, true);
>   		if (r)
> -			goto free_pages;
> +			goto user_pages_done;
>   
>   		amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
>   		r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
>   		amdgpu_bo_unreserve(bo);
>   		if (r)
> -			goto free_pages;
> +			goto user_pages_done;
>   	}
>   
>   	r = drm_gem_handle_create(filp, gobj, &handle);
> -	/* drop reference from allocate - handle holds it now */
> -	drm_gem_object_put_unlocked(gobj);
>   	if (r)
> -		return r;
> +		goto user_pages_done;
>   
>   	args->handle = handle;
> -	return 0;
>   
> -free_pages:
> -	release_pages(bo->tbo.ttm->pages, bo->tbo.ttm->num_pages);
> +user_pages_done:
> +	if (args->flags & AMDGPU_GEM_USERPTR_VALIDATE)
> +		amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
>   
>   release_object:
>   	drm_gem_object_put_unlocked(gobj);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
> index e356867d2308..fa2516016c43 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
> @@ -222,8 +222,6 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,
>   			true, false, MAX_SCHEDULE_TIMEOUT);
>   		if (r <= 0)
>   			DRM_ERROR("(%ld) failed to wait for user bo\n", r);
> -
> -		amdgpu_ttm_tt_mark_user_pages(bo->tbo.ttm);
>   	}
>   }
>   
> @@ -504,3 +502,26 @@ void amdgpu_mn_unregister(struct amdgpu_bo *bo)
>   	mutex_unlock(&adev->mn_lock);
>   }
>   
> +/* flags used by HMM internal, not related to CPU/GPU PTE flags */
> +static const uint64_t hmm_range_flags[HMM_PFN_FLAG_MAX] = {
> +		(1 << 0), /* HMM_PFN_VALID */
> +		(1 << 1), /* HMM_PFN_WRITE */
> +		0 /* HMM_PFN_DEVICE_PRIVATE */
> +};
> +
> +static const uint64_t hmm_range_values[HMM_PFN_VALUE_MAX] = {
> +		0xfffffffffffffffeUL, /* HMM_PFN_ERROR */
> +		0, /* HMM_PFN_NONE */
> +		0xfffffffffffffffcUL /* HMM_PFN_SPECIAL */
> +};
> +
> +void amdgpu_hmm_init_range(struct hmm_range *range)
> +{
> +	if (range) {
> +		range->flags = hmm_range_flags;
> +		range->values = hmm_range_values;
> +		range->pfn_shift = PAGE_SHIFT;
> +		range->pfns = NULL;
> +		INIT_LIST_HEAD(&range->list);
> +	}
> +}
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
> index 0a51fd00021c..4803e216e174 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
> @@ -25,9 +25,10 @@
>   #define __AMDGPU_MN_H__
>   
>   /*
> - * MMU Notifier
> + * HMM mirror
>    */
>   struct amdgpu_mn;
> +struct hmm_range;
>   
>   enum amdgpu_mn_type {
>   	AMDGPU_MN_TYPE_GFX,
> @@ -41,6 +42,7 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
>   				enum amdgpu_mn_type type);
>   int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr);
>   void amdgpu_mn_unregister(struct amdgpu_bo *bo);
> +void amdgpu_hmm_init_range(struct hmm_range *range);
>   #else
>   static inline void amdgpu_mn_lock(struct amdgpu_mn *mn) {}
>   static inline void amdgpu_mn_unlock(struct amdgpu_mn *mn) {}
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index 73e71e61dc99..3fd7851da0c0 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -43,6 +43,7 @@
>   #include <linux/pagemap.h>
>   #include <linux/debugfs.h>
>   #include <linux/iommu.h>
> +#include <linux/hmm.h>
>   #include "amdgpu.h"
>   #include "amdgpu_object.h"
>   #include "amdgpu_trace.h"
> @@ -705,11 +706,6 @@ static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
>   /*
>    * TTM backend functions.
>    */
> -struct amdgpu_ttm_gup_task_list {
> -	struct list_head	list;
> -	struct task_struct	*task;
> -};
> -
>   struct amdgpu_ttm_tt {
>   	struct ttm_dma_tt	ttm;
>   	u64			offset;
> @@ -718,85 +714,96 @@ struct amdgpu_ttm_tt {
>   	uint32_t		userflags;

>   	spinlock_t              guptasklock;
>   	struct list_head        guptasks;

Those fields are now also completely superfluous.

Apart from those two comments that now looks really good to me.

Regards,
Christian.

> -	atomic_t		mmu_invalidations;
> -	uint32_t		last_set_pages;
> +	struct hmm_range	range;
>   };
>   
>   /**
> - * amdgpu_ttm_tt_get_user_pages - Pin pages of memory pointed to by a USERPTR
> - * pointer to memory
> + * amdgpu_ttm_tt_get_user_pages - get device accessible pages that back user
> + * memory and start HMM tracking CPU page table update
>    *
> - * Called by amdgpu_gem_userptr_ioctl() and amdgpu_cs_parser_bos().
> - * This provides a wrapper around the get_user_pages() call to provide
> - * device accessible pages that back user memory.
> + * Calling function must call amdgpu_ttm_tt_userptr_range_done() once and only
> + * once afterwards to stop HMM tracking
>    */
>   int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
>   {
>   	struct amdgpu_ttm_tt *gtt = (void *)ttm;
>   	struct mm_struct *mm = gtt->usertask->mm;
> -	unsigned int flags = 0;
> -	unsigned pinned = 0;
> -	int r;
> +	unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;
> +	struct hmm_range *range = &gtt->range;
> +	int r = 0, i;
>   
>   	if (!mm) /* Happens during process shutdown */
>   		return -ESRCH;
>   
> -	if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY))
> -		flags |= FOLL_WRITE;
> +	amdgpu_hmm_init_range(range);
>   
>   	down_read(&mm->mmap_sem);
>   
> -	if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) {
> -		/*
> -		 * check that we only use anonymous memory to prevent problems
> -		 * with writeback
> -		 */
> -		unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;
> -		struct vm_area_struct *vma;
> +	range->vma = find_vma(mm, gtt->userptr);
> +	if (!range_in_vma(range->vma, gtt->userptr, end))
> +		r = -EFAULT;
> +	else if ((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) &&
> +		range->vma->vm_file)
> +		r = -EPERM;
> +	if (r)
> +		goto out;
>   
> -		vma = find_vma(mm, gtt->userptr);
> -		if (!vma || vma->vm_file || vma->vm_end < end) {
> -			up_read(&mm->mmap_sem);
> -			return -EPERM;
> -		}
> +	range->pfns = kvmalloc_array(ttm->num_pages, sizeof(uint64_t),
> +				     GFP_KERNEL);
> +	if (range->pfns == NULL) {
> +		r = -ENOMEM;
> +		goto out;
>   	}
> +	range->start = gtt->userptr;
> +	range->end = end;
>   
> -	/* loop enough times using contiguous pages of memory */
> -	do {
> -		unsigned num_pages = ttm->num_pages - pinned;
> -		uint64_t userptr = gtt->userptr + pinned * PAGE_SIZE;
> -		struct page **p = pages + pinned;
> -		struct amdgpu_ttm_gup_task_list guptask;
> +	range->pfns[0] = range->flags[HMM_PFN_VALID];
> +	range->pfns[0] |= amdgpu_ttm_tt_is_readonly(ttm) ?
> +				0 : range->flags[HMM_PFN_WRITE];
> +	for (i = 1; i < ttm->num_pages; i++)
> +		range->pfns[i] = range->pfns[0];
>   
> -		guptask.task = current;
> -		spin_lock(&gtt->guptasklock);
> -		list_add(&guptask.list, &gtt->guptasks);
> -		spin_unlock(&gtt->guptasklock);
> +	/* This may trigger page table update */
> +	r = hmm_vma_fault(range, true);
> +	if (r)
> +		goto out_free_pfns;
>   
> -		if (mm == current->mm)
> -			r = get_user_pages(userptr, num_pages, flags, p, NULL);
> -		else
> -			r = get_user_pages_remote(gtt->usertask,
> -					mm, userptr, num_pages,
> -					flags, p, NULL, NULL);
> +	up_read(&mm->mmap_sem);
>   
> -		spin_lock(&gtt->guptasklock);
> -		list_del(&guptask.list);
> -		spin_unlock(&gtt->guptasklock);
> +	for (i = 0; i < ttm->num_pages; i++)
> +		pages[i] = hmm_pfn_to_page(range, range->pfns[i]);
>   
> -		if (r < 0)
> -			goto release_pages;
> +	return 0;
>   
> -		pinned += r;
> +out_free_pfns:
> +	kvfree(range->pfns);
> +	range->pfns = NULL;
> +out:
> +	up_read(&mm->mmap_sem);
> +	return r;
> +}
>   
> -	} while (pinned < ttm->num_pages);
> +/**
> + * amdgpu_ttm_tt_userptr_range_done - stop HMM track the CPU page table change
> + * Check if the pages backing this ttm range have been invalidated
> + *
> + * Returns: true if pages are still valid
> + */
> +bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm)
> +{
> +	struct amdgpu_ttm_tt *gtt = (void *)ttm;
> +	bool r = false;
>   
> -	up_read(&mm->mmap_sem);
> -	return 0;
> +	if (!gtt || !gtt->userptr)
> +		return false;
> +
> +	WARN_ONCE(!gtt->range.pfns, "No user pages to check\n");
> +	if (gtt->range.pfns) {
> +		r = hmm_vma_range_done(&gtt->range);
> +		kvfree(gtt->range.pfns);
> +		gtt->range.pfns = NULL;
> +	}
>   
> -release_pages:
> -	release_pages(pages, pinned);
> -	up_read(&mm->mmap_sem);
>   	return r;
>   }
>   
> @@ -809,16 +816,10 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
>    */
>   void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages)
>   {
> -	struct amdgpu_ttm_tt *gtt = (void *)ttm;
>   	unsigned i;
>   
> -	gtt->last_set_pages = atomic_read(&gtt->mmu_invalidations);
> -	for (i = 0; i < ttm->num_pages; ++i) {
> -		if (ttm->pages[i])
> -			put_page(ttm->pages[i]);
> -
> +	for (i = 0; i < ttm->num_pages; ++i)
>   		ttm->pages[i] = pages ? pages[i] : NULL;
> -	}
>   }
>   
>   /**
> @@ -903,10 +904,11 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm)
>   	/* unmap the pages mapped to the device */
>   	dma_unmap_sg(adev->dev, ttm->sg->sgl, ttm->sg->nents, direction);
>   
> -	/* mark the pages as dirty */
> -	amdgpu_ttm_tt_mark_user_pages(ttm);
> -
>   	sg_free_table(ttm->sg);
> +
> +	if (gtt->range.pfns &&
> +	    ttm->pages[0] == hmm_pfn_to_page(&gtt->range, gtt->range.pfns[0]))
> +		WARN_ONCE(1, "Missing get_user_page_done\n");
>   }
>   
>   int amdgpu_ttm_gart_bind(struct amdgpu_device *adev,
> @@ -1258,8 +1260,6 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
>   
>   	spin_lock_init(&gtt->guptasklock);
>   	INIT_LIST_HEAD(&gtt->guptasks);
> -	atomic_set(&gtt->mmu_invalidations, 0);
> -	gtt->last_set_pages = 0;
>   
>   	return 0;
>   }
> @@ -1289,7 +1289,6 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
>   				  unsigned long end)
>   {
>   	struct amdgpu_ttm_tt *gtt = (void *)ttm;
> -	struct amdgpu_ttm_gup_task_list *entry;
>   	unsigned long size;
>   
>   	if (gtt == NULL || !gtt->userptr)
> @@ -1302,48 +1301,20 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
>   	if (gtt->userptr > end || gtt->userptr + size <= start)
>   		return false;
>   
> -	/* Search the lists of tasks that hold this mapping and see
> -	 * if current is one of them.  If it is return false.
> -	 */
> -	spin_lock(&gtt->guptasklock);
> -	list_for_each_entry(entry, &gtt->guptasks, list) {
> -		if (entry->task == current) {
> -			spin_unlock(&gtt->guptasklock);
> -			return false;
> -		}
> -	}
> -	spin_unlock(&gtt->guptasklock);
> -
> -	atomic_inc(&gtt->mmu_invalidations);
> -
>   	return true;
>   }
>   
>   /**
> - * amdgpu_ttm_tt_userptr_invalidated - Has the ttm_tt object been invalidated?
> - */
> -bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,
> -				       int *last_invalidated)
> -{
> -	struct amdgpu_ttm_tt *gtt = (void *)ttm;
> -	int prev_invalidated = *last_invalidated;
> -
> -	*last_invalidated = atomic_read(&gtt->mmu_invalidations);
> -	return prev_invalidated != *last_invalidated;
> -}
> -
> -/**
> - * amdgpu_ttm_tt_userptr_needs_pages - Have the pages backing this ttm_tt object
> - * been invalidated since the last time they've been set?
> + * amdgpu_ttm_tt_is_userptr - Have the pages backing by userptr?
>    */
> -bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm)
> +bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm)
>   {
>   	struct amdgpu_ttm_tt *gtt = (void *)ttm;
>   
>   	if (gtt == NULL || !gtt->userptr)
>   		return false;
>   
> -	return atomic_read(&gtt->mmu_invalidations) != gtt->last_set_pages;
> +	return true;
>   }
>   
>   /**
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> index b5b2d101f7db..8988c87fff9d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> @@ -102,6 +102,7 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo);
>   int amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo);
>   
>   int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages);
> +bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm);
>   void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages);
>   void amdgpu_ttm_tt_mark_user_pages(struct ttm_tt *ttm);
>   int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
> @@ -112,7 +113,7 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
>   				  unsigned long end);
>   bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,
>   				       int *last_invalidated);
> -bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm);
> +bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm);
>   bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm);
>   uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct ttm_mem_reg *mem);
>   uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
Hi Christian,

My comments are embedded below. I will submit another patch to address 
those.

Thanks,
Philip

On 2019-02-05 6:52 a.m., Christian König wrote:
> Am 04.02.19 um 19:23 schrieb Yang, Philip:

>> Use HMM helper function hmm_vma_fault() to get physical pages backing

>> userptr and start CPU page table update track of those pages. Then use

>> hmm_vma_range_done() to check if those pages are updated before

>> amdgpu_cs_submit for gfx or before user queues are resumed for kfd.

>>

>> If userptr pages are updated, for gfx, amdgpu_cs_ioctl will restart

>> from scratch, for kfd, restore worker is rescheduled to retry.

>>

>> HMM simplify the CPU page table concurrent update check, so remove

>> guptasklock, mmu_invalidations, last_set_pages fields from

>> amdgpu_ttm_tt struct.

>>

>> HMM does not pin the page (increase page ref count), so remove related

>> operations like release_pages(), put_page(), mark_page_dirty().

>>

>> Change-Id: I2e8c0c6f0d8c21e5596a32d7fc91762778bc9e67

>> Signed-off-by: Philip Yang <Philip.Yang@amd.com>

>> ---

>>   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h    |   1 -

>>   .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  |  95 +++-------

>>   drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h   |   2 +-

>>   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c        | 158 +++++++---------

>>   drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c       |  14 +-

>>   drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c        |  25 ++-

>>   drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h        |   4 +-

>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c       | 173 ++++++++----------

>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h       |   3 +-

>>   9 files changed, 198 insertions(+), 277 deletions(-)

>>

>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 

>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h

>> index 0b31a1859023..0e1711a75b68 100644

>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h

>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h

>> @@ -61,7 +61,6 @@ struct kgd_mem {

>>       atomic_t invalid;

>>       struct amdkfd_process_info *process_info;

>> -    struct page **user_pages;

>>       struct amdgpu_sync sync;

>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 

>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c

>> index d7b10d79f1de..ae2d838d31ea 100644

>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c

>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c

>> @@ -582,28 +582,12 @@ static int init_user_pages(struct kgd_mem *mem, 

>> struct mm_struct *mm,

>>           goto out;

>>       }

>> -    /* If no restore worker is running concurrently, user_pages

>> -     * should not be allocated

>> -     */

>> -    WARN(mem->user_pages, "Leaking user_pages array");

>> -

>> -    mem->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages,

>> -                       sizeof(struct page *),

>> -                       GFP_KERNEL | __GFP_ZERO);

>> -    if (!mem->user_pages) {

>> -        pr_err("%s: Failed to allocate pages array\n", __func__);

>> -        ret = -ENOMEM;

>> -        goto unregister_out;

>> -    }

>> -

>> -    ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, mem->user_pages);

>> +    ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, bo->tbo.ttm->pages);

>>       if (ret) {

>>           pr_err("%s: Failed to get user pages: %d\n", __func__, ret);

>> -        goto free_out;

>> +        goto unregister_out;

>>       }

>> -    amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, mem->user_pages);

>> -

>>       ret = amdgpu_bo_reserve(bo, true);

>>       if (ret) {

>>           pr_err("%s: Failed to reserve BO\n", __func__);

>> @@ -616,11 +600,7 @@ static int init_user_pages(struct kgd_mem *mem, 

>> struct mm_struct *mm,

>>       amdgpu_bo_unreserve(bo);

>>   release_out:

>> -    if (ret)

>> -        release_pages(mem->user_pages, bo->tbo.ttm->num_pages);

>> -free_out:

>> -    kvfree(mem->user_pages);

>> -    mem->user_pages = NULL;

>> +    amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);

>>   unregister_out:

>>       if (ret)

>>           amdgpu_mn_unregister(bo);

>> @@ -679,7 +659,6 @@ static int reserve_bo_and_vm(struct kgd_mem *mem,

>>       ctx->kfd_bo.priority = 0;

>>       ctx->kfd_bo.tv.bo = &bo->tbo;

>>       ctx->kfd_bo.tv.num_shared = 1;

>> -    ctx->kfd_bo.user_pages = NULL;

>>       list_add(&ctx->kfd_bo.tv.head, &ctx->list);

>>       amdgpu_vm_get_pd_bo(vm, &ctx->list, &ctx->vm_pd[0]);

>> @@ -743,7 +722,6 @@ static int reserve_bo_and_cond_vms(struct kgd_mem 

>> *mem,

>>       ctx->kfd_bo.priority = 0;

>>       ctx->kfd_bo.tv.bo = &bo->tbo;

>>       ctx->kfd_bo.tv.num_shared = 1;

>> -    ctx->kfd_bo.user_pages = NULL;

>>       list_add(&ctx->kfd_bo.tv.head, &ctx->list);

>>       i = 0;

>> @@ -1371,15 +1349,6 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(

>>       list_del(&bo_list_entry->head);

>>       mutex_unlock(&process_info->lock);

>> -    /* Free user pages if necessary */

>> -    if (mem->user_pages) {

>> -        pr_debug("%s: Freeing user_pages array\n", __func__);

>> -        if (mem->user_pages[0])

>> -            release_pages(mem->user_pages,

>> -                    mem->bo->tbo.ttm->num_pages);

>> -        kvfree(mem->user_pages);

>> -    }

>> -

>>       ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx);

>>       if (unlikely(ret))

>>           return ret;

>> @@ -1855,25 +1824,11 @@ static int update_invalid_user_pages(struct 

>> amdkfd_process_info *process_info,

>>           bo = mem->bo;

>> -        if (!mem->user_pages) {

>> -            mem->user_pages =

>> -                kvmalloc_array(bo->tbo.ttm->num_pages,

>> -                         sizeof(struct page *),

>> -                         GFP_KERNEL | __GFP_ZERO);

>> -            if (!mem->user_pages) {

>> -                pr_err("%s: Failed to allocate pages array\n",

>> -                       __func__);

>> -                return -ENOMEM;

>> -            }

>> -        } else if (mem->user_pages[0]) {

>> -            release_pages(mem->user_pages, bo->tbo.ttm->num_pages);

>> -        }

>> -

>>           /* Get updated user pages */

>>           ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm,

>> -                           mem->user_pages);

>> +                           bo->tbo.ttm->pages);

>>           if (ret) {

>> -            mem->user_pages[0] = NULL;

>> +            bo->tbo.ttm->pages[0] = NULL;

>>               pr_info("%s: Failed to get user pages: %d\n",

>>                   __func__, ret);

>>               /* Pretend it succeeded. It will fail later

>> @@ -1882,12 +1837,6 @@ static int update_invalid_user_pages(struct 

>> amdkfd_process_info *process_info,

>>                * stalled user mode queues.

>>                */

>>           }

>> -

>> -        /* Mark the BO as valid unless it was invalidated

>> -         * again concurrently

>> -         */

>> -        if (atomic_cmpxchg(&mem->invalid, invalid, 0) != invalid)

>> -            return -EAGAIN;

>>       }

>>       return 0;

>> @@ -1917,7 +1866,8 @@ static int validate_invalid_user_pages(struct 

>> amdkfd_process_info *process_info)

>>                        GFP_KERNEL);

>>       if (!pd_bo_list_entries) {

>>           pr_err("%s: Failed to allocate PD BO list entries\n", 

>> __func__);

>> -        return -ENOMEM;

>> +        ret = -ENOMEM;

>> +        goto out_no_mem;

>>       }

>>       INIT_LIST_HEAD(&resv_list);

>> @@ -1941,7 +1891,7 @@ static int validate_invalid_user_pages(struct 

>> amdkfd_process_info *process_info)

>>       ret = ttm_eu_reserve_buffers(&ticket, &resv_list, false, 

>> &duplicates);

>>       WARN(!list_empty(&duplicates), "Duplicates should be empty");

>>       if (ret)

>> -        goto out;

>> +        goto out_free;

>>       amdgpu_sync_create(&sync);

>> @@ -1967,10 +1917,8 @@ static int validate_invalid_user_pages(struct 

>> amdkfd_process_info *process_info)

>>           bo = mem->bo;

>> -        /* Copy pages array and validate the BO if we got user pages */

>> -        if (mem->user_pages[0]) {

>> -            amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm,

>> -                             mem->user_pages);

>> +        /* Validate the BO if we got user pages */

>> +        if (bo->tbo.ttm->pages[0]) {

>>               amdgpu_bo_placement_from_domain(bo, mem->domain);

>>               ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);

>>               if (ret) {

>> @@ -1979,16 +1927,16 @@ static int validate_invalid_user_pages(struct 

>> amdkfd_process_info *process_info)

>>               }

>>           }

>> -        /* Validate succeeded, now the BO owns the pages, free

>> -         * our copy of the pointer array. Put this BO back on

>> -         * the userptr_valid_list. If we need to revalidate

>> -         * it, we need to start from scratch.

>> -         */

>> -        kvfree(mem->user_pages);

>> -        mem->user_pages = NULL;

>>           list_move_tail(&mem->validate_list.head,

>>                      &process_info->userptr_valid_list);

>> +        /* Stop HMM track the userptr update. We dont check the return

>> +         * value for concurrent CPU page table update because we will

>> +         * reschedule the restore worker if process_info->evicted_bos

>> +         * is updated.

>> +         */

>> +        amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);

>> +

>>           /* Update mapping. If the BO was not validated

>>            * (because we couldn't get user pages), this will

>>            * clear the page table entries, which will result in

>> @@ -2022,8 +1970,15 @@ static int validate_invalid_user_pages(struct 

>> amdkfd_process_info *process_info)

>>       ttm_eu_backoff_reservation(&ticket, &resv_list);

>>       amdgpu_sync_wait(&sync, false);

>>       amdgpu_sync_free(&sync);

>> -out:

>> +out_free:

>>       kfree(pd_bo_list_entries);

>> +out_no_mem:

>> +    list_for_each_entry_safe(mem, tmp_mem,

>> +                 &process_info->userptr_inval_list,

>> +                 validate_list.head) {

>> +        bo = mem->bo;

>> +        amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);

>> +    }

>>       return ret;

>>   }

>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h 

>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h

>> index 7c5f5d1601e6..a130e766cbdb 100644

>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h

>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h

>> @@ -36,7 +36,7 @@ struct amdgpu_bo_list_entry {

>>       struct amdgpu_bo_va        *bo_va;

>>       uint32_t            priority;

>>       struct page            **user_pages;

>> -    int                user_invalidated;

>> +    bool                user_invalidated;

>>   };

>>   struct amdgpu_bo_list {

>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 

>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c

>> index 1c49b8266d69..451a1fab17d6 100644

>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c

>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c

>> @@ -52,7 +52,6 @@ static int amdgpu_cs_user_fence_chunk(struct 

>> amdgpu_cs_parser *p,

>>       p->uf_entry.tv.bo = &bo->tbo;

>>       /* One for TTM and one for the CS job */

>>       p->uf_entry.tv.num_shared = 2;

>> -    p->uf_entry.user_pages = NULL;

>>       drm_gem_object_put_unlocked(gobj);

>> @@ -539,14 +538,14 @@ static int amdgpu_cs_list_validate(struct 

>> amdgpu_cs_parser *p,

>>           if (usermm && usermm != current->mm)

>>               return -EPERM;

>> -        /* Check if we have user pages and nobody bound the BO 

>> already */

>> -        if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm) &&

>> -            lobj->user_pages) {

>> +        if (amdgpu_ttm_tt_is_userptr(bo->tbo.ttm) &&

>> +            lobj->user_invalidated && lobj->user_pages) {

>>               amdgpu_bo_placement_from_domain(bo,

>>                               AMDGPU_GEM_DOMAIN_CPU);

>>               r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);

>>               if (r)

>>                   return r;

>> +

>>               amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm,

>>                                lobj->user_pages);

>>               binding_userptr = true;

>> @@ -577,7 +576,6 @@ static int amdgpu_cs_parser_bos(struct 

>> amdgpu_cs_parser *p,

>>       struct amdgpu_bo *gds;

>>       struct amdgpu_bo *gws;

>>       struct amdgpu_bo *oa;

>> -    unsigned tries = 10;

>>       int r;

>>       INIT_LIST_HEAD(&p->validated);

>> @@ -613,79 +611,45 @@ static int amdgpu_cs_parser_bos(struct 

>> amdgpu_cs_parser *p,

>>       if (p->uf_entry.tv.bo && 

>> !ttm_to_amdgpu_bo(p->uf_entry.tv.bo)->parent)

>>           list_add(&p->uf_entry.tv.head, &p->validated);

>> -    while (1) {

>> -        struct list_head need_pages;

>> -

>> -        r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,

>> -                       &duplicates);

>> -        if (unlikely(r != 0)) {

>> -            if (r != -ERESTARTSYS)

>> -                DRM_ERROR("ttm_eu_reserve_buffers failed.\n");

>> -            goto error_free_pages;

>> -        }

>> -

>> -        INIT_LIST_HEAD(&need_pages);

>> -        amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {

>> -            struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);

>> -

>> -            if (amdgpu_ttm_tt_userptr_invalidated(bo->tbo.ttm,

>> -                 &e->user_invalidated) && e->user_pages) {

>> -

>> -                /* We acquired a page array, but somebody

>> -                 * invalidated it. Free it and try again

>> -                 */

>> -                release_pages(e->user_pages,

>> -                          bo->tbo.ttm->num_pages);

>> -                kvfree(e->user_pages);

>> -                e->user_pages = NULL;

>> -            }

>> -

>> -            if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm) &&

>> -                !e->user_pages) {

>> -                list_del(&e->tv.head);

>> -                list_add(&e->tv.head, &need_pages);

>> -

>> -                amdgpu_bo_unreserve(bo);

>> -            }

>> +    /* Get userptr backing pages. If pages are updated after registered

>> +     * in amdgpu_gem_userptr_ioctl(), amdgpu_cs_list_validate() will do

>> +     * amdgpu_ttm_backend_bind() to flush and invalidate new pages

>> +     */

>> +    amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {

>> +        struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);

>> +        bool userpage_invalidated = false;

>> +        int i;

>> +

>> +        e->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages,

>> +                    sizeof(struct page *),

>> +                    GFP_KERNEL | __GFP_ZERO);

>> +        if (!e->user_pages) {

>> +            DRM_ERROR("calloc failure\n");

>> +            return -ENOMEM;

>>           }

>> -        if (list_empty(&need_pages))

>> -            break;

>> -

>> -        /* Unreserve everything again. */

>> -        ttm_eu_backoff_reservation(&p->ticket, &p->validated);

>> -

>> -        /* We tried too many times, just abort */

>> -        if (!--tries) {

>> -            r = -EDEADLK;

>> -            DRM_ERROR("deadlock in %s\n", __func__);

>> -            goto error_free_pages;

>> +        r = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, e->user_pages);

>> +        if (r) {

>> +            kvfree(e->user_pages);

>> +            e->user_pages = NULL;

>> +            return r;

>>           }

>> -        /* Fill the page arrays for all userptrs. */

>> -        list_for_each_entry(e, &need_pages, tv.head) {

>> -            struct ttm_tt *ttm = e->tv.bo->ttm;

>> -

>> -            e->user_pages = kvmalloc_array(ttm->num_pages,

>> -                             sizeof(struct page*),

>> -                             GFP_KERNEL | __GFP_ZERO);

>> -            if (!e->user_pages) {

>> -                r = -ENOMEM;

>> -                DRM_ERROR("calloc failure in %s\n", __func__);

>> -                goto error_free_pages;

>> -            }

>> -

>> -            r = amdgpu_ttm_tt_get_user_pages(ttm, e->user_pages);

>> -            if (r) {

>> -                DRM_ERROR("amdgpu_ttm_tt_get_user_pages failed.\n");

>> -                kvfree(e->user_pages);

>> -                e->user_pages = NULL;

>> -                goto error_free_pages;

>> +        for (i = 0; i < bo->tbo.ttm->num_pages; i++) {

>> +            if (bo->tbo.ttm->pages[i] != e->user_pages[i]) {

>> +                userpage_invalidated = true;

>> +                break;

>>               }

>>           }

>> +        e->user_invalidated = userpage_invalidated;

>> +    }

>> -        /* And try again. */

>> -        list_splice(&need_pages, &p->validated);

>> +    r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,

>> +                   &duplicates);

>> +    if (unlikely(r != 0)) {

>> +        if (r != -ERESTARTSYS)

>> +            DRM_ERROR("ttm_eu_reserve_buffers failed.\n");

>> +        goto out;

>>       }

>>       amdgpu_cs_get_threshold_for_moves(p->adev, 

>> &p->bytes_moved_threshold,

>> @@ -754,17 +718,7 @@ static int amdgpu_cs_parser_bos(struct 

>> amdgpu_cs_parser *p,

>>   error_validate:

>>       if (r)

>>           ttm_eu_backoff_reservation(&p->ticket, &p->validated);

>> -

>> -error_free_pages:

>> -

>> -    amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {

>> -        if (!e->user_pages)

>> -            continue;

>> -

>> -        release_pages(e->user_pages, e->tv.bo->ttm->num_pages);

>> -        kvfree(e->user_pages);

>> -    }

>> -

>> +out:

>>       return r;

>>   }

>> @@ -1213,8 +1167,7 @@ static int amdgpu_cs_submit(struct 

>> amdgpu_cs_parser *p,

>>       struct amdgpu_bo_list_entry *e;

>>       struct amdgpu_job *job;

>>       uint64_t seq;

>> -

>> -    int r;

>> +    int r = 0;

>>       job = p->job;

>>       p->job = NULL;

>> @@ -1223,15 +1176,21 @@ static int amdgpu_cs_submit(struct 

>> amdgpu_cs_parser *p,

>>       if (r)

>>           goto error_unlock;

>> -    /* No memory allocation is allowed while holding the mn lock */

>> +    /* No memory allocation is allowed while holding the mn lock.

>> +     * p->mn is hold until amdgpu_cs_submit is finished and fence is 

>> added

>> +     * to BOs.

>> +     */

>>       amdgpu_mn_lock(p->mn);

>> +

>> +    /* If userptr are updated after amdgpu_cs_parser_bos(), restart 

>> cs */

>>       amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {

>>           struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);

>> -        if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) {

>> -            r = -ERESTARTSYS;

>> -            goto error_abort;

>> -        }

>> +        r |= !amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);

>> +    }

>> +    if (r) {

>> +        r = -ERESTARTSYS;

>> +        goto error_abort;

>>       }

>>       job->owner = p->filp;

>> @@ -1278,14 +1237,20 @@ static int amdgpu_cs_submit(struct 

>> amdgpu_cs_parser *p,

>>   int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct 

>> drm_file *filp)

>>   {

>>       struct amdgpu_device *adev = dev->dev_private;

>> -    union drm_amdgpu_cs *cs = data;

>> -    struct amdgpu_cs_parser parser = {};

>> -    bool reserved_buffers = false;

>> +    union drm_amdgpu_cs *cs;

>> +    struct amdgpu_cs_parser parser;

>> +    bool reserved_buffers;

>> +    int tries = 10;

>>       int i, r;

>>       if (!adev->accel_working)

>>           return -EBUSY;

>> +restart:

>> +    memset(&parser, 0, sizeof(parser));

>> +    cs = data;

>> +    reserved_buffers = false;

>> +

> 

> Well that is a NAK, see below.

> 

>>       parser.adev = adev;

>>       parser.filp = filp;

>> @@ -1327,6 +1292,15 @@ int amdgpu_cs_ioctl(struct drm_device *dev, 

>> void *data, struct drm_file *filp)

>>   out:

>>       amdgpu_cs_parser_fini(&parser, r, reserved_buffers);

>> +

>> +    if (r == -ERESTARTSYS) {

>> +        if (!--tries) {

>> +            DRM_ERROR("Possible deadlock? Retry too many times\n");

>> +            return -EDEADLK;

>> +        }

>> +        goto restart;

> 

> You really need to restart the IOCTL to potentially handle signals.

> 

> But since the calling code correctly handles this already you can just 

> drop this change as far as I can see.

> 

I agree that we should return -ERESTARTSYS to upper layer to handle signals.

But I do not find upper layers handle -ERESTARTSYS in the entire calling 
path, ksys_ioctl -> do_vfs_ioctl -> amdgpu_drm_ioctl -> drm->ioctl -> 
drm_ioctl_kernel -> amdgpu_cs_ioctl. The error code returns to 
application. I confirm it, libdrm userptr test application calling 
amdgpu_cs_ioctl return code is -512, which is -ERESTARTSYS.

So application should handle -ERESTARTSYS to restart the ioctl, but 
libdrm userptr test application doesn't handle this. This causes the 
test failed.

Below are details of userptr path difference. For the previous path, 
libdrm test always goes to step 2, step 3 never trigger. So it never 
return -ERESTARTSYS, but in theory, this could happen.

For HMM path, the test always goes to step 3, we have to handle this 
case inside amdgpu_cs_ioctl. Maybe I can change amdgpu_cs_submit to 
return -EBUSY, then restart the ioctl inside amdgpu_cs_ioctl. I will 
submit new patch.

The previous userptr path:
1. gem_userptr_ioctl to register userptr
2. amdgpu_cs_parser_bos, check if userptr is invalidated, then update 
userptr
3. amdgpu_cs_submit, hold p->mn lock, check if userptr is invalidated, 
return -ERESTARTSYS

The new HMM userptr path:
1. gem_userptr_ioctl to register userptr
2. amdgpu_cs_parser_bos, start HMM to track userptr update
3. amdgpu_cs_submit, hold p->mn lock, check HMM if userptr is 
invalidated, return -ERESTARTSYS


>> +    }

>> +

>>       return r;

>>   }

>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c 

>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c

>> index d21dd2f369da..555285e329ed 100644

>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c

>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c

>> @@ -329,26 +329,24 @@ int amdgpu_gem_userptr_ioctl(struct drm_device 

>> *dev, void *data,

>>           r = amdgpu_bo_reserve(bo, true);

>>           if (r)

>> -            goto free_pages;

>> +            goto user_pages_done;

>>           amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);

>>           r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);

>>           amdgpu_bo_unreserve(bo);

>>           if (r)

>> -            goto free_pages;

>> +            goto user_pages_done;

>>       }

>>       r = drm_gem_handle_create(filp, gobj, &handle);

>> -    /* drop reference from allocate - handle holds it now */

>> -    drm_gem_object_put_unlocked(gobj);

>>       if (r)

>> -        return r;

>> +        goto user_pages_done;

>>       args->handle = handle;

>> -    return 0;

>> -free_pages:

>> -    release_pages(bo->tbo.ttm->pages, bo->tbo.ttm->num_pages);

>> +user_pages_done:

>> +    if (args->flags & AMDGPU_GEM_USERPTR_VALIDATE)

>> +        amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);

>>   release_object:

>>       drm_gem_object_put_unlocked(gobj);

>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c 

>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c

>> index e356867d2308..fa2516016c43 100644

>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c

>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c

>> @@ -222,8 +222,6 @@ static void amdgpu_mn_invalidate_node(struct 

>> amdgpu_mn_node *node,

>>               true, false, MAX_SCHEDULE_TIMEOUT);

>>           if (r <= 0)

>>               DRM_ERROR("(%ld) failed to wait for user bo\n", r);

>> -

>> -        amdgpu_ttm_tt_mark_user_pages(bo->tbo.ttm);

>>       }

>>   }

>> @@ -504,3 +502,26 @@ void amdgpu_mn_unregister(struct amdgpu_bo *bo)

>>       mutex_unlock(&adev->mn_lock);

>>   }

>> +/* flags used by HMM internal, not related to CPU/GPU PTE flags */

>> +static const uint64_t hmm_range_flags[HMM_PFN_FLAG_MAX] = {

>> +        (1 << 0), /* HMM_PFN_VALID */

>> +        (1 << 1), /* HMM_PFN_WRITE */

>> +        0 /* HMM_PFN_DEVICE_PRIVATE */

>> +};

>> +

>> +static const uint64_t hmm_range_values[HMM_PFN_VALUE_MAX] = {

>> +        0xfffffffffffffffeUL, /* HMM_PFN_ERROR */

>> +        0, /* HMM_PFN_NONE */

>> +        0xfffffffffffffffcUL /* HMM_PFN_SPECIAL */

>> +};

>> +

>> +void amdgpu_hmm_init_range(struct hmm_range *range)

>> +{

>> +    if (range) {

>> +        range->flags = hmm_range_flags;

>> +        range->values = hmm_range_values;

>> +        range->pfn_shift = PAGE_SHIFT;

>> +        range->pfns = NULL;

>> +        INIT_LIST_HEAD(&range->list);

>> +    }

>> +}

>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h 

>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h

>> index 0a51fd00021c..4803e216e174 100644

>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h

>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h

>> @@ -25,9 +25,10 @@

>>   #define __AMDGPU_MN_H__

>>   /*

>> - * MMU Notifier

>> + * HMM mirror

>>    */

>>   struct amdgpu_mn;

>> +struct hmm_range;

>>   enum amdgpu_mn_type {

>>       AMDGPU_MN_TYPE_GFX,

>> @@ -41,6 +42,7 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device 

>> *adev,

>>                   enum amdgpu_mn_type type);

>>   int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr);

>>   void amdgpu_mn_unregister(struct amdgpu_bo *bo);

>> +void amdgpu_hmm_init_range(struct hmm_range *range);

>>   #else

>>   static inline void amdgpu_mn_lock(struct amdgpu_mn *mn) {}

>>   static inline void amdgpu_mn_unlock(struct amdgpu_mn *mn) {}

>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 

>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c

>> index 73e71e61dc99..3fd7851da0c0 100644

>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c

>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c

>> @@ -43,6 +43,7 @@

>>   #include <linux/pagemap.h>

>>   #include <linux/debugfs.h>

>>   #include <linux/iommu.h>

>> +#include <linux/hmm.h>

>>   #include "amdgpu.h"

>>   #include "amdgpu_object.h"

>>   #include "amdgpu_trace.h"

>> @@ -705,11 +706,6 @@ static unsigned long amdgpu_ttm_io_mem_pfn(struct 

>> ttm_buffer_object *bo,

>>   /*

>>    * TTM backend functions.

>>    */

>> -struct amdgpu_ttm_gup_task_list {

>> -    struct list_head    list;

>> -    struct task_struct    *task;

>> -};

>> -

>>   struct amdgpu_ttm_tt {

>>       struct ttm_dma_tt    ttm;

>>       u64            offset;

>> @@ -718,85 +714,96 @@ struct amdgpu_ttm_tt {

>>       uint32_t        userflags;

> 

>>       spinlock_t              guptasklock;

>>       struct list_head        guptasks;

> 

> Those fields are now also completely superfluous.

> 

> Apart from those two comments that now looks really good to me.

> 

> Regards,

> Christian.

> 

Yes, those fields are superfluous, I remove it in new patch. Thanks.

>> -    atomic_t        mmu_invalidations;

>> -    uint32_t        last_set_pages;

>> +    struct hmm_range    range;

>>   };

>>   /**

>> - * amdgpu_ttm_tt_get_user_pages - Pin pages of memory pointed to by a 

>> USERPTR

>> - * pointer to memory

>> + * amdgpu_ttm_tt_get_user_pages - get device accessible pages that 

>> back user

>> + * memory and start HMM tracking CPU page table update

>>    *

>> - * Called by amdgpu_gem_userptr_ioctl() and amdgpu_cs_parser_bos().

>> - * This provides a wrapper around the get_user_pages() call to provide

>> - * device accessible pages that back user memory.

>> + * Calling function must call amdgpu_ttm_tt_userptr_range_done() once 

>> and only

>> + * once afterwards to stop HMM tracking

>>    */

>>   int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page 

>> **pages)

>>   {

>>       struct amdgpu_ttm_tt *gtt = (void *)ttm;

>>       struct mm_struct *mm = gtt->usertask->mm;

>> -    unsigned int flags = 0;

>> -    unsigned pinned = 0;

>> -    int r;

>> +    unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;

>> +    struct hmm_range *range = &gtt->range;

>> +    int r = 0, i;

>>       if (!mm) /* Happens during process shutdown */

>>           return -ESRCH;

>> -    if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY))

>> -        flags |= FOLL_WRITE;

>> +    amdgpu_hmm_init_range(range);

>>       down_read(&mm->mmap_sem);

>> -    if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) {

>> -        /*

>> -         * check that we only use anonymous memory to prevent problems

>> -         * with writeback

>> -         */

>> -        unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;

>> -        struct vm_area_struct *vma;

>> +    range->vma = find_vma(mm, gtt->userptr);

>> +    if (!range_in_vma(range->vma, gtt->userptr, end))

>> +        r = -EFAULT;

>> +    else if ((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) &&

>> +        range->vma->vm_file)

>> +        r = -EPERM;

>> +    if (r)

>> +        goto out;

>> -        vma = find_vma(mm, gtt->userptr);

>> -        if (!vma || vma->vm_file || vma->vm_end < end) {

>> -            up_read(&mm->mmap_sem);

>> -            return -EPERM;

>> -        }

>> +    range->pfns = kvmalloc_array(ttm->num_pages, sizeof(uint64_t),

>> +                     GFP_KERNEL);

>> +    if (range->pfns == NULL) {

>> +        r = -ENOMEM;

>> +        goto out;

>>       }

>> +    range->start = gtt->userptr;

>> +    range->end = end;

>> -    /* loop enough times using contiguous pages of memory */

>> -    do {

>> -        unsigned num_pages = ttm->num_pages - pinned;

>> -        uint64_t userptr = gtt->userptr + pinned * PAGE_SIZE;

>> -        struct page **p = pages + pinned;

>> -        struct amdgpu_ttm_gup_task_list guptask;

>> +    range->pfns[0] = range->flags[HMM_PFN_VALID];

>> +    range->pfns[0] |= amdgpu_ttm_tt_is_readonly(ttm) ?

>> +                0 : range->flags[HMM_PFN_WRITE];

>> +    for (i = 1; i < ttm->num_pages; i++)

>> +        range->pfns[i] = range->pfns[0];

>> -        guptask.task = current;

>> -        spin_lock(&gtt->guptasklock);

>> -        list_add(&guptask.list, &gtt->guptasks);

>> -        spin_unlock(&gtt->guptasklock);

>> +    /* This may trigger page table update */

>> +    r = hmm_vma_fault(range, true);

>> +    if (r)

>> +        goto out_free_pfns;

>> -        if (mm == current->mm)

>> -            r = get_user_pages(userptr, num_pages, flags, p, NULL);

>> -        else

>> -            r = get_user_pages_remote(gtt->usertask,

>> -                    mm, userptr, num_pages,

>> -                    flags, p, NULL, NULL);

>> +    up_read(&mm->mmap_sem);

>> -        spin_lock(&gtt->guptasklock);

>> -        list_del(&guptask.list);

>> -        spin_unlock(&gtt->guptasklock);

>> +    for (i = 0; i < ttm->num_pages; i++)

>> +        pages[i] = hmm_pfn_to_page(range, range->pfns[i]);

>> -        if (r < 0)

>> -            goto release_pages;

>> +    return 0;

>> -        pinned += r;

>> +out_free_pfns:

>> +    kvfree(range->pfns);

>> +    range->pfns = NULL;

>> +out:

>> +    up_read(&mm->mmap_sem);

>> +    return r;

>> +}

>> -    } while (pinned < ttm->num_pages);

>> +/**

>> + * amdgpu_ttm_tt_userptr_range_done - stop HMM track the CPU page 

>> table change

>> + * Check if the pages backing this ttm range have been invalidated

>> + *

>> + * Returns: true if pages are still valid

>> + */

>> +bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm)

>> +{

>> +    struct amdgpu_ttm_tt *gtt = (void *)ttm;

>> +    bool r = false;

>> -    up_read(&mm->mmap_sem);

>> -    return 0;

>> +    if (!gtt || !gtt->userptr)

>> +        return false;

>> +

>> +    WARN_ONCE(!gtt->range.pfns, "No user pages to check\n");

>> +    if (gtt->range.pfns) {

>> +        r = hmm_vma_range_done(&gtt->range);

>> +        kvfree(gtt->range.pfns);

>> +        gtt->range.pfns = NULL;

>> +    }

>> -release_pages:

>> -    release_pages(pages, pinned);

>> -    up_read(&mm->mmap_sem);

>>       return r;

>>   }

>> @@ -809,16 +816,10 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt 

>> *ttm, struct page **pages)

>>    */

>>   void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page 

>> **pages)

>>   {

>> -    struct amdgpu_ttm_tt *gtt = (void *)ttm;

>>       unsigned i;

>> -    gtt->last_set_pages = atomic_read(&gtt->mmu_invalidations);

>> -    for (i = 0; i < ttm->num_pages; ++i) {

>> -        if (ttm->pages[i])

>> -            put_page(ttm->pages[i]);

>> -

>> +    for (i = 0; i < ttm->num_pages; ++i)

>>           ttm->pages[i] = pages ? pages[i] : NULL;

>> -    }

>>   }

>>   /**

>> @@ -903,10 +904,11 @@ static void amdgpu_ttm_tt_unpin_userptr(struct 

>> ttm_tt *ttm)

>>       /* unmap the pages mapped to the device */

>>       dma_unmap_sg(adev->dev, ttm->sg->sgl, ttm->sg->nents, direction);

>> -    /* mark the pages as dirty */

>> -    amdgpu_ttm_tt_mark_user_pages(ttm);

>> -

>>       sg_free_table(ttm->sg);

>> +

>> +    if (gtt->range.pfns &&

>> +        ttm->pages[0] == hmm_pfn_to_page(&gtt->range, 

>> gtt->range.pfns[0]))

>> +        WARN_ONCE(1, "Missing get_user_page_done\n");

>>   }

>>   int amdgpu_ttm_gart_bind(struct amdgpu_device *adev,

>> @@ -1258,8 +1260,6 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_tt 

>> *ttm, uint64_t addr,

>>       spin_lock_init(&gtt->guptasklock);

>>       INIT_LIST_HEAD(&gtt->guptasks);

>> -    atomic_set(&gtt->mmu_invalidations, 0);

>> -    gtt->last_set_pages = 0;

>>       return 0;

>>   }

>> @@ -1289,7 +1289,6 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt 

>> *ttm, unsigned long start,

>>                     unsigned long end)

>>   {

>>       struct amdgpu_ttm_tt *gtt = (void *)ttm;

>> -    struct amdgpu_ttm_gup_task_list *entry;

>>       unsigned long size;

>>       if (gtt == NULL || !gtt->userptr)

>> @@ -1302,48 +1301,20 @@ bool amdgpu_ttm_tt_affect_userptr(struct 

>> ttm_tt *ttm, unsigned long start,

>>       if (gtt->userptr > end || gtt->userptr + size <= start)

>>           return false;

>> -    /* Search the lists of tasks that hold this mapping and see

>> -     * if current is one of them.  If it is return false.

>> -     */

>> -    spin_lock(&gtt->guptasklock);

>> -    list_for_each_entry(entry, &gtt->guptasks, list) {

>> -        if (entry->task == current) {

>> -            spin_unlock(&gtt->guptasklock);

>> -            return false;

>> -        }

>> -    }

>> -    spin_unlock(&gtt->guptasklock);

>> -

>> -    atomic_inc(&gtt->mmu_invalidations);

>> -

>>       return true;

>>   }

>>   /**

>> - * amdgpu_ttm_tt_userptr_invalidated - Has the ttm_tt object been 

>> invalidated?

>> - */

>> -bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,

>> -                       int *last_invalidated)

>> -{

>> -    struct amdgpu_ttm_tt *gtt = (void *)ttm;

>> -    int prev_invalidated = *last_invalidated;

>> -

>> -    *last_invalidated = atomic_read(&gtt->mmu_invalidations);

>> -    return prev_invalidated != *last_invalidated;

>> -}

>> -

>> -/**

>> - * amdgpu_ttm_tt_userptr_needs_pages - Have the pages backing this 

>> ttm_tt object

>> - * been invalidated since the last time they've been set?

>> + * amdgpu_ttm_tt_is_userptr - Have the pages backing by userptr?

>>    */

>> -bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm)

>> +bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm)

>>   {

>>       struct amdgpu_ttm_tt *gtt = (void *)ttm;

>>       if (gtt == NULL || !gtt->userptr)

>>           return false;

>> -    return atomic_read(&gtt->mmu_invalidations) != gtt->last_set_pages;

>> +    return true;

>>   }

>>   /**

>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h 

>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h

>> index b5b2d101f7db..8988c87fff9d 100644

>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h

>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h

>> @@ -102,6 +102,7 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object 

>> *bo);

>>   int amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo);

>>   int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page 

>> **pages);

>> +bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm);

>>   void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page 

>> **pages);

>>   void amdgpu_ttm_tt_mark_user_pages(struct ttm_tt *ttm);

>>   int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,

>> @@ -112,7 +113,7 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt 

>> *ttm, unsigned long start,

>>                     unsigned long end);

>>   bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,

>>                          int *last_invalidated);

>> -bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm);

>> +bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm);

>>   bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm);

>>   uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct 

>> ttm_mem_reg *mem);

>>   uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct 

>> ttm_tt *ttm,

>
Am 05.02.19 um 18:25 schrieb Yang, Philip:
> [SNIP]+

>>> +    if (r == -ERESTARTSYS) {

>>> +        if (!--tries) {

>>> +            DRM_ERROR("Possible deadlock? Retry too many times\n");

>>> +            return -EDEADLK;

>>> +        }

>>> +        goto restart;

>> You really need to restart the IOCTL to potentially handle signals.

>>

>> But since the calling code correctly handles this already you can just

>> drop this change as far as I can see.

>>

> I agree that we should return -ERESTARTSYS to upper layer to handle signals.

>

> But I do not find upper layers handle -ERESTARTSYS in the entire calling

> path, ksys_ioctl -> do_vfs_ioctl -> amdgpu_drm_ioctl -> drm->ioctl ->

> drm_ioctl_kernel -> amdgpu_cs_ioctl. The error code returns to

> application. I confirm it, libdrm userptr test application calling

> amdgpu_cs_ioctl return code is -512, which is -ERESTARTSYS.

>

> So application should handle -ERESTARTSYS to restart the ioctl, but

> libdrm userptr test application doesn't handle this. This causes the

> test failed.


This is a bug in the test cases then.

-ERESTARTSYS can happen at any time during interruptible waiting and it 
is mandatory for the upper layer to handle it correctly.

>

> Below are details of userptr path difference. For the previous path,

> libdrm test always goes to step 2, step 3 never trigger. So it never

> return -ERESTARTSYS, but in theory, this could happen.

>

> For HMM path, the test always goes to step 3, we have to handle this

> case inside amdgpu_cs_ioctl. Maybe I can change amdgpu_cs_submit to

> return -EBUSY, then restart the ioctl inside amdgpu_cs_ioctl. I will

> submit new patch.


Clearly a NAK, this won't work correctly.

Christian.

>

> The previous userptr path:

> 1. gem_userptr_ioctl to register userptr

> 2. amdgpu_cs_parser_bos, check if userptr is invalidated, then update

> userptr

> 3. amdgpu_cs_submit, hold p->mn lock, check if userptr is invalidated,

> return -ERESTARTSYS

>

> The new HMM userptr path:

> 1. gem_userptr_ioctl to register userptr

> 2. amdgpu_cs_parser_bos, start HMM to track userptr update

> 3. amdgpu_cs_submit, hold p->mn lock, check HMM if userptr is

> invalidated, return -ERESTARTSYS

>

>

>>> +    }

>>> +

>>>        return r;

>>>    }

>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c

>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c

>>> index d21dd2f369da..555285e329ed 100644

>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c

>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c

>>> @@ -329,26 +329,24 @@ int amdgpu_gem_userptr_ioctl(struct drm_device

>>> *dev, void *data,

>>>            r = amdgpu_bo_reserve(bo, true);

>>>            if (r)

>>> -            goto free_pages;

>>> +            goto user_pages_done;

>>>            amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);

>>>            r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);

>>>            amdgpu_bo_unreserve(bo);

>>>            if (r)

>>> -            goto free_pages;

>>> +            goto user_pages_done;

>>>        }

>>>        r = drm_gem_handle_create(filp, gobj, &handle);

>>> -    /* drop reference from allocate - handle holds it now */

>>> -    drm_gem_object_put_unlocked(gobj);

>>>        if (r)

>>> -        return r;

>>> +        goto user_pages_done;

>>>        args->handle = handle;

>>> -    return 0;

>>> -free_pages:

>>> -    release_pages(bo->tbo.ttm->pages, bo->tbo.ttm->num_pages);

>>> +user_pages_done:

>>> +    if (args->flags & AMDGPU_GEM_USERPTR_VALIDATE)

>>> +        amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);

>>>    release_object:

>>>        drm_gem_object_put_unlocked(gobj);

>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c

>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c

>>> index e356867d2308..fa2516016c43 100644

>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c

>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c

>>> @@ -222,8 +222,6 @@ static void amdgpu_mn_invalidate_node(struct

>>> amdgpu_mn_node *node,

>>>                true, false, MAX_SCHEDULE_TIMEOUT);

>>>            if (r <= 0)

>>>                DRM_ERROR("(%ld) failed to wait for user bo\n", r);

>>> -

>>> -        amdgpu_ttm_tt_mark_user_pages(bo->tbo.ttm);

>>>        }

>>>    }

>>> @@ -504,3 +502,26 @@ void amdgpu_mn_unregister(struct amdgpu_bo *bo)

>>>        mutex_unlock(&adev->mn_lock);

>>>    }

>>> +/* flags used by HMM internal, not related to CPU/GPU PTE flags */

>>> +static const uint64_t hmm_range_flags[HMM_PFN_FLAG_MAX] = {

>>> +        (1 << 0), /* HMM_PFN_VALID */

>>> +        (1 << 1), /* HMM_PFN_WRITE */

>>> +        0 /* HMM_PFN_DEVICE_PRIVATE */

>>> +};

>>> +

>>> +static const uint64_t hmm_range_values[HMM_PFN_VALUE_MAX] = {

>>> +        0xfffffffffffffffeUL, /* HMM_PFN_ERROR */

>>> +        0, /* HMM_PFN_NONE */

>>> +        0xfffffffffffffffcUL /* HMM_PFN_SPECIAL */

>>> +};

>>> +

>>> +void amdgpu_hmm_init_range(struct hmm_range *range)

>>> +{

>>> +    if (range) {

>>> +        range->flags = hmm_range_flags;

>>> +        range->values = hmm_range_values;

>>> +        range->pfn_shift = PAGE_SHIFT;

>>> +        range->pfns = NULL;

>>> +        INIT_LIST_HEAD(&range->list);

>>> +    }

>>> +}

>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h

>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h

>>> index 0a51fd00021c..4803e216e174 100644

>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h

>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h

>>> @@ -25,9 +25,10 @@

>>>    #define __AMDGPU_MN_H__

>>>    /*

>>> - * MMU Notifier

>>> + * HMM mirror

>>>     */

>>>    struct amdgpu_mn;

>>> +struct hmm_range;

>>>    enum amdgpu_mn_type {

>>>        AMDGPU_MN_TYPE_GFX,

>>> @@ -41,6 +42,7 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device

>>> *adev,

>>>                    enum amdgpu_mn_type type);

>>>    int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr);

>>>    void amdgpu_mn_unregister(struct amdgpu_bo *bo);

>>> +void amdgpu_hmm_init_range(struct hmm_range *range);

>>>    #else

>>>    static inline void amdgpu_mn_lock(struct amdgpu_mn *mn) {}

>>>    static inline void amdgpu_mn_unlock(struct amdgpu_mn *mn) {}

>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c

>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c

>>> index 73e71e61dc99..3fd7851da0c0 100644

>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c

>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c

>>> @@ -43,6 +43,7 @@

>>>    #include <linux/pagemap.h>

>>>    #include <linux/debugfs.h>

>>>    #include <linux/iommu.h>

>>> +#include <linux/hmm.h>

>>>    #include "amdgpu.h"

>>>    #include "amdgpu_object.h"

>>>    #include "amdgpu_trace.h"

>>> @@ -705,11 +706,6 @@ static unsigned long amdgpu_ttm_io_mem_pfn(struct

>>> ttm_buffer_object *bo,

>>>    /*

>>>     * TTM backend functions.

>>>     */

>>> -struct amdgpu_ttm_gup_task_list {

>>> -    struct list_head    list;

>>> -    struct task_struct    *task;

>>> -};

>>> -

>>>    struct amdgpu_ttm_tt {

>>>        struct ttm_dma_tt    ttm;

>>>        u64            offset;

>>> @@ -718,85 +714,96 @@ struct amdgpu_ttm_tt {

>>>        uint32_t        userflags;

>>>        spinlock_t              guptasklock;

>>>        struct list_head        guptasks;

>> Those fields are now also completely superfluous.

>>

>> Apart from those two comments that now looks really good to me.

>>

>> Regards,

>> Christian.

>>

> Yes, those fields are superfluous, I remove it in new patch. Thanks.

>

>>> -    atomic_t        mmu_invalidations;

>>> -    uint32_t        last_set_pages;

>>> +    struct hmm_range    range;

>>>    };

>>>    /**

>>> - * amdgpu_ttm_tt_get_user_pages - Pin pages of memory pointed to by a

>>> USERPTR

>>> - * pointer to memory

>>> + * amdgpu_ttm_tt_get_user_pages - get device accessible pages that

>>> back user

>>> + * memory and start HMM tracking CPU page table update

>>>     *

>>> - * Called by amdgpu_gem_userptr_ioctl() and amdgpu_cs_parser_bos().

>>> - * This provides a wrapper around the get_user_pages() call to provide

>>> - * device accessible pages that back user memory.

>>> + * Calling function must call amdgpu_ttm_tt_userptr_range_done() once

>>> and only

>>> + * once afterwards to stop HMM tracking

>>>     */

>>>    int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page

>>> **pages)

>>>    {

>>>        struct amdgpu_ttm_tt *gtt = (void *)ttm;

>>>        struct mm_struct *mm = gtt->usertask->mm;

>>> -    unsigned int flags = 0;

>>> -    unsigned pinned = 0;

>>> -    int r;

>>> +    unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;

>>> +    struct hmm_range *range = &gtt->range;

>>> +    int r = 0, i;

>>>        if (!mm) /* Happens during process shutdown */

>>>            return -ESRCH;

>>> -    if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY))

>>> -        flags |= FOLL_WRITE;

>>> +    amdgpu_hmm_init_range(range);

>>>        down_read(&mm->mmap_sem);

>>> -    if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) {

>>> -        /*

>>> -         * check that we only use anonymous memory to prevent problems

>>> -         * with writeback

>>> -         */

>>> -        unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;

>>> -        struct vm_area_struct *vma;

>>> +    range->vma = find_vma(mm, gtt->userptr);

>>> +    if (!range_in_vma(range->vma, gtt->userptr, end))

>>> +        r = -EFAULT;

>>> +    else if ((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) &&

>>> +        range->vma->vm_file)

>>> +        r = -EPERM;

>>> +    if (r)

>>> +        goto out;

>>> -        vma = find_vma(mm, gtt->userptr);

>>> -        if (!vma || vma->vm_file || vma->vm_end < end) {

>>> -            up_read(&mm->mmap_sem);

>>> -            return -EPERM;

>>> -        }

>>> +    range->pfns = kvmalloc_array(ttm->num_pages, sizeof(uint64_t),

>>> +                     GFP_KERNEL);

>>> +    if (range->pfns == NULL) {

>>> +        r = -ENOMEM;

>>> +        goto out;

>>>        }

>>> +    range->start = gtt->userptr;

>>> +    range->end = end;

>>> -    /* loop enough times using contiguous pages of memory */

>>> -    do {

>>> -        unsigned num_pages = ttm->num_pages - pinned;

>>> -        uint64_t userptr = gtt->userptr + pinned * PAGE_SIZE;

>>> -        struct page **p = pages + pinned;

>>> -        struct amdgpu_ttm_gup_task_list guptask;

>>> +    range->pfns[0] = range->flags[HMM_PFN_VALID];

>>> +    range->pfns[0] |= amdgpu_ttm_tt_is_readonly(ttm) ?

>>> +                0 : range->flags[HMM_PFN_WRITE];

>>> +    for (i = 1; i < ttm->num_pages; i++)

>>> +        range->pfns[i] = range->pfns[0];

>>> -        guptask.task = current;

>>> -        spin_lock(&gtt->guptasklock);

>>> -        list_add(&guptask.list, &gtt->guptasks);

>>> -        spin_unlock(&gtt->guptasklock);

>>> +    /* This may trigger page table update */

>>> +    r = hmm_vma_fault(range, true);

>>> +    if (r)

>>> +        goto out_free_pfns;

>>> -        if (mm == current->mm)

>>> -            r = get_user_pages(userptr, num_pages, flags, p, NULL);

>>> -        else

>>> -            r = get_user_pages_remote(gtt->usertask,

>>> -                    mm, userptr, num_pages,

>>> -                    flags, p, NULL, NULL);

>>> +    up_read(&mm->mmap_sem);

>>> -        spin_lock(&gtt->guptasklock);

>>> -        list_del(&guptask.list);

>>> -        spin_unlock(&gtt->guptasklock);

>>> +    for (i = 0; i < ttm->num_pages; i++)

>>> +        pages[i] = hmm_pfn_to_page(range, range->pfns[i]);

>>> -        if (r < 0)

>>> -            goto release_pages;

>>> +    return 0;

>>> -        pinned += r;

>>> +out_free_pfns:

>>> +    kvfree(range->pfns);

>>> +    range->pfns = NULL;

>>> +out:

>>> +    up_read(&mm->mmap_sem);

>>> +    return r;

>>> +}

>>> -    } while (pinned < ttm->num_pages);

>>> +/**

>>> + * amdgpu_ttm_tt_userptr_range_done - stop HMM track the CPU page

>>> table change

>>> + * Check if the pages backing this ttm range have been invalidated

>>> + *

>>> + * Returns: true if pages are still valid

>>> + */

>>> +bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm)

>>> +{

>>> +    struct amdgpu_ttm_tt *gtt = (void *)ttm;

>>> +    bool r = false;

>>> -    up_read(&mm->mmap_sem);

>>> -    return 0;

>>> +    if (!gtt || !gtt->userptr)

>>> +        return false;

>>> +

>>> +    WARN_ONCE(!gtt->range.pfns, "No user pages to check\n");

>>> +    if (gtt->range.pfns) {

>>> +        r = hmm_vma_range_done(&gtt->range);

>>> +        kvfree(gtt->range.pfns);

>>> +        gtt->range.pfns = NULL;

>>> +    }

>>> -release_pages:

>>> -    release_pages(pages, pinned);

>>> -    up_read(&mm->mmap_sem);

>>>        return r;

>>>    }

>>> @@ -809,16 +816,10 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt

>>> *ttm, struct page **pages)

>>>     */

>>>    void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page

>>> **pages)

>>>    {

>>> -    struct amdgpu_ttm_tt *gtt = (void *)ttm;

>>>        unsigned i;

>>> -    gtt->last_set_pages = atomic_read(&gtt->mmu_invalidations);

>>> -    for (i = 0; i < ttm->num_pages; ++i) {

>>> -        if (ttm->pages[i])

>>> -            put_page(ttm->pages[i]);

>>> -

>>> +    for (i = 0; i < ttm->num_pages; ++i)

>>>            ttm->pages[i] = pages ? pages[i] : NULL;

>>> -    }

>>>    }

>>>    /**

>>> @@ -903,10 +904,11 @@ static void amdgpu_ttm_tt_unpin_userptr(struct

>>> ttm_tt *ttm)

>>>        /* unmap the pages mapped to the device */

>>>        dma_unmap_sg(adev->dev, ttm->sg->sgl, ttm->sg->nents, direction);

>>> -    /* mark the pages as dirty */

>>> -    amdgpu_ttm_tt_mark_user_pages(ttm);

>>> -

>>>        sg_free_table(ttm->sg);

>>> +

>>> +    if (gtt->range.pfns &&

>>> +        ttm->pages[0] == hmm_pfn_to_page(&gtt->range,

>>> gtt->range.pfns[0]))

>>> +        WARN_ONCE(1, "Missing get_user_page_done\n");

>>>    }

>>>    int amdgpu_ttm_gart_bind(struct amdgpu_device *adev,

>>> @@ -1258,8 +1260,6 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_tt

>>> *ttm, uint64_t addr,

>>>        spin_lock_init(&gtt->guptasklock);

>>>        INIT_LIST_HEAD(&gtt->guptasks);

>>> -    atomic_set(&gtt->mmu_invalidations, 0);

>>> -    gtt->last_set_pages = 0;

>>>        return 0;

>>>    }

>>> @@ -1289,7 +1289,6 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt

>>> *ttm, unsigned long start,

>>>                      unsigned long end)

>>>    {

>>>        struct amdgpu_ttm_tt *gtt = (void *)ttm;

>>> -    struct amdgpu_ttm_gup_task_list *entry;

>>>        unsigned long size;

>>>        if (gtt == NULL || !gtt->userptr)

>>> @@ -1302,48 +1301,20 @@ bool amdgpu_ttm_tt_affect_userptr(struct

>>> ttm_tt *ttm, unsigned long start,

>>>        if (gtt->userptr > end || gtt->userptr + size <= start)

>>>            return false;

>>> -    /* Search the lists of tasks that hold this mapping and see

>>> -     * if current is one of them.  If it is return false.

>>> -     */

>>> -    spin_lock(&gtt->guptasklock);

>>> -    list_for_each_entry(entry, &gtt->guptasks, list) {

>>> -        if (entry->task == current) {

>>> -            spin_unlock(&gtt->guptasklock);

>>> -            return false;

>>> -        }

>>> -    }

>>> -    spin_unlock(&gtt->guptasklock);

>>> -

>>> -    atomic_inc(&gtt->mmu_invalidations);

>>> -

>>>        return true;

>>>    }

>>>    /**

>>> - * amdgpu_ttm_tt_userptr_invalidated - Has the ttm_tt object been

>>> invalidated?

>>> - */

>>> -bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,

>>> -                       int *last_invalidated)

>>> -{

>>> -    struct amdgpu_ttm_tt *gtt = (void *)ttm;

>>> -    int prev_invalidated = *last_invalidated;

>>> -

>>> -    *last_invalidated = atomic_read(&gtt->mmu_invalidations);

>>> -    return prev_invalidated != *last_invalidated;

>>> -}

>>> -

>>> -/**

>>> - * amdgpu_ttm_tt_userptr_needs_pages - Have the pages backing this

>>> ttm_tt object

>>> - * been invalidated since the last time they've been set?

>>> + * amdgpu_ttm_tt_is_userptr - Have the pages backing by userptr?

>>>     */

>>> -bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm)

>>> +bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm)

>>>    {

>>>        struct amdgpu_ttm_tt *gtt = (void *)ttm;

>>>        if (gtt == NULL || !gtt->userptr)

>>>            return false;

>>> -    return atomic_read(&gtt->mmu_invalidations) != gtt->last_set_pages;

>>> +    return true;

>>>    }

>>>    /**

>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h

>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h

>>> index b5b2d101f7db..8988c87fff9d 100644

>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h

>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h

>>> @@ -102,6 +102,7 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object

>>> *bo);

>>>    int amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo);

>>>    int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page

>>> **pages);

>>> +bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm);

>>>    void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page

>>> **pages);

>>>    void amdgpu_ttm_tt_mark_user_pages(struct ttm_tt *ttm);

>>>    int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,

>>> @@ -112,7 +113,7 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt

>>> *ttm, unsigned long start,

>>>                      unsigned long end);

>>>    bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,

>>>                           int *last_invalidated);

>>> -bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm);

>>> +bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm);

>>>    bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm);

>>>    uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct

>>> ttm_mem_reg *mem);

>>>    uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct

>>> ttm_tt *ttm,
Hi Christian,

I will submit new patch for review, my comments embedded inline below.

Thanks,
Philip

On 2019-02-05 1:09 p.m., Koenig, Christian wrote:
> Am 05.02.19 um 18:25 schrieb Yang, Philip:

>> [SNIP]+

>>>> +    if (r == -ERESTARTSYS) {

>>>> +        if (!--tries) {

>>>> +            DRM_ERROR("Possible deadlock? Retry too many times\n");

>>>> +            return -EDEADLK;

>>>> +        }

>>>> +        goto restart;

>>> You really need to restart the IOCTL to potentially handle signals.

>>>

>>> But since the calling code correctly handles this already you can just

>>> drop this change as far as I can see.

>>>

>> I agree that we should return -ERESTARTSYS to upper layer to handle signals.

>>

>> But I do not find upper layers handle -ERESTARTSYS in the entire calling

>> path, ksys_ioctl -> do_vfs_ioctl -> amdgpu_drm_ioctl -> drm->ioctl ->

>> drm_ioctl_kernel -> amdgpu_cs_ioctl. The error code returns to

>> application. I confirm it, libdrm userptr test application calling

>> amdgpu_cs_ioctl return code is -512, which is -ERESTARTSYS.

>>

>> So application should handle -ERESTARTSYS to restart the ioctl, but

>> libdrm userptr test application doesn't handle this. This causes the

>> test failed.

> 

> This is a bug in the test cases then.

> 

> -ERESTARTSYS can happen at any time during interruptible waiting and it

> is mandatory for the upper layer to handle it correctly.

> 

-ERESTARTSYS can be returned only when signal is pending, signal handler 
will translate ERESTARTSYS to EINTR, drmIoctl in libdrm does handle 
EINTR and restart the ioctl. The test cases are ok.

Driver fail path should not return ERESTARTSYS to user space. The new 
patch, I change amdgpu_cs_submit to return -EAGAIN if userptr is 
updated, and amdgpu_cs_ioctl redo the ioctl only if error code is 
-EAGAIN. ERESTARTSYS error code returns to user space for signal handle 
as before.

>>

>> Below are details of userptr path difference. For the previous path,

>> libdrm test always goes to step 2, step 3 never trigger. So it never

>> return -ERESTARTSYS, but in theory, this could happen.

>>

>> For HMM path, the test always goes to step 3, we have to handle this

>> case inside amdgpu_cs_ioctl. Maybe I can change amdgpu_cs_submit to

>> return -EBUSY, then restart the ioctl inside amdgpu_cs_ioctl. I will

>> submit new patch.

> 

> Clearly a NAK, this won't work correctly.

> 

I don't understand your concern, may you explain the reason?

> Christian.

> 

>>

>> The previous userptr path:

>> 1. gem_userptr_ioctl to register userptr

>> 2. amdgpu_cs_parser_bos, check if userptr is invalidated, then update

>> userptr

>> 3. amdgpu_cs_submit, hold p->mn lock, check if userptr is invalidated,

>> return -ERESTARTSYS

>>

>> The new HMM userptr path:

>> 1. gem_userptr_ioctl to register userptr

>> 2. amdgpu_cs_parser_bos, start HMM to track userptr update

>> 3. amdgpu_cs_submit, hold p->mn lock, check HMM if userptr is

>> invalidated, return -ERESTARTSYS

>>

>>

>>>> +    }

>>>> +

>>>>         return r;

>>>>     }

>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c

>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c

>>>> index d21dd2f369da..555285e329ed 100644

>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c

>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c

>>>> @@ -329,26 +329,24 @@ int amdgpu_gem_userptr_ioctl(struct drm_device

>>>> *dev, void *data,

>>>>             r = amdgpu_bo_reserve(bo, true);

>>>>             if (r)

>>>> -            goto free_pages;

>>>> +            goto user_pages_done;

>>>>             amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);

>>>>             r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);

>>>>             amdgpu_bo_unreserve(bo);

>>>>             if (r)

>>>> -            goto free_pages;

>>>> +            goto user_pages_done;

>>>>         }

>>>>         r = drm_gem_handle_create(filp, gobj, &handle);

>>>> -    /* drop reference from allocate - handle holds it now */

>>>> -    drm_gem_object_put_unlocked(gobj);

>>>>         if (r)

>>>> -        return r;

>>>> +        goto user_pages_done;

>>>>         args->handle = handle;

>>>> -    return 0;

>>>> -free_pages:

>>>> -    release_pages(bo->tbo.ttm->pages, bo->tbo.ttm->num_pages);

>>>> +user_pages_done:

>>>> +    if (args->flags & AMDGPU_GEM_USERPTR_VALIDATE)

>>>> +        amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);

>>>>     release_object:

>>>>         drm_gem_object_put_unlocked(gobj);

>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c

>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c

>>>> index e356867d2308..fa2516016c43 100644

>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c

>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c

>>>> @@ -222,8 +222,6 @@ static void amdgpu_mn_invalidate_node(struct

>>>> amdgpu_mn_node *node,

>>>>                 true, false, MAX_SCHEDULE_TIMEOUT);

>>>>             if (r <= 0)

>>>>                 DRM_ERROR("(%ld) failed to wait for user bo\n", r);

>>>> -

>>>> -        amdgpu_ttm_tt_mark_user_pages(bo->tbo.ttm);

>>>>         }

>>>>     }

>>>> @@ -504,3 +502,26 @@ void amdgpu_mn_unregister(struct amdgpu_bo *bo)

>>>>         mutex_unlock(&adev->mn_lock);

>>>>     }

>>>> +/* flags used by HMM internal, not related to CPU/GPU PTE flags */

>>>> +static const uint64_t hmm_range_flags[HMM_PFN_FLAG_MAX] = {

>>>> +        (1 << 0), /* HMM_PFN_VALID */

>>>> +        (1 << 1), /* HMM_PFN_WRITE */

>>>> +        0 /* HMM_PFN_DEVICE_PRIVATE */

>>>> +};

>>>> +

>>>> +static const uint64_t hmm_range_values[HMM_PFN_VALUE_MAX] = {

>>>> +        0xfffffffffffffffeUL, /* HMM_PFN_ERROR */

>>>> +        0, /* HMM_PFN_NONE */

>>>> +        0xfffffffffffffffcUL /* HMM_PFN_SPECIAL */

>>>> +};

>>>> +

>>>> +void amdgpu_hmm_init_range(struct hmm_range *range)

>>>> +{

>>>> +    if (range) {

>>>> +        range->flags = hmm_range_flags;

>>>> +        range->values = hmm_range_values;

>>>> +        range->pfn_shift = PAGE_SHIFT;

>>>> +        range->pfns = NULL;

>>>> +        INIT_LIST_HEAD(&range->list);

>>>> +    }

>>>> +}

>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h

>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h

>>>> index 0a51fd00021c..4803e216e174 100644

>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h

>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h

>>>> @@ -25,9 +25,10 @@

>>>>     #define __AMDGPU_MN_H__

>>>>     /*

>>>> - * MMU Notifier

>>>> + * HMM mirror

>>>>      */

>>>>     struct amdgpu_mn;

>>>> +struct hmm_range;

>>>>     enum amdgpu_mn_type {

>>>>         AMDGPU_MN_TYPE_GFX,

>>>> @@ -41,6 +42,7 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device

>>>> *adev,

>>>>                     enum amdgpu_mn_type type);

>>>>     int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr);

>>>>     void amdgpu_mn_unregister(struct amdgpu_bo *bo);

>>>> +void amdgpu_hmm_init_range(struct hmm_range *range);

>>>>     #else

>>>>     static inline void amdgpu_mn_lock(struct amdgpu_mn *mn) {}

>>>>     static inline void amdgpu_mn_unlock(struct amdgpu_mn *mn) {}

>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c

>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c

>>>> index 73e71e61dc99..3fd7851da0c0 100644

>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c

>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c

>>>> @@ -43,6 +43,7 @@

>>>>     #include <linux/pagemap.h>

>>>>     #include <linux/debugfs.h>

>>>>     #include <linux/iommu.h>

>>>> +#include <linux/hmm.h>

>>>>     #include "amdgpu.h"

>>>>     #include "amdgpu_object.h"

>>>>     #include "amdgpu_trace.h"

>>>> @@ -705,11 +706,6 @@ static unsigned long amdgpu_ttm_io_mem_pfn(struct

>>>> ttm_buffer_object *bo,

>>>>     /*

>>>>      * TTM backend functions.

>>>>      */

>>>> -struct amdgpu_ttm_gup_task_list {

>>>> -    struct list_head    list;

>>>> -    struct task_struct    *task;

>>>> -};

>>>> -

>>>>     struct amdgpu_ttm_tt {

>>>>         struct ttm_dma_tt    ttm;

>>>>         u64            offset;

>>>> @@ -718,85 +714,96 @@ struct amdgpu_ttm_tt {

>>>>         uint32_t        userflags;

>>>>         spinlock_t              guptasklock;

>>>>         struct list_head        guptasks;

>>> Those fields are now also completely superfluous.

>>>

>>> Apart from those two comments that now looks really good to me.

>>>

>>> Regards,

>>> Christian.

>>>

>> Yes, those fields are superfluous, I remove it in new patch. Thanks.

>>

>>>> -    atomic_t        mmu_invalidations;

>>>> -    uint32_t        last_set_pages;

>>>> +    struct hmm_range    range;

>>>>     };

>>>>     /**

>>>> - * amdgpu_ttm_tt_get_user_pages - Pin pages of memory pointed to by a

>>>> USERPTR

>>>> - * pointer to memory

>>>> + * amdgpu_ttm_tt_get_user_pages - get device accessible pages that

>>>> back user

>>>> + * memory and start HMM tracking CPU page table update

>>>>      *

>>>> - * Called by amdgpu_gem_userptr_ioctl() and amdgpu_cs_parser_bos().

>>>> - * This provides a wrapper around the get_user_pages() call to provide

>>>> - * device accessible pages that back user memory.

>>>> + * Calling function must call amdgpu_ttm_tt_userptr_range_done() once

>>>> and only

>>>> + * once afterwards to stop HMM tracking

>>>>      */

>>>>     int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page

>>>> **pages)

>>>>     {

>>>>         struct amdgpu_ttm_tt *gtt = (void *)ttm;

>>>>         struct mm_struct *mm = gtt->usertask->mm;

>>>> -    unsigned int flags = 0;

>>>> -    unsigned pinned = 0;

>>>> -    int r;

>>>> +    unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;

>>>> +    struct hmm_range *range = &gtt->range;

>>>> +    int r = 0, i;

>>>>         if (!mm) /* Happens during process shutdown */

>>>>             return -ESRCH;

>>>> -    if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY))

>>>> -        flags |= FOLL_WRITE;

>>>> +    amdgpu_hmm_init_range(range);

>>>>         down_read(&mm->mmap_sem);

>>>> -    if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) {

>>>> -        /*

>>>> -         * check that we only use anonymous memory to prevent problems

>>>> -         * with writeback

>>>> -         */

>>>> -        unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;

>>>> -        struct vm_area_struct *vma;

>>>> +    range->vma = find_vma(mm, gtt->userptr);

>>>> +    if (!range_in_vma(range->vma, gtt->userptr, end))

>>>> +        r = -EFAULT;

>>>> +    else if ((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) &&

>>>> +        range->vma->vm_file)

>>>> +        r = -EPERM;

>>>> +    if (r)

>>>> +        goto out;

>>>> -        vma = find_vma(mm, gtt->userptr);

>>>> -        if (!vma || vma->vm_file || vma->vm_end < end) {

>>>> -            up_read(&mm->mmap_sem);

>>>> -            return -EPERM;

>>>> -        }

>>>> +    range->pfns = kvmalloc_array(ttm->num_pages, sizeof(uint64_t),

>>>> +                     GFP_KERNEL);

>>>> +    if (range->pfns == NULL) {

>>>> +        r = -ENOMEM;

>>>> +        goto out;

>>>>         }

>>>> +    range->start = gtt->userptr;

>>>> +    range->end = end;

>>>> -    /* loop enough times using contiguous pages of memory */

>>>> -    do {

>>>> -        unsigned num_pages = ttm->num_pages - pinned;

>>>> -        uint64_t userptr = gtt->userptr + pinned * PAGE_SIZE;

>>>> -        struct page **p = pages + pinned;

>>>> -        struct amdgpu_ttm_gup_task_list guptask;

>>>> +    range->pfns[0] = range->flags[HMM_PFN_VALID];

>>>> +    range->pfns[0] |= amdgpu_ttm_tt_is_readonly(ttm) ?

>>>> +                0 : range->flags[HMM_PFN_WRITE];

>>>> +    for (i = 1; i < ttm->num_pages; i++)

>>>> +        range->pfns[i] = range->pfns[0];

>>>> -        guptask.task = current;

>>>> -        spin_lock(&gtt->guptasklock);

>>>> -        list_add(&guptask.list, &gtt->guptasks);

>>>> -        spin_unlock(&gtt->guptasklock);

>>>> +    /* This may trigger page table update */

>>>> +    r = hmm_vma_fault(range, true);

>>>> +    if (r)

>>>> +        goto out_free_pfns;

>>>> -        if (mm == current->mm)

>>>> -            r = get_user_pages(userptr, num_pages, flags, p, NULL);

>>>> -        else

>>>> -            r = get_user_pages_remote(gtt->usertask,

>>>> -                    mm, userptr, num_pages,

>>>> -                    flags, p, NULL, NULL);

>>>> +    up_read(&mm->mmap_sem);

>>>> -        spin_lock(&gtt->guptasklock);

>>>> -        list_del(&guptask.list);

>>>> -        spin_unlock(&gtt->guptasklock);

>>>> +    for (i = 0; i < ttm->num_pages; i++)

>>>> +        pages[i] = hmm_pfn_to_page(range, range->pfns[i]);

>>>> -        if (r < 0)

>>>> -            goto release_pages;

>>>> +    return 0;

>>>> -        pinned += r;

>>>> +out_free_pfns:

>>>> +    kvfree(range->pfns);

>>>> +    range->pfns = NULL;

>>>> +out:

>>>> +    up_read(&mm->mmap_sem);

>>>> +    return r;

>>>> +}

>>>> -    } while (pinned < ttm->num_pages);

>>>> +/**

>>>> + * amdgpu_ttm_tt_userptr_range_done - stop HMM track the CPU page

>>>> table change

>>>> + * Check if the pages backing this ttm range have been invalidated

>>>> + *

>>>> + * Returns: true if pages are still valid

>>>> + */

>>>> +bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm)

>>>> +{

>>>> +    struct amdgpu_ttm_tt *gtt = (void *)ttm;

>>>> +    bool r = false;

>>>> -    up_read(&mm->mmap_sem);

>>>> -    return 0;

>>>> +    if (!gtt || !gtt->userptr)

>>>> +        return false;

>>>> +

>>>> +    WARN_ONCE(!gtt->range.pfns, "No user pages to check\n");

>>>> +    if (gtt->range.pfns) {

>>>> +        r = hmm_vma_range_done(&gtt->range);

>>>> +        kvfree(gtt->range.pfns);

>>>> +        gtt->range.pfns = NULL;

>>>> +    }

>>>> -release_pages:

>>>> -    release_pages(pages, pinned);

>>>> -    up_read(&mm->mmap_sem);

>>>>         return r;

>>>>     }

>>>> @@ -809,16 +816,10 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt

>>>> *ttm, struct page **pages)

>>>>      */

>>>>     void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page

>>>> **pages)

>>>>     {

>>>> -    struct amdgpu_ttm_tt *gtt = (void *)ttm;

>>>>         unsigned i;

>>>> -    gtt->last_set_pages = atomic_read(&gtt->mmu_invalidations);

>>>> -    for (i = 0; i < ttm->num_pages; ++i) {

>>>> -        if (ttm->pages[i])

>>>> -            put_page(ttm->pages[i]);

>>>> -

>>>> +    for (i = 0; i < ttm->num_pages; ++i)

>>>>             ttm->pages[i] = pages ? pages[i] : NULL;

>>>> -    }

>>>>     }

>>>>     /**

>>>> @@ -903,10 +904,11 @@ static void amdgpu_ttm_tt_unpin_userptr(struct

>>>> ttm_tt *ttm)

>>>>         /* unmap the pages mapped to the device */

>>>>         dma_unmap_sg(adev->dev, ttm->sg->sgl, ttm->sg->nents, direction);

>>>> -    /* mark the pages as dirty */

>>>> -    amdgpu_ttm_tt_mark_user_pages(ttm);

>>>> -

>>>>         sg_free_table(ttm->sg);

>>>> +

>>>> +    if (gtt->range.pfns &&

>>>> +        ttm->pages[0] == hmm_pfn_to_page(&gtt->range,

>>>> gtt->range.pfns[0]))

>>>> +        WARN_ONCE(1, "Missing get_user_page_done\n");

>>>>     }

>>>>     int amdgpu_ttm_gart_bind(struct amdgpu_device *adev,

>>>> @@ -1258,8 +1260,6 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_tt

>>>> *ttm, uint64_t addr,

>>>>         spin_lock_init(&gtt->guptasklock);

>>>>         INIT_LIST_HEAD(&gtt->guptasks);

>>>> -    atomic_set(&gtt->mmu_invalidations, 0);

>>>> -    gtt->last_set_pages = 0;

>>>>         return 0;

>>>>     }

>>>> @@ -1289,7 +1289,6 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt

>>>> *ttm, unsigned long start,

>>>>                       unsigned long end)

>>>>     {

>>>>         struct amdgpu_ttm_tt *gtt = (void *)ttm;

>>>> -    struct amdgpu_ttm_gup_task_list *entry;

>>>>         unsigned long size;

>>>>         if (gtt == NULL || !gtt->userptr)

>>>> @@ -1302,48 +1301,20 @@ bool amdgpu_ttm_tt_affect_userptr(struct

>>>> ttm_tt *ttm, unsigned long start,

>>>>         if (gtt->userptr > end || gtt->userptr + size <= start)

>>>>             return false;

>>>> -    /* Search the lists of tasks that hold this mapping and see

>>>> -     * if current is one of them.  If it is return false.

>>>> -     */

>>>> -    spin_lock(&gtt->guptasklock);

>>>> -    list_for_each_entry(entry, &gtt->guptasks, list) {

>>>> -        if (entry->task == current) {

>>>> -            spin_unlock(&gtt->guptasklock);

>>>> -            return false;

>>>> -        }

>>>> -    }

>>>> -    spin_unlock(&gtt->guptasklock);

>>>> -

>>>> -    atomic_inc(&gtt->mmu_invalidations);

>>>> -

>>>>         return true;

>>>>     }

>>>>     /**

>>>> - * amdgpu_ttm_tt_userptr_invalidated - Has the ttm_tt object been

>>>> invalidated?

>>>> - */

>>>> -bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,

>>>> -                       int *last_invalidated)

>>>> -{

>>>> -    struct amdgpu_ttm_tt *gtt = (void *)ttm;

>>>> -    int prev_invalidated = *last_invalidated;

>>>> -

>>>> -    *last_invalidated = atomic_read(&gtt->mmu_invalidations);

>>>> -    return prev_invalidated != *last_invalidated;

>>>> -}

>>>> -

>>>> -/**

>>>> - * amdgpu_ttm_tt_userptr_needs_pages - Have the pages backing this

>>>> ttm_tt object

>>>> - * been invalidated since the last time they've been set?

>>>> + * amdgpu_ttm_tt_is_userptr - Have the pages backing by userptr?

>>>>      */

>>>> -bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm)

>>>> +bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm)

>>>>     {

>>>>         struct amdgpu_ttm_tt *gtt = (void *)ttm;

>>>>         if (gtt == NULL || !gtt->userptr)

>>>>             return false;

>>>> -    return atomic_read(&gtt->mmu_invalidations) != gtt->last_set_pages;

>>>> +    return true;

>>>>     }

>>>>     /**

>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h

>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h

>>>> index b5b2d101f7db..8988c87fff9d 100644

>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h

>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h

>>>> @@ -102,6 +102,7 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object

>>>> *bo);

>>>>     int amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo);

>>>>     int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page

>>>> **pages);

>>>> +bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm);

>>>>     void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page

>>>> **pages);

>>>>     void amdgpu_ttm_tt_mark_user_pages(struct ttm_tt *ttm);

>>>>     int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,

>>>> @@ -112,7 +113,7 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt

>>>> *ttm, unsigned long start,

>>>>                       unsigned long end);

>>>>     bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,

>>>>                            int *last_invalidated);

>>>> -bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm);

>>>> +bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm);

>>>>     bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm);

>>>>     uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct

>>>> ttm_mem_reg *mem);

>>>>     uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct

>>>> ttm_tt *ttm,

>
Am 05.02.19 um 22:56 schrieb Yang, Philip:
> Hi Christian,
>
> I will submit new patch for review, my comments embedded inline below.
>
> Thanks,
> Philip
>
> On 2019-02-05 1:09 p.m., Koenig, Christian wrote:
>> Am 05.02.19 um 18:25 schrieb Yang, Philip:
>>> [SNIP]+
>>>>> +    if (r == -ERESTARTSYS) {
>>>>> +        if (!--tries) {
>>>>> +            DRM_ERROR("Possible deadlock? Retry too many times\n");
>>>>> +            return -EDEADLK;
>>>>> +        }
>>>>> +        goto restart;
>>>> You really need to restart the IOCTL to potentially handle signals.
>>>>
>>>> But since the calling code correctly handles this already you can just
>>>> drop this change as far as I can see.
>>>>
>>> I agree that we should return -ERESTARTSYS to upper layer to handle signals.
>>>
>>> But I do not find upper layers handle -ERESTARTSYS in the entire calling
>>> path, ksys_ioctl -> do_vfs_ioctl -> amdgpu_drm_ioctl -> drm->ioctl ->
>>> drm_ioctl_kernel -> amdgpu_cs_ioctl. The error code returns to
>>> application. I confirm it, libdrm userptr test application calling
>>> amdgpu_cs_ioctl return code is -512, which is -ERESTARTSYS.
>>>
>>> So application should handle -ERESTARTSYS to restart the ioctl, but
>>> libdrm userptr test application doesn't handle this. This causes the
>>> test failed.
>> This is a bug in the test cases then.
>>
>> -ERESTARTSYS can happen at any time during interruptible waiting and it
>> is mandatory for the upper layer to handle it correctly.
>>
> -ERESTARTSYS can be returned only when signal is pending, signal handler
> will translate ERESTARTSYS to EINTR, drmIoctl in libdrm does handle
> EINTR and restart the ioctl. The test cases are ok.

Interesting, I thought that this was handled unconditionally if a signal 
is pending or not. But that could as well be architecture specific.

> Driver fail path should not return ERESTARTSYS to user space. The new
> patch, I change amdgpu_cs_submit to return -EAGAIN if userptr is
> updated, and amdgpu_cs_ioctl redo the ioctl only if error code is
> -EAGAIN. ERESTARTSYS error code returns to user space for signal handle
> as before.

Good idea, that is probably cleaner. EGAIN and EINTR should have the 
same effect in drmIoctl IIRC.

>>> Below are details of userptr path difference. For the previous path,
>>> libdrm test always goes to step 2, step 3 never trigger. So it never
>>> return -ERESTARTSYS, but in theory, this could happen.
>>>
>>> For HMM path, the test always goes to step 3, we have to handle this
>>> case inside amdgpu_cs_ioctl. Maybe I can change amdgpu_cs_submit to
>>> return -EBUSY, then restart the ioctl inside amdgpu_cs_ioctl. I will
>>> submit new patch.
>> Clearly a NAK, this won't work correctly.
>>
> I don't understand your concern, may you explain the reason?

When some underlying code returns ERESTARTSYS we *MUST* forward that to 
the upper layer.

Handling it in a loop or replacing it manually with EINTR or EGAIN will 
break signal handling.

This is suggested quite often, but can end up in an endless loop inside 
the kernel which is quite ugly.

Regards,
Christian.