drm/amdgpu: handle all fragment sizes v2

Submitted by Christian König on Aug. 30, 2017, 1:25 p.m.

Message ID 1504099501-1810-1-git-send-email-deathsimple@vodafone.de
State New
Series "drm/amdgpu: handle all fragment sizes v2" ( rev: 1 ) in AMD X.Org drivers

Commit Message

From: Roger He <Hongbo.He@amd.com>

Handle all power-of-two fragment sizes when filling in PTEs instead of only the single configured fragment size. This can improve performance in some cases, since larger mappings can be used whenever the alignment and size of a range allow it.

v2 (chk): handle all sizes, simplify the patch quite a bit

Signed-off-by: Roger He <Hongbo.He@amd.com>
Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 70 +++++++++++++++++++++++-----------
 1 file changed, 47 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 4cdfb70..04815a5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1415,8 +1415,6 @@  static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params	*params,
 				uint64_t start, uint64_t end,
 				uint64_t dst, uint64_t flags)
 {
-	int r;
-
 	/**
 	 * The MC L1 TLB supports variable sized pages, based on a fragment
 	 * field in the PTE. When this field is set to a non-zero value, page
@@ -1435,39 +1433,65 @@  static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params	*params,
 	 * Userspace can support this by aligning virtual base address and
 	 * allocation size to the fragment size.
 	 */
-	unsigned pages_per_frag = params->adev->vm_manager.fragment_size;
-	uint64_t frag_flags = AMDGPU_PTE_FRAG(pages_per_frag);
-	uint64_t frag_align = 1 << pages_per_frag;
+	unsigned max_frag = params->adev->vm_manager.fragment_size;
+	uint64_t frag_flags, frag_end;
+	unsigned frag;
 
-	uint64_t frag_start = ALIGN(start, frag_align);
-	uint64_t frag_end = end & ~(frag_align - 1);
+	int r;
 
 	/* system pages are non continuously */
-	if (params->src || !(flags & AMDGPU_PTE_VALID) ||
-	    (frag_start >= frag_end))
+	if (params->src || !(flags & AMDGPU_PTE_VALID))
 		return amdgpu_vm_update_ptes(params, start, end, dst, flags);
 
-	/* handle the 4K area at the beginning */
-	if (start != frag_start) {
-		r = amdgpu_vm_update_ptes(params, start, frag_start,
-					  dst, flags);
+	/* Handle the fragments at the beginning */
+	while (start != end) {
+		/* This intentionally wraps around if no bit is set */
+		frag = min(ffs(start), fls64(end - start)) - 1;
+		if (frag >= max_frag)
+			break;
+
+		frag_flags = AMDGPU_PTE_FRAG(frag);
+		frag_end = start + (1 << frag);
+
+		r = amdgpu_vm_update_ptes(params, start, frag_end, dst,
+					  flags | frag_flags);
 		if (r)
 			return r;
-		dst += (frag_start - start) * AMDGPU_GPU_PAGE_SIZE;
+
+		dst += (frag_end - start) * AMDGPU_GPU_PAGE_SIZE;
+		start = frag_end;
 	}
 
 	/* handle the area in the middle */
-	r = amdgpu_vm_update_ptes(params, frag_start, frag_end, dst,
-				  flags | frag_flags);
-	if (r)
-		return r;
+	if (start != end) {
+		frag_flags = AMDGPU_PTE_FRAG(max_frag);
+		frag_end = end & ~((1 << max_frag) - 1);
 
-	/* handle the 4K area at the end */
-	if (frag_end != end) {
-		dst += (frag_end - frag_start) * AMDGPU_GPU_PAGE_SIZE;
-		r = amdgpu_vm_update_ptes(params, frag_end, end, dst, flags);
+		r = amdgpu_vm_update_ptes(params, start, frag_end, dst,
+					  flags | frag_flags);
+		if (r)
+			return r;
+
+		dst += (frag_end - start) * AMDGPU_GPU_PAGE_SIZE;
+		start = frag_end;
 	}
-	return r;
+
+	/* Handle the fragments at the end */
+	while (start != end) {
+		frag = fls64(end - start) - 1;
+		frag_flags = AMDGPU_PTE_FRAG(frag);
+		frag_end = start + (1 << frag);
+
+		r = amdgpu_vm_update_ptes(params, start, frag_end,
+					  dst, flags | frag_flags);
+		if (r)
+			return r;
+
+		dst += (frag_end - start) * AMDGPU_GPU_PAGE_SIZE;
+		start = frag_end;
+	}
+
+	return 0;
 }
 
 /**
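
For reference, here is a small standalone sketch (not part of the patch) of the fragment selection performed in amdgpu_vm_frag_ptes() above: the value passed to AMDGPU_PTE_FRAG() is the largest power of two that both matches the alignment of the current start page and fits into the remaining range, clamped to vm_manager.fragment_size. The example page range, the max_frag value and the ffs()/fls64() emulation via GCC builtins are illustrative assumptions, not kernel code; the kernel computes the fragment as min(ffs(start), fls64(end - start)) - 1, which intentionally wraps around when start == 0, a corner case the sketch sidesteps.

#include <stdio.h>
#include <stdint.h>

/* Emulate the kernel helpers: ffs() is the 1-based index of the lowest
 * set bit, fls64() the 1-based index of the highest set bit. */
static unsigned ffs_u64(uint64_t v)   { return v ? __builtin_ctzll(v) + 1 : 0; }
static unsigned fls64_u64(uint64_t v) { return v ? 64 - __builtin_clzll(v) : 0; }

int main(void)
{
        uint64_t start = 3, end = 100; /* example range, in 4K GPU pages */
        unsigned max_frag = 4;         /* example vm_manager.fragment_size (64K) */

        while (start != end) {
                unsigned a = ffs_u64(start);         /* alignment of start */
                unsigned s = fls64_u64(end - start); /* size of remaining range */

                /* Largest power-of-two block that is aligned at start and
                 * still fits into the remaining range, capped at max_frag. */
                unsigned frag = (a && a < s ? a : s) - 1;

                if (frag > max_frag)
                        frag = max_frag;

                /* Granularity is 4K << frag, i.e. 1 << (12 + frag) bytes. */
                printf("pages %llu-%llu: fragment %u -> %u KiB granularity\n",
                       (unsigned long long)start,
                       (unsigned long long)(start + (1ull << frag) - 1),
                       frag, 4u << frag);

                start += 1ull << frag;
        }
        return 0;
}

Running the sketch shows the same shape the patch produces: small fragments at the unaligned head of the range, max_frag-sized fragments in the middle, and progressively smaller fragments toward the tail.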