[2/2] drm/amdgpu: enable huge page handling in the VM v5

Submitted by Christian König on July 19, 2017, 3:26 p.m.

Details

Message ID 1500478012-20842-2-git-send-email-deathsimple@vodafone.de
State New
Headers show
Series "Series without cover letter" ( rev: 1 ) in AMD X.Org drivers

Not browsing as part of any series.

Commit Message

Christian König July 19, 2017, 3:26 p.m.
From: Christian König <christian.koenig@amd.com>

The hardware can use huge pages to map 2MB of address space with only one PDE.

v2: few cleanups and rebased
v3: skip PT updates if we are using the PDE
v4: rebased, added support for CPU based updates
v5: fix CPU based updates once more

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 119 +++++++++++++++++++++++++++------
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |   4 ++
 2 files changed, 103 insertions(+), 20 deletions(-)

Patch hide | download patch | download mbox

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index a3dbebe..950777e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -351,6 +351,7 @@  static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
 
 			entry->bo = pt;
 			entry->addr = 0;
+			entry->huge_page = false;
 		}
 
 		if (level < adev->vm_manager.num_level) {
@@ -1116,7 +1117,8 @@  static int amdgpu_vm_update_level(struct amdgpu_device *adev,
 
 		pt = amdgpu_bo_gpu_offset(bo);
 		pt = amdgpu_gart_get_vm_pde(adev, pt);
-		if (parent->entries[pt_idx].addr == pt)
+		if (parent->entries[pt_idx].addr == pt ||
+		    parent->entries[pt_idx].huge_page)
 			continue;
 
 		parent->entries[pt_idx].addr = pt;
@@ -1257,29 +1259,95 @@  int amdgpu_vm_update_directories(struct amdgpu_device *adev,
 }
 
 /**
- * amdgpu_vm_find_pt - find the page table for an address
+ * amdgpu_vm_find_entry - find the entry for an address
  *
  * @p: see amdgpu_pte_update_params definition
  * @addr: virtual address in question
+ * @entry: resulting entry or NULL
+ * @parent: parent entry
  *
- * Find the page table BO for a virtual address, return NULL when none found.
+ * Find the vm_pt entry and it's parent for the given address.
  */
-static struct amdgpu_bo *amdgpu_vm_get_pt(struct amdgpu_pte_update_params *p,
-					  uint64_t addr)
+void amdgpu_vm_get_entry(struct amdgpu_pte_update_params *p, uint64_t addr,
+			 struct amdgpu_vm_pt **entry,
+			 struct amdgpu_vm_pt **parent)
 {
-	struct amdgpu_vm_pt *entry = &p->vm->root;
 	unsigned idx, level = p->adev->vm_manager.num_level;
 
-	while (entry->entries) {
+	*parent = NULL;
+	*entry = &p->vm->root;
+	while ((*entry)->entries) {
 		idx = addr >> (p->adev->vm_manager.block_size * level--);
-		idx %= amdgpu_bo_size(entry->bo) / 8;
-		entry = &entry->entries[idx];
+		idx %= amdgpu_bo_size((*entry)->bo) / 8;
+		*parent = *entry;
+		*entry = &(*entry)->entries[idx];
 	}
 
 	if (level)
-		return NULL;
+		*entry = NULL;
+}
+
+/**
+ * amdgpu_vm_handle_huge_pages - handle updating the PD with huge pages
+ *
+ * @p: see amdgpu_pte_update_params definition
+ * @entry: vm_pt entry to check
+ * @parent: parent entry
+ * @nptes: number of PTEs updated with this operation
+ * @dst: destination address where the PTEs should point to
+ * @flags: access flags fro the PTEs
+ *
+ * Check if we can update the PD with a huge page.
+ */
+static int amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p,
+				       struct amdgpu_vm_pt *entry,
+				       struct amdgpu_vm_pt *parent,
+				       unsigned nptes, uint64_t dst,
+				       uint64_t flags)
+{
+	bool use_cpu_update = (p->func == amdgpu_vm_cpu_set_ptes);
+	uint64_t pd_addr, pde;
+	int r;
 
-	return entry->bo;
+	/* In the case of a mixed PT the PDE must point to it*/
+	if (p->adev->asic_type < CHIP_VEGA10 ||
+	    nptes != AMDGPU_VM_PTE_COUNT(p->adev) ||
+	    p->func == amdgpu_vm_do_copy_ptes ||
+	    !(flags & AMDGPU_PTE_VALID)) {
+
+		dst = amdgpu_bo_gpu_offset(entry->bo);
+		dst = amdgpu_gart_get_vm_pde(p->adev, dst);
+		flags = AMDGPU_PTE_VALID;
+	} else {
+		flags |= AMDGPU_PDE_PTE;
+	}
+
+	if (entry->addr == dst &&
+	    entry->huge_page == !!(flags & AMDGPU_PDE_PTE))
+		return 0;
+
+	entry->addr = dst;
+	entry->huge_page = !!(flags & AMDGPU_PDE_PTE);
+
+	if (use_cpu_update) {
+		r = amdgpu_bo_kmap(parent->bo, (void *)&pd_addr);
+		if (r)
+			return r;
+
+		pde = pd_addr + (entry - parent->entries) * 8;
+		amdgpu_vm_cpu_set_ptes(p, pde, dst, 1, 0, flags);
+	} else {
+		if (parent->bo->shadow) {
+			pd_addr = amdgpu_bo_gpu_offset(parent->bo->shadow);
+			pde = pd_addr + (entry - parent->entries) * 8;
+			amdgpu_vm_do_set_ptes(p, pde, dst, 1, 0, flags);
+		}
+		pd_addr = amdgpu_bo_gpu_offset(parent->bo);
+		pde = pd_addr + (entry - parent->entries) * 8;
+		amdgpu_vm_do_set_ptes(p, pde, dst, 1, 0, flags);
+	}
+
+	return 0;
 }
 
 /**
@@ -1306,22 +1374,31 @@  static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
 	struct amdgpu_bo *pt;
 	unsigned nptes;
 	bool use_cpu_update = (params->func == amdgpu_vm_cpu_set_ptes);
-
+	int r;
 
 	/* walk over the address space and update the page tables */
-	for (addr = start; addr < end; addr += nptes) {
-		pt = amdgpu_vm_get_pt(params, addr);
-		if (!pt) {
-			pr_err("PT not found, aborting update_ptes\n");
-			return -EINVAL;
-		}
+	for (addr = start; addr < end; addr += nptes,
+	     dst += nptes * AMDGPU_GPU_PAGE_SIZE) {
+		struct amdgpu_vm_pt *entry, *parent;
+
+		amdgpu_vm_get_entry(params, addr, &entry, &parent);
+		if (!entry)
+			return -ENOENT;
 
 		if ((addr & ~mask) == (end & ~mask))
 			nptes = end - addr;
 		else
 			nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask);
 
+		r = amdgpu_vm_handle_huge_pages(params, entry, parent,
+						nptes, dst, flags);
+		if (r)
+			return r;
 
+		if (entry->huge_page)
+			continue;
+
+		pt = entry->bo;
 		if (use_cpu_update) {
 			pe_start = (unsigned long)pt->kptr;
 		} else {
@@ -1337,8 +1414,6 @@  static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
 		pe_start += (addr & mask) * 8;
 		params->func(params, pe_start, dst, nptes,
 			     AMDGPU_GPU_PAGE_SIZE, flags);
-
-		dst += nptes * AMDGPU_GPU_PAGE_SIZE;
 	}
 
 	return 0;
@@ -1490,6 +1565,9 @@  static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 	/* padding, etc. */
 	ndw = 64;
 
+	/* one PDE write for each huge page */
+	ndw += ((nptes >> adev->vm_manager.block_size) + 1) * 7;
+
 	if (src) {
 		/* only copy commands needed */
 		ndw += ncmds * 7;
@@ -1569,6 +1647,7 @@  static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 
 error_free:
 	amdgpu_job_free(job);
+	amdgpu_vm_invalidate_level(&vm->root);
 	return r;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index c4f5d1f..34d9174 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -70,6 +70,9 @@  struct amdgpu_bo_list_entry;
 /* TILED for VEGA10, reserved for older ASICs  */
 #define AMDGPU_PTE_PRT		(1ULL << 51)
 
+/* PDE is handled as PTE for VEGA10 */
+#define AMDGPU_PDE_PTE		(1ULL << 54)
+
 /* VEGA10 only */
 #define AMDGPU_PTE_MTYPE(a)    ((uint64_t)a << 57)
 #define AMDGPU_PTE_MTYPE_MASK	AMDGPU_PTE_MTYPE(3ULL)
@@ -100,6 +103,7 @@  struct amdgpu_bo_list_entry;
 struct amdgpu_vm_pt {
 	struct amdgpu_bo	*bo;
 	uint64_t		addr;
+	bool			huge_page;
 
 	/* array of page tables, one for each directory entry */
 	struct amdgpu_vm_pt	*entries;

Comments

Two nit-picks inline. Otherwise this series is Reviewed-and-tested-by:
Felix Kuehling <Felix.Kuehling@amd.com>

That said, I was not able to measure any performance improvement on
Vega10 using mixbench yet. I checked that the same test is sensitive to
TLB optimizations on Fiji. So probably we're still missing something.

Regards,
 Felix


On 17-07-19 11:26 AM, Christian König wrote:
> From: Christian König <christian.koenig@amd.com>
>
> The hardware can use huge pages to map 2MB of address space with only one PDE.
>
> v2: few cleanups and rebased
> v3: skip PT updates if we are using the PDE
> v4: rebased, added support for CPU based updates
> v5: fix CPU based updates once more
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 119 +++++++++++++++++++++++++++------
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |   4 ++
>  2 files changed, 103 insertions(+), 20 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index a3dbebe..950777e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -351,6 +351,7 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
>  
>  			entry->bo = pt;
>  			entry->addr = 0;
> +			entry->huge_page = false;
>  		}
>  
>  		if (level < adev->vm_manager.num_level) {
> @@ -1116,7 +1117,8 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
>  
>  		pt = amdgpu_bo_gpu_offset(bo);
>  		pt = amdgpu_gart_get_vm_pde(adev, pt);
> -		if (parent->entries[pt_idx].addr == pt)
> +		if (parent->entries[pt_idx].addr == pt ||
> +		    parent->entries[pt_idx].huge_page)
>  			continue;
>  
>  		parent->entries[pt_idx].addr = pt;
> @@ -1257,29 +1259,95 @@ int amdgpu_vm_update_directories(struct amdgpu_device *adev,
>  }
>  
>  /**
> - * amdgpu_vm_find_pt - find the page table for an address
> + * amdgpu_vm_find_entry - find the entry for an address
>   *
>   * @p: see amdgpu_pte_update_params definition
>   * @addr: virtual address in question
> + * @entry: resulting entry or NULL
> + * @parent: parent entry
>   *
> - * Find the page table BO for a virtual address, return NULL when none found.
> + * Find the vm_pt entry and it's parent for the given address.
>   */
> -static struct amdgpu_bo *amdgpu_vm_get_pt(struct amdgpu_pte_update_params *p,
> -					  uint64_t addr)
> +void amdgpu_vm_get_entry(struct amdgpu_pte_update_params *p, uint64_t addr,
> +			 struct amdgpu_vm_pt **entry,
> +			 struct amdgpu_vm_pt **parent)
>  {
> -	struct amdgpu_vm_pt *entry = &p->vm->root;
>  	unsigned idx, level = p->adev->vm_manager.num_level;
>  
> -	while (entry->entries) {
> +	*parent = NULL;
> +	*entry = &p->vm->root;
> +	while ((*entry)->entries) {
>  		idx = addr >> (p->adev->vm_manager.block_size * level--);
> -		idx %= amdgpu_bo_size(entry->bo) / 8;
> -		entry = &entry->entries[idx];
> +		idx %= amdgpu_bo_size((*entry)->bo) / 8;
> +		*parent = *entry;
> +		*entry = &(*entry)->entries[idx];
>  	}
>  
>  	if (level)
> -		return NULL;
> +		*entry = NULL;
> +}
> +
> +/**
> + * amdgpu_vm_handle_huge_pages - handle updating the PD with huge pages
> + *
> + * @p: see amdgpu_pte_update_params definition
> + * @entry: vm_pt entry to check
> + * @parent: parent entry
> + * @nptes: number of PTEs updated with this operation
> + * @dst: destination address where the PTEs should point to
> + * @flags: access flags fro the PTEs
> + *
> + * Check if we can update the PD with a huge page.
> + */
> +static int amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p,
> +				       struct amdgpu_vm_pt *entry,
> +				       struct amdgpu_vm_pt *parent,
> +				       unsigned nptes, uint64_t dst,
> +				       uint64_t flags)
> +{
> +	bool use_cpu_update = (p->func == amdgpu_vm_cpu_set_ptes);
> +	uint64_t pd_addr, pde;
> +	int r;
>  
> -	return entry->bo;
> +	/* In the case of a mixed PT the PDE must point to it*/
> +	if (p->adev->asic_type < CHIP_VEGA10 ||
> +	    nptes != AMDGPU_VM_PTE_COUNT(p->adev) ||
> +	    p->func == amdgpu_vm_do_copy_ptes ||
> +	    !(flags & AMDGPU_PTE_VALID)) {
> +
> +		dst = amdgpu_bo_gpu_offset(entry->bo);
> +		dst = amdgpu_gart_get_vm_pde(p->adev, dst);
> +		flags = AMDGPU_PTE_VALID;
> +	} else {
> +		flags |= AMDGPU_PDE_PTE;
> +	}
> +
> +	if (entry->addr == dst &&
> +	    entry->huge_page == !!(flags & AMDGPU_PDE_PTE))
> +		return 0;
> +
> +	entry->addr = dst;
> +	entry->huge_page = !!(flags & AMDGPU_PDE_PTE);
> +
> +	if (use_cpu_update) {
> +		r = amdgpu_bo_kmap(parent->bo, (void *)&pd_addr);
> +		if (r)
> +			return r;
> +
> +		pde = pd_addr + (entry - parent->entries) * 8;
> +		amdgpu_vm_cpu_set_ptes(p, pde, dst, 1, 0, flags);
> +	} else {
> +		if (parent->bo->shadow) {
> +			pd_addr = amdgpu_bo_gpu_offset(parent->bo->shadow);
> +			pde = pd_addr + (entry - parent->entries) * 8;
> +			amdgpu_vm_do_set_ptes(p, pde, dst, 1, 0, flags);
> +		}
> +		pd_addr = amdgpu_bo_gpu_offset(parent->bo);
> +		pde = pd_addr + (entry - parent->entries) * 8;
> +		amdgpu_vm_do_set_ptes(p, pde, dst, 1, 0, flags);
> +	}
> +
> +	return 0;
>  }
>  
>  /**
> @@ -1306,22 +1374,31 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
>  	struct amdgpu_bo *pt;
>  	unsigned nptes;
>  	bool use_cpu_update = (params->func == amdgpu_vm_cpu_set_ptes);
> -
> +	int r;
>  
>  	/* walk over the address space and update the page tables */
> -	for (addr = start; addr < end; addr += nptes) {
> -		pt = amdgpu_vm_get_pt(params, addr);
> -		if (!pt) {
> -			pr_err("PT not found, aborting update_ptes\n");
> -			return -EINVAL;
> -		}
> +	for (addr = start; addr < end; addr += nptes,
> +	     dst += nptes * AMDGPU_GPU_PAGE_SIZE) {
> +		struct amdgpu_vm_pt *entry, *parent;
> +
> +		amdgpu_vm_get_entry(params, addr, &entry, &parent);
> +		if (!entry)
> +			return -ENOENT;

ENOENT means "no such file or directory". Not sure that makes sense
outside of file system code.

>  
>  		if ((addr & ~mask) == (end & ~mask))
>  			nptes = end - addr;
>  		else
>  			nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask);
>  
> +		r = amdgpu_vm_handle_huge_pages(params, entry, parent,
> +						nptes, dst, flags);
> +		if (r)
> +			return r;
>  
> +		if (entry->huge_page)
> +			continue;
> +
> +		pt = entry->bo;
>  		if (use_cpu_update) {
>  			pe_start = (unsigned long)pt->kptr;
>  		} else {
> @@ -1337,8 +1414,6 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
>  		pe_start += (addr & mask) * 8;
>  		params->func(params, pe_start, dst, nptes,
>  			     AMDGPU_GPU_PAGE_SIZE, flags);
> -
> -		dst += nptes * AMDGPU_GPU_PAGE_SIZE;
>  	}
>  
>  	return 0;
> @@ -1490,6 +1565,9 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>  	/* padding, etc. */
>  	ndw = 64;
>  
> +	/* one PDE write for each huge page */
> +	ndw += ((nptes >> adev->vm_manager.block_size) + 1) * 7;

I think page directory updates only need 6 dwords. That's what
update_level uses anyway.

> +
>  	if (src) {
>  		/* only copy commands needed */
>  		ndw += ncmds * 7;
> @@ -1569,6 +1647,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>  
>  error_free:
>  	amdgpu_job_free(job);
> +	amdgpu_vm_invalidate_level(&vm->root);
>  	return r;
>  }
>  
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index c4f5d1f..34d9174 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -70,6 +70,9 @@ struct amdgpu_bo_list_entry;
>  /* TILED for VEGA10, reserved for older ASICs  */
>  #define AMDGPU_PTE_PRT		(1ULL << 51)
>  
> +/* PDE is handled as PTE for VEGA10 */
> +#define AMDGPU_PDE_PTE		(1ULL << 54)
> +
>  /* VEGA10 only */
>  #define AMDGPU_PTE_MTYPE(a)    ((uint64_t)a << 57)
>  #define AMDGPU_PTE_MTYPE_MASK	AMDGPU_PTE_MTYPE(3ULL)
> @@ -100,6 +103,7 @@ struct amdgpu_bo_list_entry;
>  struct amdgpu_vm_pt {
>  	struct amdgpu_bo	*bo;
>  	uint64_t		addr;
> +	bool			huge_page;
>  
>  	/* array of page tables, one for each directory entry */
>  	struct amdgpu_vm_pt	*entries;