[1/2] drm/amdgpu: increase fragmentation size for Vega10 v2

Submitted by Christian König on July 19, 2017, 3:26 p.m.

Details

Message ID 1500478012-20842-1-git-send-email-deathsimple@vodafone.de
State New
Headers show
Series "Series without cover letter" ( rev: 1 ) in AMD X.Org drivers

Not browsing as part of any series.

Commit Message

Christian König July 19, 2017, 3:26 p.m.
From: Christian König <christian.koenig@amd.com>

The fragment bits work differently for Vega10 compared to previous generations.

Increase the fragment size to 2MB for now to better handle that.

v2: handle the hardware setup as well

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c  | 5 +++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c   | 5 +++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h   | 4 +++-
 drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c | 4 +++-
 drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c  | 4 +++-
 5 files changed, 15 insertions(+), 7 deletions(-)

Patch hide | download patch | download mbox

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 7a8da32..fc77844 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -588,8 +588,9 @@  static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
 		dev_info.virtual_address_offset = AMDGPU_VA_RESERVED_SIZE;
 		dev_info.virtual_address_max = (uint64_t)adev->vm_manager.max_pfn * AMDGPU_GPU_PAGE_SIZE;
 		dev_info.virtual_address_alignment = max((int)PAGE_SIZE, AMDGPU_GPU_PAGE_SIZE);
-		dev_info.pte_fragment_size = (1 << AMDGPU_LOG2_PAGES_PER_FRAG) *
-					     AMDGPU_GPU_PAGE_SIZE;
+		dev_info.pte_fragment_size =
+			(1 << AMDGPU_LOG2_PAGES_PER_FRAG(adev)) *
+			AMDGPU_GPU_PAGE_SIZE;
 		dev_info.gart_page_size = AMDGPU_GPU_PAGE_SIZE;
 
 		dev_info.cu_active_number = adev->gfx.cu_info.number;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 55d1c7f..a3dbebe 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1381,8 +1381,9 @@  static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params	*params,
 	 */
 
 	/* SI and newer are optimized for 64KB */
-	uint64_t frag_flags = AMDGPU_PTE_FRAG(AMDGPU_LOG2_PAGES_PER_FRAG);
-	uint64_t frag_align = 1 << AMDGPU_LOG2_PAGES_PER_FRAG;
+	unsigned pages_per_frag = AMDGPU_LOG2_PAGES_PER_FRAG(params->adev);
+	uint64_t frag_flags = AMDGPU_PTE_FRAG(pages_per_frag);
+	uint64_t frag_align = 1 << pages_per_frag;
 
 	uint64_t frag_start = ALIGN(start, frag_align);
 	uint64_t frag_end = end & ~(frag_align - 1);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 3441ec5..c4f5d1f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -51,7 +51,9 @@  struct amdgpu_bo_list_entry;
 #define AMDGPU_VM_PTB_ALIGN_SIZE   32768
 
 /* LOG2 number of continuous pages for the fragment field */
-#define AMDGPU_LOG2_PAGES_PER_FRAG 4
+#define AMDGPU_LOG2_PAGES_PER_FRAG(adev) \
+	((adev)->asic_type < CHIP_VEGA10 ? 4 : \
+	 (adev)->vm_manager.block_size)
 
 #define AMDGPU_PTE_VALID	(1ULL << 0)
 #define AMDGPU_PTE_SYSTEM	(1ULL << 1)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
index 008ad3d..408723e 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
@@ -129,7 +129,7 @@  static void gfxhub_v1_0_init_cache_regs(struct amdgpu_device *adev)
 	/* Setup L2 cache */
 	tmp = RREG32_SOC15(GC, 0, mmVM_L2_CNTL);
 	tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 1);
-	tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING, 0);
+	tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING, 1);
 	/* XXX for emulation, Refer to closed source code.*/
 	tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, L2_PDE0_CACHE_TAG_GENERATION_MODE,
 			    0);
@@ -144,6 +144,8 @@  static void gfxhub_v1_0_init_cache_regs(struct amdgpu_device *adev)
 	WREG32_SOC15(GC, 0, mmVM_L2_CNTL2, tmp);
 
 	tmp = mmVM_L2_CNTL3_DEFAULT;
+	tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 12);
+	tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
 	WREG32_SOC15(GC, 0, mmVM_L2_CNTL3, tmp);
 
 	tmp = mmVM_L2_CNTL4_DEFAULT;
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
index 96f1628..ad8def3 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
@@ -143,7 +143,7 @@  static void mmhub_v1_0_init_cache_regs(struct amdgpu_device *adev)
 	/* Setup L2 cache */
 	tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL);
 	tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 1);
-	tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING, 0);
+	tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING, 1);
 	/* XXX for emulation, Refer to closed source code.*/
 	tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, L2_PDE0_CACHE_TAG_GENERATION_MODE,
 			    0);
@@ -158,6 +158,8 @@  static void mmhub_v1_0_init_cache_regs(struct amdgpu_device *adev)
 	WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL2, tmp);
 
 	tmp = mmVM_L2_CNTL3_DEFAULT;
+	tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 12);
+	tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
 	WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL3, tmp);
 
 	tmp = mmVM_L2_CNTL4_DEFAULT;

Comments

On 17-07-19 11:26 AM, Christian König wrote:
> From: Christian König <christian.koenig@amd.com>
>
> The fragment bits work differently for Vega10 compared to previous generations.
>
> Increase the fragment size to 2MB for now to better handle that.
>
> v2: handle the hardware setup as well
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c  | 5 +++--
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c   | 5 +++--
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h   | 4 +++-
>  drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c | 4 +++-
>  drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c  | 4 +++-
>  5 files changed, 15 insertions(+), 7 deletions(-)
>
>
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -51,7 +51,9 @@ struct amdgpu_bo_list_entry;
>  #define AMDGPU_VM_PTB_ALIGN_SIZE   32768
>  
>  /* LOG2 number of continuous pages for the fragment field */
> -#define AMDGPU_LOG2_PAGES_PER_FRAG 4
> +#define AMDGPU_LOG2_PAGES_PER_FRAG(adev) \
> +	((adev)->asic_type < CHIP_VEGA10 ? 4 : \
> +	 (adev)->vm_manager.block_size)

With the hardware configuration changed for 256KB BigK TLBs, should this
be changed to 256KB fragments on GFX9 now to get another small TLB boost
for allocations smaller than 2MB?

Regards,
  Felix
Am 03.08.2017 um 23:01 schrieb Felix Kuehling:
> On 17-07-19 11:26 AM, Christian König wrote:
>> From: Christian König <christian.koenig@amd.com>
>>
>> The fragment bits work differently for Vega10 compared to previous generations.
>>
>> Increase the fragment size to 2MB for now to better handle that.
>>
>> v2: handle the hardware setup as well
>>
>> Signed-off-by: Christian König <christian.koenig@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c  | 5 +++--
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c   | 5 +++--
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h   | 4 +++-
>>   drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c | 4 +++-
>>   drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c  | 4 +++-
>>   5 files changed, 15 insertions(+), 7 deletions(-)
>>
>>
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> @@ -51,7 +51,9 @@ struct amdgpu_bo_list_entry;
>>   #define AMDGPU_VM_PTB_ALIGN_SIZE   32768
>>   
>>   /* LOG2 number of continuous pages for the fragment field */
>> -#define AMDGPU_LOG2_PAGES_PER_FRAG 4
>> +#define AMDGPU_LOG2_PAGES_PER_FRAG(adev) \
>> +	((adev)->asic_type < CHIP_VEGA10 ? 4 : \
>> +	 (adev)->vm_manager.block_size)
> With the hardware configuration changed for 256KB BigK TLBs, should this
> be changed to 256KB fragments on GFX9 now to get another small TLB boost
> for allocations smaller than 2MB?

As far as I understand it the answer is no. The fragment field on Vega10 
only affects the L1, not the L2 any more.

And we need to report 2MB as fragment size to userspace to get the BigK 
effect on the TLB.

Well what we could do is changing the fragment generation a bit make the 
L1 be used more efficient.

I've tried that for gfx8 once, but that unfortunately backfired because 
it affected the L2 as well, but for gfx9 that should now work fine.

Christian.

>
> Regards,
>    Felix
>
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx