[3/3] drm/amdkfd: Optimize out sdma doorbell array in kgd2kfd_shared_resources

Submitted by Zeng, Oak on Feb. 1, 2019, 5:09 p.m.

Details

Message ID BL0PR12MB258079C9BA537E492564CDC580920@BL0PR12MB2580.namprd12.prod.outlook.com
State New
Series "Series without cover letter"
Headers show

Commit Message

Zeng, Oak Feb. 1, 2019, 5:09 p.m.
Reviewed-by: Oak Zeng <Oak.Zeng@amd.com>. The patch looks good to me, but it would be better for Felix to double-check it as well.


Thanks,
Oak

-----Original Message-----
From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of Zhao, Yong

Sent: Thursday, January 31, 2019 5:27 PM
To: amd-gfx@lists.freedesktop.org
Cc: Zhao, Yong <Yong.Zhao@amd.com>
Subject: [PATCH 3/3] drm/amdkfd: Optimize out sdma doorbell array in kgd2kfd_shared_resources

We can directly calculate the sdma doorbell index in the process doorbell pages through the doorbell_index structure in amdgpu_device, so there is no need to cache them in kgd2kfd_shared_resources anymore. This results in more portable code.

Change-Id: Ic657799856ed0256f36b01e502ef0cab263b1f49
Signed-off-by: Yong Zhao <Yong.Zhao@amd.com>

---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c    | 55 ++++++-------------
 .../drm/amd/amdkfd/kfd_device_queue_manager.c | 18 ++++--
 .../gpu/drm/amd/include/kgd_kfd_interface.h   |  2 +-
 3 files changed, 31 insertions(+), 44 deletions(-)

--
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Patch hide | download patch | download mbox

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 13710f34191a..f050adc3f5da 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -131,7 +131,7 @@  static void amdgpu_doorbell_get_kfd_info(struct amdgpu_device *adev,
 
 void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
 {
-	int i, n;
+	int i;
 	int last_valid_bit;
 
 	if (adev->kfd.dev) {
@@ -142,7 +142,9 @@  void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
 			.gpuvm_size = min(adev->vm_manager.max_pfn
 					  << AMDGPU_GPU_PAGE_SHIFT,
 					  AMDGPU_GMC_HOLE_START),
-			.drm_render_minor = adev->ddev->render->index
+			.drm_render_minor = adev->ddev->render->index,
+			.sdma_doorbell_idx = adev->doorbell_index.sdma_engine,
+
 		};
 
 		/* this is going to have a few of the MSBs set that we need to
@@ -172,45 +174,22 @@  void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
 				&gpu_resources.doorbell_aperture_size,
 				&gpu_resources.doorbell_start_offset);
 
-		if (adev->asic_type < CHIP_VEGA10) {
-			kgd2kfd_device_init(adev->kfd.dev, &gpu_resources);
-			return;
-		}
-
-		n = (adev->asic_type < CHIP_VEGA20) ? 2 : 8;
-
-		for (i = 0; i < n; i += 2) {
-			/* On SOC15 the BIF is involved in routing
-			 * doorbells using the low 12 bits of the
-			 * address. Communicate the assignments to
-			 * KFD. KFD uses two doorbell pages per
-			 * process in case of 64-bit doorbells so we
-			 * can use each doorbell assignment twice.
+		if (adev->asic_type >= CHIP_VEGA10) {
+			/* Because of the setting in registers like
+			 * SDMA0_DOORBELL_RANGE etc., BIF statically uses the
+			 * lower 12 bits of doorbell address for routing, in
+			 * order to route the CP queue doorbells to CP engine,
+			 * the doorbells allocated to CP queues have to be
+			 * outside the range set for SDMA, VCN, and IH blocks
+			 * Prior to SOC15, all queues use queue ID to
+			 * determine doorbells.
 			 */
-			gpu_resources.sdma_doorbell[0][i] =
-				adev->doorbell_index.sdma_engine[0] + (i >> 1);
-			gpu_resources.sdma_doorbell[0][i+1] =
-				adev->doorbell_index.sdma_engine[0] + 0x200 + (i >> 1);
-			gpu_resources.sdma_doorbell[1][i] =
-				adev->doorbell_index.sdma_engine[1] + (i >> 1);
-			gpu_resources.sdma_doorbell[1][i+1] =
-				adev->doorbell_index.sdma_engine[1] + 0x200 + (i >> 1);
+			gpu_resources.reserved_doorbells_start =
+					adev->doorbell_index.sdma_engine[0];
+			gpu_resources.reserved_doorbells_end =
+					adev->doorbell_index.last_non_cp;
 		}
 
-		/* Because of the setting in registers like
-		 * SDMA0_DOORBELL_RANGE etc., BIF statically uses the
-		 * lower 12 bits of doorbell address for routing, in
-		 * order to route the CP queue doorbells to CP engine,
-		 * the doorbells allocated to CP queues have to be
-		 * outside the range set for SDMA, VCN, and IH blocks
-		 * Prior to SOC15, all queues use queue ID to
-		 * determine doorbells.
-		 */
-		gpu_resources.reserved_doorbells_start =
-				adev->doorbell_index.sdma_engine[0];
-		gpu_resources.reserved_doorbells_end =
-				adev->doorbell_index.last_non_cp;
-
 		kgd2kfd_device_init(adev->kfd.dev, &gpu_resources);
 	}
 }
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 8372556b52eb..81280ce5aa27 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -134,12 +134,20 @@  static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q)
 		 */
 		q->doorbell_id = q->properties.queue_id;
 	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
-		/* For SDMA queues on SOC15, use static doorbell
-		 * assignments based on the engine and queue.
+		/* For SDMA queues on SOC15 with 8-byte doorbell, use static
+		 * doorbell assignments based on the engine and queue id.
+		 * The doorbell index distance between RLC (2*i) and (2*i+1)
+		 * for a SDMA engine is 512.
+		 * 512 8-byte doorbell distance (i.e. one page away) ensures
+		 * that SDMA RLC (2*i+1) doorbell lies exactly in the doorbell
+		 * OFFSET and SIZE set in register BIF_SDMA0_DOORBELL_RANGE.
 		 */
-		q->doorbell_id = dev->shared_resources.sdma_doorbell
-			[q->properties.sdma_engine_id]
-			[q->properties.sdma_queue_id];
+		unsigned int *idx_offset =
+				dev->shared_resources.sdma_doorbell_idx;
+		q->doorbell_id = idx_offset[q->properties.sdma_engine_id]
+			+ (q->properties.sdma_queue_id >> 1)
+			+ (q->properties.sdma_queue_id % 2)
+			* KFD_QUEUE_DOORBELL_MIRROR_OFFSET;
 	} else {
 		/* For CP queues on SOC15 reserve a free doorbell ID */
 		unsigned int found;
diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
index b1bf45419d93..3559170f6fb3 100644
--- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
@@ -137,7 +137,7 @@  struct kgd2kfd_shared_resources {
 	/* Bit n == 1 means Queue n is available for KFD */
 	DECLARE_BITMAP(queue_bitmap, KGD_MAX_QUEUES);
 
-	unsigned int sdma_doorbell[2][8];
+	unsigned int *sdma_doorbell_idx;
 
 	/* From SOC15 onwards, the doorbell indexes reserved for SDMA, IH,
 	 * and VCN