[5/5] drm/amdkfd: Optimize out sdma doorbell array in kgd2kfd_shared_resources

Submitted by Zhao, Yong on Feb. 5, 2019, 8:31 p.m.

Details

Message ID 20190205203047.22586-5-Yong.Zhao@amd.com
State New
Headers show
Series "Series without cover letter" ( rev: 3 2 1 ) in AMD X.Org drivers

Not browsing as part of any series.

Commit Message

Zhao, Yong Feb. 5, 2019, 8:31 p.m.
We can directly calculate the sdma doorbell index in the process doorbell
pages through the doorbell_index structure in amdgpu_device, so no need
to cache them in kgd2kfd_shared_resources any more, resulting in more
portable code.

Change-Id: Ic657799856ed0256f36b01e502ef0cab263b1f49
Signed-off-by: Yong Zhao <Yong.Zhao@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c    | 55 ++++++-------------
 .../drm/amd/amdkfd/kfd_device_queue_manager.c | 18 ++++--
 .../gpu/drm/amd/include/kgd_kfd_interface.h   |  2 +-
 3 files changed, 31 insertions(+), 44 deletions(-)

Patch hide | download patch | download mbox

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index ee8527701731..e62c3703169a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -131,7 +131,7 @@  static void amdgpu_doorbell_get_kfd_info(struct amdgpu_device *adev,
 
 void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
 {
-	int i, n;
+	int i;
 	int last_valid_bit;
 
 	if (adev->kfd.dev) {
@@ -142,7 +142,9 @@  void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
 			.gpuvm_size = min(adev->vm_manager.max_pfn
 					  << AMDGPU_GPU_PAGE_SHIFT,
 					  AMDGPU_GMC_HOLE_START),
-			.drm_render_minor = adev->ddev->render->index
+			.drm_render_minor = adev->ddev->render->index,
+			.sdma_doorbell_idx = adev->doorbell_index.sdma_engine,
+
 		};
 
 		/* this is going to have a few of the MSBs set that we need to
@@ -172,45 +174,22 @@  void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
 				&gpu_resources.doorbell_aperture_size,
 				&gpu_resources.doorbell_start_offset);
 
-		if (adev->asic_type < CHIP_VEGA10) {
-			kgd2kfd_device_init(adev->kfd.dev, &gpu_resources);
-			return;
-		}
-
-		n = (adev->asic_type < CHIP_VEGA20) ? 2 : 8;
-
-		for (i = 0; i < n; i += 2) {
-			/* On SOC15 the BIF is involved in routing
-			 * doorbells using the low 12 bits of the
-			 * address. Communicate the assignments to
-			 * KFD. KFD uses two doorbell pages per
-			 * process in case of 64-bit doorbells so we
-			 * can use each doorbell assignment twice.
+		if (adev->asic_type >= CHIP_VEGA10) {
+			/* Because of the setting in registers like
+			 * SDMA0_DOORBELL_RANGE etc., BIF statically uses the
+			 * lower 12 bits of doorbell address for routing. In
+			 * order to route the CP queue doorbells to CP engine,
+			 * the doorbells allocated to CP queues have to be
+			 * outside the range set for SDMA, VCN, and IH blocks
+			 * Prior to SOC15, all queues use queue ID to
+			 * determine doorbells.
 			 */
-			gpu_resources.sdma_doorbell[0][i] =
-				adev->doorbell_index.sdma_engine[0] + (i >> 1);
-			gpu_resources.sdma_doorbell[0][i+1] =
-				adev->doorbell_index.sdma_engine[0] + 0x200 + (i >> 1);
-			gpu_resources.sdma_doorbell[1][i] =
-				adev->doorbell_index.sdma_engine[1] + (i >> 1);
-			gpu_resources.sdma_doorbell[1][i+1] =
-				adev->doorbell_index.sdma_engine[1] + 0x200 + (i >> 1);
+			gpu_resources.reserved_doorbells_start =
+					adev->doorbell_index.sdma_engine[0];
+			gpu_resources.reserved_doorbells_end =
+					adev->doorbell_index.last_non_cp;
 		}
 
-		/* Because of the setting in registers like
-		 * SDMA0_DOORBELL_RANGE etc., BIF statically uses the
-		 * lower 12 bits of doorbell address for routing. In
-		 * order to route the CP queue doorbells to CP engine,
-		 * the doorbells allocated to CP queues have to be
-		 * outside the range set for SDMA, VCN, and IH blocks
-		 * Prior to SOC15, all queues use queue ID to
-		 * determine doorbells.
-		 */
-		gpu_resources.reserved_doorbells_start =
-				adev->doorbell_index.sdma_engine[0];
-		gpu_resources.reserved_doorbells_end =
-				adev->doorbell_index.last_non_cp;
-
 		kgd2kfd_device_init(adev->kfd.dev, &gpu_resources);
 	}
 }
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 8372556b52eb..81280ce5aa27 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -134,12 +134,20 @@  static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q)
 		 */
 		q->doorbell_id = q->properties.queue_id;
 	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
-		/* For SDMA queues on SOC15, use static doorbell
-		 * assignments based on the engine and queue.
+		/* For SDMA queues on SOC15 with 8-byte doorbell, use static
+		 * doorbell assignments based on the engine and queue id.
+		 * The doobell index distance between RLC (2*i) and (2*i+1)
+		 * for a SDMA engine is 512.
+		 * 512 8-byte doorbell distance (i.e. one page away) ensures
+		 * that SDMA RLC (2*i+1) doorbell lies exactly in the doorbell
+		 * OFFSET and SIZE set in register BIF_SDMA0_DOORBELL_RANGE.
 		 */
-		q->doorbell_id = dev->shared_resources.sdma_doorbell
-			[q->properties.sdma_engine_id]
-			[q->properties.sdma_queue_id];
+		unsigned int *idx_offset =
+				dev->shared_resources.sdma_doorbell_idx;
+		q->doorbell_id = idx_offset[q->properties.sdma_engine_id]
+			+ (q->properties.sdma_queue_id >> 1)
+			+ (q->properties.sdma_queue_id % 2)
+			* KFD_QUEUE_DOORBELL_MIRROR_OFFSET;
 	} else {
 		/* For CP queues on SOC15 reserve a free doorbell ID */
 		unsigned int found;
diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
index b1bf45419d93..3559170f6fb3 100644
--- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
@@ -137,7 +137,7 @@  struct kgd2kfd_shared_resources {
 	/* Bit n == 1 means Queue n is available for KFD */
 	DECLARE_BITMAP(queue_bitmap, KGD_MAX_QUEUES);
 
-	unsigned int sdma_doorbell[2][8];
+	unsigned int *sdma_doorbell_idx;
 
 	/* From SOC15 onwards, the doorbell indexes reserved for SDMA, IH,
 	 * and VCN

Comments

Some nit-picks inline. Looks good otherwise.

On 2019-02-05 3:31 p.m., Zhao, Yong wrote:
> We can directly calculate the sdma doorbell index in the process doorbell

> pages through the doorbell_index structure in amdgpu_device, so no need

> to cache them in kgd2kfd_shared_resources any more, resulting in more

> portable code.


What do you mean by "portable" here? Portable to what? Other 
architectures? Kernels? GPUs?


>

> Change-Id: Ic657799856ed0256f36b01e502ef0cab263b1f49

> Signed-off-by: Yong Zhao <Yong.Zhao@amd.com>

> ---

>   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c    | 55 ++++++-------------

>   .../drm/amd/amdkfd/kfd_device_queue_manager.c | 18 ++++--

>   .../gpu/drm/amd/include/kgd_kfd_interface.h   |  2 +-

>   3 files changed, 31 insertions(+), 44 deletions(-)

>

> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c

> index ee8527701731..e62c3703169a 100644

> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c

> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c

> @@ -131,7 +131,7 @@ static void amdgpu_doorbell_get_kfd_info(struct amdgpu_device *adev,

>   

>   void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)

>   {

> -	int i, n;

> +	int i;

>   	int last_valid_bit;

>   

>   	if (adev->kfd.dev) {

> @@ -142,7 +142,9 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)

>   			.gpuvm_size = min(adev->vm_manager.max_pfn

>   					  << AMDGPU_GPU_PAGE_SHIFT,

>   					  AMDGPU_GMC_HOLE_START),

> -			.drm_render_minor = adev->ddev->render->index

> +			.drm_render_minor = adev->ddev->render->index,

> +			.sdma_doorbell_idx = adev->doorbell_index.sdma_engine,

> +

>   		};

>   

>   		/* this is going to have a few of the MSBs set that we need to

> @@ -172,45 +174,22 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)

>   				&gpu_resources.doorbell_aperture_size,

>   				&gpu_resources.doorbell_start_offset);

>   

> -		if (adev->asic_type < CHIP_VEGA10) {

> -			kgd2kfd_device_init(adev->kfd.dev, &gpu_resources);

> -			return;

> -		}

> -

> -		n = (adev->asic_type < CHIP_VEGA20) ? 2 : 8;

> -

> -		for (i = 0; i < n; i += 2) {

> -			/* On SOC15 the BIF is involved in routing

> -			 * doorbells using the low 12 bits of the

> -			 * address. Communicate the assignments to

> -			 * KFD. KFD uses two doorbell pages per

> -			 * process in case of 64-bit doorbells so we

> -			 * can use each doorbell assignment twice.

> +		if (adev->asic_type >= CHIP_VEGA10) {

> +			/* Because of the setting in registers like

> +			 * SDMA0_DOORBELL_RANGE etc., BIF statically uses the

> +			 * lower 12 bits of doorbell address for routing. In

> +			 * order to route the CP queue doorbells to CP engine,

> +			 * the doorbells allocated to CP queues have to be

> +			 * outside the range set for SDMA, VCN, and IH blocks

> +			 * Prior to SOC15, all queues use queue ID to

> +			 * determine doorbells.

>   			 */

> -			gpu_resources.sdma_doorbell[0][i] =

> -				adev->doorbell_index.sdma_engine[0] + (i >> 1);

> -			gpu_resources.sdma_doorbell[0][i+1] =

> -				adev->doorbell_index.sdma_engine[0] + 0x200 + (i >> 1);

> -			gpu_resources.sdma_doorbell[1][i] =

> -				adev->doorbell_index.sdma_engine[1] + (i >> 1);

> -			gpu_resources.sdma_doorbell[1][i+1] =

> -				adev->doorbell_index.sdma_engine[1] + 0x200 + (i >> 1);

> +			gpu_resources.reserved_doorbells_start =

> +					adev->doorbell_index.sdma_engine[0];

> +			gpu_resources.reserved_doorbells_end =

> +					adev->doorbell_index.last_non_cp;

>   		}

>   

> -		/* Because of the setting in registers like

> -		 * SDMA0_DOORBELL_RANGE etc., BIF statically uses the

> -		 * lower 12 bits of doorbell address for routing. In

> -		 * order to route the CP queue doorbells to CP engine,

> -		 * the doorbells allocated to CP queues have to be

> -		 * outside the range set for SDMA, VCN, and IH blocks

> -		 * Prior to SOC15, all queues use queue ID to

> -		 * determine doorbells.

> -		 */

> -		gpu_resources.reserved_doorbells_start =

> -				adev->doorbell_index.sdma_engine[0];

> -		gpu_resources.reserved_doorbells_end =

> -				adev->doorbell_index.last_non_cp;

> -

>   		kgd2kfd_device_init(adev->kfd.dev, &gpu_resources);

>   	}

>   }

> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c

> index 8372556b52eb..81280ce5aa27 100644

> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c

> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c

> @@ -134,12 +134,20 @@ static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q)

>   		 */

>   		q->doorbell_id = q->properties.queue_id;

>   	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {

> -		/* For SDMA queues on SOC15, use static doorbell

> -		 * assignments based on the engine and queue.

> +		/* For SDMA queues on SOC15 with 8-byte doorbell, use static

> +		 * doorbell assignments based on the engine and queue id.

> +		 * The doobell index distance between RLC (2*i) and (2*i+1)

> +		 * for a SDMA engine is 512.

> +		 * 512 8-byte doorbell distance (i.e. one page away) ensures

> +		 * that SDMA RLC (2*i+1) doorbell lies exactly in the doorbell

> +		 * OFFSET and SIZE set in register BIF_SDMA0_DOORBELL_RANGE.

>   		 */

> -		q->doorbell_id = dev->shared_resources.sdma_doorbell

> -			[q->properties.sdma_engine_id]

> -			[q->properties.sdma_queue_id];

> +		unsigned int *idx_offset =

> +				dev->shared_resources.sdma_doorbell_idx;

The type in struct amdgpu_doorbell_index is uint32_t. I'd prefer 
matching the type of the pointer here.


> +		q->doorbell_id = idx_offset[q->properties.sdma_engine_id]

> +			+ (q->properties.sdma_queue_id >> 1)

> +			+ (q->properties.sdma_queue_id % 2)


Be consistent with the use of bit-wise operations (x >> 1 and x & 1) or 
division (x / 2 and x %2). Here you're mixing them. I prefer the 
bit-wise operations for division and modulo with powers of two.


> +			* KFD_QUEUE_DOORBELL_MIRROR_OFFSET;

>   	} else {

>   		/* For CP queues on SOC15 reserve a free doorbell ID */

>   		unsigned int found;

> diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h

> index b1bf45419d93..3559170f6fb3 100644

> --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h

> +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h

> @@ -137,7 +137,7 @@ struct kgd2kfd_shared_resources {

>   	/* Bit n == 1 means Queue n is available for KFD */

>   	DECLARE_BITMAP(queue_bitmap, KGD_MAX_QUEUES);

>   

> -	unsigned int sdma_doorbell[2][8];

> +	unsigned int *sdma_doorbell_idx;


The type in struct amdgpu_doorbell_index is uint32_t. I'd prefer 
matching the type of the pointer here.

Regards,
   Felix


>   

>   	/* From SOC15 onwards, the doorbell indexes reserved for SDMA, IH,

>   	 * and VCN