drm/amdgpu: fix vulkan test performance drop and hang on VI

Submitted by Rex Zhu on July 3, 2017, 10:12 a.m.

Details

Message ID 1499076777-5170-1-git-send-email-Rex.Zhu@amd.com
State New
Headers show
Series "drm/amdgpu: fix vulkan test performance drop and hang on VI" ( rev: 2 ) in AMD X.Org drivers

Not browsing as part of any series.

Commit Message

Rex Zhu July 3, 2017, 10:12 a.m.
caused by not program dynamic_cu_mask_addr in the KIQ MQD.

v2: create struct vi_mqd_allocation in FB which will contain
1. PM4 MQD structure.
2. Write Pointer Poll Memory.
3. Read Pointer Report Memory
4. Dynamic CU Mask.
5. Dynamic RB Mask.

Change-Id: I22c840f1bf8d365f7df33a27d6b11e1aea8f2958
Signed-off-by: Rex Zhu <Rex.Zhu@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c    |  27 ++--
 drivers/gpu/drm/amd/include/vi_structs.h | 268 +++++++++++++++++++++++++++++++
 2 files changed, 285 insertions(+), 10 deletions(-)

Patch hide | download patch | download mbox

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 1a75ab1..452cc5b 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -40,7 +40,6 @@ 
 
 #include "bif/bif_5_0_d.h"
 #include "bif/bif_5_0_sh_mask.h"
-
 #include "gca/gfx_8_0_d.h"
 #include "gca/gfx_8_0_enum.h"
 #include "gca/gfx_8_0_sh_mask.h"
@@ -2100,7 +2099,7 @@  static int gfx_v8_0_sw_init(void *handle)
 		return r;
 
 	/* create MQD for all compute queues as well as KIQ for SRIOV case */
-	r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct vi_mqd));
+	r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation));
 	if (r)
 		return r;
 
@@ -4715,9 +4714,6 @@  static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
 	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
 	uint32_t tmp;
 
-	/* init the mqd struct */
-	memset(mqd, 0, sizeof(struct vi_mqd));
-
 	mqd->header = 0xC0310800;
 	mqd->compute_pipelinestat_enable = 0x00000001;
 	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
@@ -4725,7 +4721,12 @@  static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
 	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
 	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
 	mqd->compute_misc_reserved = 0x00000003;
-
+	if (!(adev->flags & AMD_IS_APU)) {
+		mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr
+					     + offsetof(struct vi_mqd_allocation, dyamic_cu_mask));
+		mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr
+					     + offsetof(struct vi_mqd_allocation, dyamic_cu_mask));
+	}
 	eop_base_addr = ring->eop_gpu_addr >> 8;
 	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
 	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
@@ -4900,7 +4901,7 @@  static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
 	if (adev->gfx.in_reset) { /* for GPU_RESET case */
 		/* reset MQD to a clean status */
 		if (adev->gfx.mec.mqd_backup[mqd_idx])
-			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
+			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
 
 		/* reset ring buffer */
 		ring->wptr = 0;
@@ -4916,6 +4917,9 @@  static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
 		vi_srbm_select(adev, 0, 0, 0, 0);
 		mutex_unlock(&adev->srbm_mutex);
 	} else {
+		memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
+		((struct vi_mqd_allocation *)mqd)->dyamic_cu_mask = 0xFFFFFFFF;
+		((struct vi_mqd_allocation *)mqd)->dyamic_rb_mask = 0xFFFFFFFF;
 		mutex_lock(&adev->srbm_mutex);
 		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
 		gfx_v8_0_mqd_init(ring);
@@ -4929,7 +4933,7 @@  static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
 		mutex_unlock(&adev->srbm_mutex);
 
 		if (adev->gfx.mec.mqd_backup[mqd_idx])
-			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
+			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
 	}
 
 	return r;
@@ -4947,6 +4951,9 @@  static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
 	int mqd_idx = ring - &adev->gfx.compute_ring[0];
 
 	if (!adev->gfx.in_reset && !adev->gfx.in_suspend) {
+		memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
+		((struct vi_mqd_allocation *)mqd)->dyamic_cu_mask = 0xFFFFFFFF;
+		((struct vi_mqd_allocation *)mqd)->dyamic_rb_mask = 0xFFFFFFFF;
 		mutex_lock(&adev->srbm_mutex);
 		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
 		gfx_v8_0_mqd_init(ring);
@@ -4954,11 +4961,11 @@  static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
 		mutex_unlock(&adev->srbm_mutex);
 
 		if (adev->gfx.mec.mqd_backup[mqd_idx])
-			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
+			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
 	} else if (adev->gfx.in_reset) { /* for GPU_RESET case */
 		/* reset MQD to a clean status */
 		if (adev->gfx.mec.mqd_backup[mqd_idx])
-			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
+			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
 		/* reset ring buffer */
 		ring->wptr = 0;
 		amdgpu_ring_clear_ring(ring);
diff --git a/drivers/gpu/drm/amd/include/vi_structs.h b/drivers/gpu/drm/amd/include/vi_structs.h
index b68f8ef..ca93b51 100644
--- a/drivers/gpu/drm/amd/include/vi_structs.h
+++ b/drivers/gpu/drm/amd/include/vi_structs.h
@@ -195,6 +195,274 @@  struct vi_mqd {
 	uint32_t compute_wave_restore_addr_lo;
 	uint32_t compute_wave_restore_addr_hi;
 	uint32_t compute_wave_restore_control;
+	uint32_t reserved9;
+	uint32_t reserved10;
+	uint32_t reserved11;
+	uint32_t reserved12;
+	uint32_t reserved13;
+	uint32_t reserved14;
+	uint32_t reserved15;
+	uint32_t reserved16;
+	uint32_t reserved17;
+	uint32_t reserved18;
+	uint32_t reserved19;
+	uint32_t reserved20;
+	uint32_t reserved21;
+	uint32_t reserved22;
+	uint32_t reserved23;
+	uint32_t reserved24;
+	uint32_t reserved25;
+	uint32_t reserved26;
+	uint32_t reserved27;
+	uint32_t reserved28;
+	uint32_t reserved29;
+	uint32_t reserved30;
+	uint32_t reserved31;
+	uint32_t reserved32;
+	uint32_t reserved33;
+	uint32_t reserved34;
+	uint32_t compute_user_data_0;
+	uint32_t compute_user_data_1;
+	uint32_t compute_user_data_2;
+	uint32_t compute_user_data_3;
+	uint32_t compute_user_data_4;
+	uint32_t compute_user_data_5;
+	uint32_t compute_user_data_6;
+	uint32_t compute_user_data_7;
+	uint32_t compute_user_data_8;
+	uint32_t compute_user_data_9;
+	uint32_t compute_user_data_10;
+	uint32_t compute_user_data_11;
+	uint32_t compute_user_data_12;
+	uint32_t compute_user_data_13;
+	uint32_t compute_user_data_14;
+	uint32_t compute_user_data_15;
+	uint32_t cp_compute_csinvoc_count_lo;
+	uint32_t cp_compute_csinvoc_count_hi;
+	uint32_t reserved35;
+	uint32_t reserved36;
+	uint32_t reserved37;
+	uint32_t cp_mqd_query_time_lo;
+	uint32_t cp_mqd_query_time_hi;
+	uint32_t cp_mqd_connect_start_time_lo;
+	uint32_t cp_mqd_connect_start_time_hi;
+	uint32_t cp_mqd_connect_end_time_lo;
+	uint32_t cp_mqd_connect_end_time_hi;
+	uint32_t cp_mqd_connect_end_wf_count;
+	uint32_t cp_mqd_connect_end_pq_rptr;
+	uint32_t cp_mqd_connect_endvi_sdma_mqd_pq_wptr;
+	uint32_t cp_mqd_connect_end_ib_rptr;
+	uint32_t reserved38;
+	uint32_t reserved39;
+	uint32_t cp_mqd_save_start_time_lo;
+	uint32_t cp_mqd_save_start_time_hi;
+	uint32_t cp_mqd_save_end_time_lo;
+	uint32_t cp_mqd_save_end_time_hi;
+	uint32_t cp_mqd_restore_start_time_lo;
+	uint32_t cp_mqd_restore_start_time_hi;
+	uint32_t cp_mqd_restore_end_time_lo;
+	uint32_t cp_mqd_restore_end_time_hi;
+	uint32_t disable_queue;
+	uint32_t reserved41;
+	uint32_t gds_cs_ctxsw_cnt0;
+	uint32_t gds_cs_ctxsw_cnt1;
+	uint32_t gds_cs_ctxsw_cnt2;
+	uint32_t gds_cs_ctxsw_cnt3;
+	uint32_t reserved42;
+	uint32_t reserved43;
+	uint32_t cp_pq_exe_status_lo;
+	uint32_t cp_pq_exe_status_hi;
+	uint32_t cp_packet_id_lo;
+	uint32_t cp_packet_id_hi;
+	uint32_t cp_packet_exe_status_lo;
+	uint32_t cp_packet_exe_status_hi;
+	uint32_t gds_save_base_addr_lo;
+	uint32_t gds_save_base_addr_hi;
+	uint32_t gds_save_mask_lo;
+	uint32_t gds_save_mask_hi;
+	uint32_t ctx_save_base_addr_lo;
+	uint32_t ctx_save_base_addr_hi;
+	uint32_t dynamic_cu_mask_addr_lo;
+	uint32_t dynamic_cu_mask_addr_hi;
+	uint32_t cp_mqd_base_addr_lo;
+	uint32_t cp_mqd_base_addr_hi;
+	uint32_t cp_hqd_active;
+	uint32_t cp_hqd_vmid;
+	uint32_t cp_hqd_persistent_state;
+	uint32_t cp_hqd_pipe_priority;
+	uint32_t cp_hqd_queue_priority;
+	uint32_t cp_hqd_quantum;
+	uint32_t cp_hqd_pq_base_lo;
+	uint32_t cp_hqd_pq_base_hi;
+	uint32_t cp_hqd_pq_rptr;
+	uint32_t cp_hqd_pq_rptr_report_addr_lo;
+	uint32_t cp_hqd_pq_rptr_report_addr_hi;
+	uint32_t cp_hqd_pq_wptr_poll_addr_lo;
+	uint32_t cp_hqd_pq_wptr_poll_addr_hi;
+	uint32_t cp_hqd_pq_doorbell_control;
+	uint32_t cp_hqd_pq_wptr;
+	uint32_t cp_hqd_pq_control;
+	uint32_t cp_hqd_ib_base_addr_lo;
+	uint32_t cp_hqd_ib_base_addr_hi;
+	uint32_t cp_hqd_ib_rptr;
+	uint32_t cp_hqd_ib_control;
+	uint32_t cp_hqd_iq_timer;
+	uint32_t cp_hqd_iq_rptr;
+	uint32_t cp_hqd_dequeue_request;
+	uint32_t cp_hqd_dma_offload;
+	uint32_t cp_hqd_sema_cmd;
+	uint32_t cp_hqd_msg_type;
+	uint32_t cp_hqd_atomic0_preop_lo;
+	uint32_t cp_hqd_atomic0_preop_hi;
+	uint32_t cp_hqd_atomic1_preop_lo;
+	uint32_t cp_hqd_atomic1_preop_hi;
+	uint32_t cp_hqd_hq_status0;
+	uint32_t cp_hqd_hq_control0;
+	uint32_t cp_mqd_control;
+	uint32_t cp_hqd_hq_status1;
+	uint32_t cp_hqd_hq_control1;
+	uint32_t cp_hqd_eop_base_addr_lo;
+	uint32_t cp_hqd_eop_base_addr_hi;
+	uint32_t cp_hqd_eop_control;
+	uint32_t cp_hqd_eop_rptr;
+	uint32_t cp_hqd_eop_wptr;
+	uint32_t cp_hqd_eop_done_events;
+	uint32_t cp_hqd_ctx_save_base_addr_lo;
+	uint32_t cp_hqd_ctx_save_base_addr_hi;
+	uint32_t cp_hqd_ctx_save_control;
+	uint32_t cp_hqd_cntl_stack_offset;
+	uint32_t cp_hqd_cntl_stack_size;
+	uint32_t cp_hqd_wg_state_offset;
+	uint32_t cp_hqd_ctx_save_size;
+	uint32_t cp_hqd_gds_resource_state;
+	uint32_t cp_hqd_error;
+	uint32_t cp_hqd_eop_wptr_mem;
+	uint32_t cp_hqd_eop_dones;
+	uint32_t reserved46;
+	uint32_t reserved47;
+	uint32_t reserved48;
+	uint32_t reserved49;
+	uint32_t reserved50;
+	uint32_t reserved51;
+	uint32_t reserved52;
+	uint32_t reserved53;
+	uint32_t reserved54;
+	uint32_t reserved55;
+	uint32_t iqtimer_pkt_header;
+	uint32_t iqtimer_pkt_dw0;
+	uint32_t iqtimer_pkt_dw1;
+	uint32_t iqtimer_pkt_dw2;
+	uint32_t iqtimer_pkt_dw3;
+	uint32_t iqtimer_pkt_dw4;
+	uint32_t iqtimer_pkt_dw5;
+	uint32_t iqtimer_pkt_dw6;
+	uint32_t iqtimer_pkt_dw7;
+	uint32_t iqtimer_pkt_dw8;
+	uint32_t iqtimer_pkt_dw9;
+	uint32_t iqtimer_pkt_dw10;
+	uint32_t iqtimer_pkt_dw11;
+	uint32_t iqtimer_pkt_dw12;
+	uint32_t iqtimer_pkt_dw13;
+	uint32_t iqtimer_pkt_dw14;
+	uint32_t iqtimer_pkt_dw15;
+	uint32_t iqtimer_pkt_dw16;
+	uint32_t iqtimer_pkt_dw17;
+	uint32_t iqtimer_pkt_dw18;
+	uint32_t iqtimer_pkt_dw19;
+	uint32_t iqtimer_pkt_dw20;
+	uint32_t iqtimer_pkt_dw21;
+	uint32_t iqtimer_pkt_dw22;
+	uint32_t iqtimer_pkt_dw23;
+	uint32_t iqtimer_pkt_dw24;
+	uint32_t iqtimer_pkt_dw25;
+	uint32_t iqtimer_pkt_dw26;
+	uint32_t iqtimer_pkt_dw27;
+	uint32_t iqtimer_pkt_dw28;
+	uint32_t iqtimer_pkt_dw29;
+	uint32_t iqtimer_pkt_dw30;
+	uint32_t iqtimer_pkt_dw31;
+	uint32_t reserved56;
+	uint32_t reserved57;
+	uint32_t reserved58;
+	uint32_t set_resources_header;
+	uint32_t set_resources_dw1;
+	uint32_t set_resources_dw2;
+	uint32_t set_resources_dw3;
+	uint32_t set_resources_dw4;
+	uint32_t set_resources_dw5;
+	uint32_t set_resources_dw6;
+	uint32_t set_resources_dw7;
+	uint32_t reserved59;
+	uint32_t reserved60;
+	uint32_t reserved61;
+	uint32_t reserved62;
+	uint32_t reserved63;
+	uint32_t reserved64;
+	uint32_t reserved65;
+	uint32_t reserved66;
+	uint32_t reserved67;
+	uint32_t reserved68;
+	uint32_t reserved69;
+	uint32_t reserved70;
+	uint32_t reserved71;
+	uint32_t reserved72;
+	uint32_t reserved73;
+	uint32_t reserved74;
+	uint32_t reserved75;
+	uint32_t reserved76;
+	uint32_t reserved77;
+	uint32_t reserved78;
+	uint32_t reserved_t[256];
+};
+
+struct vi_mqd_allocation {
+	struct vi_mqd mqd;
+	uint32_t wptr_poll_mem;
+	uint32_t rptr_report_mem;
+	uint32_t dyamic_cu_mask;
+	uint32_t dyamic_rb_mask;
+};
+
+struct cz_mqd {
+	uint32_t header;
+	uint32_t compute_dispatch_initiator;
+	uint32_t compute_dim_x;
+	uint32_t compute_dim_y;
+	uint32_t compute_dim_z;
+	uint32_t compute_start_x;
+	uint32_t compute_start_y;
+	uint32_t compute_start_z;
+	uint32_t compute_num_thread_x;
+	uint32_t compute_num_thread_y;
+	uint32_t compute_num_thread_z;
+	uint32_t compute_pipelinestat_enable;
+	uint32_t compute_perfcount_enable;
+	uint32_t compute_pgm_lo;
+	uint32_t compute_pgm_hi;
+	uint32_t compute_tba_lo;
+	uint32_t compute_tba_hi;
+	uint32_t compute_tma_lo;
+	uint32_t compute_tma_hi;
+	uint32_t compute_pgm_rsrc1;
+	uint32_t compute_pgm_rsrc2;
+	uint32_t compute_vmid;
+	uint32_t compute_resource_limits;
+	uint32_t compute_static_thread_mgmt_se0;
+	uint32_t compute_static_thread_mgmt_se1;
+	uint32_t compute_tmpring_size;
+	uint32_t compute_static_thread_mgmt_se2;
+	uint32_t compute_static_thread_mgmt_se3;
+	uint32_t compute_restart_x;
+	uint32_t compute_restart_y;
+	uint32_t compute_restart_z;
+	uint32_t compute_thread_trace_enable;
+	uint32_t compute_misc_reserved;
+	uint32_t compute_dispatch_id;
+	uint32_t compute_threadgroup_id;
+	uint32_t compute_relaunch;
+	uint32_t compute_wave_restore_addr_lo;
+	uint32_t compute_wave_restore_addr_hi;
+	uint32_t compute_wave_restore_control;
 	uint32_t reserved_39;
 	uint32_t reserved_40;
 	uint32_t reserved_41;

Comments

> -----Original Message-----

> From: amd-gfx [mailto:amd-gfx-bounces@lists.freedesktop.org] On Behalf

> Of Rex Zhu

> Sent: Monday, July 03, 2017 6:13 AM

> To: amd-gfx@lists.freedesktop.org

> Cc: Zhu, Rex

> Subject: [PATCH] drm/amdgpu: fix vulkan test performance drop and hang

> on VI

> 

> caused by not program dynamic_cu_mask_addr in the KIQ MQD.

> 

> v2: create struct vi_mqd_allocation in FB which will contain

> 1. PM4 MQD structure.

> 2. Write Pointer Poll Memory.

> 3. Read Pointer Report Memory

> 4. Dynamic CU Mask.

> 5. Dynamic RB Mask.

> 

> Change-Id: I22c840f1bf8d365f7df33a27d6b11e1aea8f2958

> Signed-off-by: Rex Zhu <Rex.Zhu@amd.com>


Reviewed-by: Alex Deucher <alexander.deucher@amd.com>


> ---

>  drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c    |  27 ++--

>  drivers/gpu/drm/amd/include/vi_structs.h | 268

> +++++++++++++++++++++++++++++++

>  2 files changed, 285 insertions(+), 10 deletions(-)

> 

> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c

> b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c

> index 1a75ab1..452cc5b 100644

> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c

> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c

> @@ -40,7 +40,6 @@

> 

>  #include "bif/bif_5_0_d.h"

>  #include "bif/bif_5_0_sh_mask.h"

> -

>  #include "gca/gfx_8_0_d.h"

>  #include "gca/gfx_8_0_enum.h"

>  #include "gca/gfx_8_0_sh_mask.h"

> @@ -2100,7 +2099,7 @@ static int gfx_v8_0_sw_init(void *handle)

>  		return r;

> 

>  	/* create MQD for all compute queues as well as KIQ for SRIOV case

> */

> -	r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct

> vi_mqd));

> +	r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct

> vi_mqd_allocation));

>  	if (r)

>  		return r;

> 

> @@ -4715,9 +4714,6 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring

> *ring)

>  	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;

>  	uint32_t tmp;

> 

> -	/* init the mqd struct */

> -	memset(mqd, 0, sizeof(struct vi_mqd));

> -

>  	mqd->header = 0xC0310800;

>  	mqd->compute_pipelinestat_enable = 0x00000001;

>  	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;

> @@ -4725,7 +4721,12 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring

> *ring)

>  	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;

>  	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;

>  	mqd->compute_misc_reserved = 0x00000003;

> -

> +	if (!(adev->flags & AMD_IS_APU)) {

> +		mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring-

> >mqd_gpu_addr

> +					     + offsetof(struct

> vi_mqd_allocation, dyamic_cu_mask));

> +		mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring-

> >mqd_gpu_addr

> +					     + offsetof(struct

> vi_mqd_allocation, dyamic_cu_mask));

> +	}

>  	eop_base_addr = ring->eop_gpu_addr >> 8;

>  	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;

>  	mqd->cp_hqd_eop_base_addr_hi =

> upper_32_bits(eop_base_addr);

> @@ -4900,7 +4901,7 @@ static int gfx_v8_0_kiq_init_queue(struct

> amdgpu_ring *ring)

>  	if (adev->gfx.in_reset) { /* for GPU_RESET case */

>  		/* reset MQD to a clean status */

>  		if (adev->gfx.mec.mqd_backup[mqd_idx])

> -			memcpy(mqd, adev-

> >gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));

> +			memcpy(mqd, adev-

> >gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));

> 

>  		/* reset ring buffer */

>  		ring->wptr = 0;

> @@ -4916,6 +4917,9 @@ static int gfx_v8_0_kiq_init_queue(struct

> amdgpu_ring *ring)

>  		vi_srbm_select(adev, 0, 0, 0, 0);

>  		mutex_unlock(&adev->srbm_mutex);

>  	} else {

> +		memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));

> +		((struct vi_mqd_allocation *)mqd)->dyamic_cu_mask =

> 0xFFFFFFFF;

> +		((struct vi_mqd_allocation *)mqd)->dyamic_rb_mask =

> 0xFFFFFFFF;

>  		mutex_lock(&adev->srbm_mutex);

>  		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);

>  		gfx_v8_0_mqd_init(ring);

> @@ -4929,7 +4933,7 @@ static int gfx_v8_0_kiq_init_queue(struct

> amdgpu_ring *ring)

>  		mutex_unlock(&adev->srbm_mutex);

> 

>  		if (adev->gfx.mec.mqd_backup[mqd_idx])

> -			memcpy(adev->gfx.mec.mqd_backup[mqd_idx],

> mqd, sizeof(*mqd));

> +			memcpy(adev->gfx.mec.mqd_backup[mqd_idx],

> mqd, sizeof(struct vi_mqd_allocation));

>  	}

> 

>  	return r;

> @@ -4947,6 +4951,9 @@ static int gfx_v8_0_kcq_init_queue(struct

> amdgpu_ring *ring)

>  	int mqd_idx = ring - &adev->gfx.compute_ring[0];

> 

>  	if (!adev->gfx.in_reset && !adev->gfx.in_suspend) {

> +		memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));

> +		((struct vi_mqd_allocation *)mqd)->dyamic_cu_mask =

> 0xFFFFFFFF;

> +		((struct vi_mqd_allocation *)mqd)->dyamic_rb_mask =

> 0xFFFFFFFF;

>  		mutex_lock(&adev->srbm_mutex);

>  		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);

>  		gfx_v8_0_mqd_init(ring);

> @@ -4954,11 +4961,11 @@ static int gfx_v8_0_kcq_init_queue(struct

> amdgpu_ring *ring)

>  		mutex_unlock(&adev->srbm_mutex);

> 

>  		if (adev->gfx.mec.mqd_backup[mqd_idx])

> -			memcpy(adev->gfx.mec.mqd_backup[mqd_idx],

> mqd, sizeof(*mqd));

> +			memcpy(adev->gfx.mec.mqd_backup[mqd_idx],

> mqd, sizeof(struct vi_mqd_allocation));

>  	} else if (adev->gfx.in_reset) { /* for GPU_RESET case */

>  		/* reset MQD to a clean status */

>  		if (adev->gfx.mec.mqd_backup[mqd_idx])

> -			memcpy(mqd, adev-

> >gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));

> +			memcpy(mqd, adev-

> >gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));

>  		/* reset ring buffer */

>  		ring->wptr = 0;

>  		amdgpu_ring_clear_ring(ring);

> diff --git a/drivers/gpu/drm/amd/include/vi_structs.h

> b/drivers/gpu/drm/amd/include/vi_structs.h

> index b68f8ef..ca93b51 100644

> --- a/drivers/gpu/drm/amd/include/vi_structs.h

> +++ b/drivers/gpu/drm/amd/include/vi_structs.h

> @@ -195,6 +195,274 @@ struct vi_mqd {

>  	uint32_t compute_wave_restore_addr_lo;

>  	uint32_t compute_wave_restore_addr_hi;

>  	uint32_t compute_wave_restore_control;

> +	uint32_t reserved9;

> +	uint32_t reserved10;

> +	uint32_t reserved11;

> +	uint32_t reserved12;

> +	uint32_t reserved13;

> +	uint32_t reserved14;

> +	uint32_t reserved15;

> +	uint32_t reserved16;

> +	uint32_t reserved17;

> +	uint32_t reserved18;

> +	uint32_t reserved19;

> +	uint32_t reserved20;

> +	uint32_t reserved21;

> +	uint32_t reserved22;

> +	uint32_t reserved23;

> +	uint32_t reserved24;

> +	uint32_t reserved25;

> +	uint32_t reserved26;

> +	uint32_t reserved27;

> +	uint32_t reserved28;

> +	uint32_t reserved29;

> +	uint32_t reserved30;

> +	uint32_t reserved31;

> +	uint32_t reserved32;

> +	uint32_t reserved33;

> +	uint32_t reserved34;

> +	uint32_t compute_user_data_0;

> +	uint32_t compute_user_data_1;

> +	uint32_t compute_user_data_2;

> +	uint32_t compute_user_data_3;

> +	uint32_t compute_user_data_4;

> +	uint32_t compute_user_data_5;

> +	uint32_t compute_user_data_6;

> +	uint32_t compute_user_data_7;

> +	uint32_t compute_user_data_8;

> +	uint32_t compute_user_data_9;

> +	uint32_t compute_user_data_10;

> +	uint32_t compute_user_data_11;

> +	uint32_t compute_user_data_12;

> +	uint32_t compute_user_data_13;

> +	uint32_t compute_user_data_14;

> +	uint32_t compute_user_data_15;

> +	uint32_t cp_compute_csinvoc_count_lo;

> +	uint32_t cp_compute_csinvoc_count_hi;

> +	uint32_t reserved35;

> +	uint32_t reserved36;

> +	uint32_t reserved37;

> +	uint32_t cp_mqd_query_time_lo;

> +	uint32_t cp_mqd_query_time_hi;

> +	uint32_t cp_mqd_connect_start_time_lo;

> +	uint32_t cp_mqd_connect_start_time_hi;

> +	uint32_t cp_mqd_connect_end_time_lo;

> +	uint32_t cp_mqd_connect_end_time_hi;

> +	uint32_t cp_mqd_connect_end_wf_count;

> +	uint32_t cp_mqd_connect_end_pq_rptr;

> +	uint32_t cp_mqd_connect_endvi_sdma_mqd_pq_wptr;

> +	uint32_t cp_mqd_connect_end_ib_rptr;

> +	uint32_t reserved38;

> +	uint32_t reserved39;

> +	uint32_t cp_mqd_save_start_time_lo;

> +	uint32_t cp_mqd_save_start_time_hi;

> +	uint32_t cp_mqd_save_end_time_lo;

> +	uint32_t cp_mqd_save_end_time_hi;

> +	uint32_t cp_mqd_restore_start_time_lo;

> +	uint32_t cp_mqd_restore_start_time_hi;

> +	uint32_t cp_mqd_restore_end_time_lo;

> +	uint32_t cp_mqd_restore_end_time_hi;

> +	uint32_t disable_queue;

> +	uint32_t reserved41;

> +	uint32_t gds_cs_ctxsw_cnt0;

> +	uint32_t gds_cs_ctxsw_cnt1;

> +	uint32_t gds_cs_ctxsw_cnt2;

> +	uint32_t gds_cs_ctxsw_cnt3;

> +	uint32_t reserved42;

> +	uint32_t reserved43;

> +	uint32_t cp_pq_exe_status_lo;

> +	uint32_t cp_pq_exe_status_hi;

> +	uint32_t cp_packet_id_lo;

> +	uint32_t cp_packet_id_hi;

> +	uint32_t cp_packet_exe_status_lo;

> +	uint32_t cp_packet_exe_status_hi;

> +	uint32_t gds_save_base_addr_lo;

> +	uint32_t gds_save_base_addr_hi;

> +	uint32_t gds_save_mask_lo;

> +	uint32_t gds_save_mask_hi;

> +	uint32_t ctx_save_base_addr_lo;

> +	uint32_t ctx_save_base_addr_hi;

> +	uint32_t dynamic_cu_mask_addr_lo;

> +	uint32_t dynamic_cu_mask_addr_hi;

> +	uint32_t cp_mqd_base_addr_lo;

> +	uint32_t cp_mqd_base_addr_hi;

> +	uint32_t cp_hqd_active;

> +	uint32_t cp_hqd_vmid;

> +	uint32_t cp_hqd_persistent_state;

> +	uint32_t cp_hqd_pipe_priority;

> +	uint32_t cp_hqd_queue_priority;

> +	uint32_t cp_hqd_quantum;

> +	uint32_t cp_hqd_pq_base_lo;

> +	uint32_t cp_hqd_pq_base_hi;

> +	uint32_t cp_hqd_pq_rptr;

> +	uint32_t cp_hqd_pq_rptr_report_addr_lo;

> +	uint32_t cp_hqd_pq_rptr_report_addr_hi;

> +	uint32_t cp_hqd_pq_wptr_poll_addr_lo;

> +	uint32_t cp_hqd_pq_wptr_poll_addr_hi;

> +	uint32_t cp_hqd_pq_doorbell_control;

> +	uint32_t cp_hqd_pq_wptr;

> +	uint32_t cp_hqd_pq_control;

> +	uint32_t cp_hqd_ib_base_addr_lo;

> +	uint32_t cp_hqd_ib_base_addr_hi;

> +	uint32_t cp_hqd_ib_rptr;

> +	uint32_t cp_hqd_ib_control;

> +	uint32_t cp_hqd_iq_timer;

> +	uint32_t cp_hqd_iq_rptr;

> +	uint32_t cp_hqd_dequeue_request;

> +	uint32_t cp_hqd_dma_offload;

> +	uint32_t cp_hqd_sema_cmd;

> +	uint32_t cp_hqd_msg_type;

> +	uint32_t cp_hqd_atomic0_preop_lo;

> +	uint32_t cp_hqd_atomic0_preop_hi;

> +	uint32_t cp_hqd_atomic1_preop_lo;

> +	uint32_t cp_hqd_atomic1_preop_hi;

> +	uint32_t cp_hqd_hq_status0;

> +	uint32_t cp_hqd_hq_control0;

> +	uint32_t cp_mqd_control;

> +	uint32_t cp_hqd_hq_status1;

> +	uint32_t cp_hqd_hq_control1;

> +	uint32_t cp_hqd_eop_base_addr_lo;

> +	uint32_t cp_hqd_eop_base_addr_hi;

> +	uint32_t cp_hqd_eop_control;

> +	uint32_t cp_hqd_eop_rptr;

> +	uint32_t cp_hqd_eop_wptr;

> +	uint32_t cp_hqd_eop_done_events;

> +	uint32_t cp_hqd_ctx_save_base_addr_lo;

> +	uint32_t cp_hqd_ctx_save_base_addr_hi;

> +	uint32_t cp_hqd_ctx_save_control;

> +	uint32_t cp_hqd_cntl_stack_offset;

> +	uint32_t cp_hqd_cntl_stack_size;

> +	uint32_t cp_hqd_wg_state_offset;

> +	uint32_t cp_hqd_ctx_save_size;

> +	uint32_t cp_hqd_gds_resource_state;

> +	uint32_t cp_hqd_error;

> +	uint32_t cp_hqd_eop_wptr_mem;

> +	uint32_t cp_hqd_eop_dones;

> +	uint32_t reserved46;

> +	uint32_t reserved47;

> +	uint32_t reserved48;

> +	uint32_t reserved49;

> +	uint32_t reserved50;

> +	uint32_t reserved51;

> +	uint32_t reserved52;

> +	uint32_t reserved53;

> +	uint32_t reserved54;

> +	uint32_t reserved55;

> +	uint32_t iqtimer_pkt_header;

> +	uint32_t iqtimer_pkt_dw0;

> +	uint32_t iqtimer_pkt_dw1;

> +	uint32_t iqtimer_pkt_dw2;

> +	uint32_t iqtimer_pkt_dw3;

> +	uint32_t iqtimer_pkt_dw4;

> +	uint32_t iqtimer_pkt_dw5;

> +	uint32_t iqtimer_pkt_dw6;

> +	uint32_t iqtimer_pkt_dw7;

> +	uint32_t iqtimer_pkt_dw8;

> +	uint32_t iqtimer_pkt_dw9;

> +	uint32_t iqtimer_pkt_dw10;

> +	uint32_t iqtimer_pkt_dw11;

> +	uint32_t iqtimer_pkt_dw12;

> +	uint32_t iqtimer_pkt_dw13;

> +	uint32_t iqtimer_pkt_dw14;

> +	uint32_t iqtimer_pkt_dw15;

> +	uint32_t iqtimer_pkt_dw16;

> +	uint32_t iqtimer_pkt_dw17;

> +	uint32_t iqtimer_pkt_dw18;

> +	uint32_t iqtimer_pkt_dw19;

> +	uint32_t iqtimer_pkt_dw20;

> +	uint32_t iqtimer_pkt_dw21;

> +	uint32_t iqtimer_pkt_dw22;

> +	uint32_t iqtimer_pkt_dw23;

> +	uint32_t iqtimer_pkt_dw24;

> +	uint32_t iqtimer_pkt_dw25;

> +	uint32_t iqtimer_pkt_dw26;

> +	uint32_t iqtimer_pkt_dw27;

> +	uint32_t iqtimer_pkt_dw28;

> +	uint32_t iqtimer_pkt_dw29;

> +	uint32_t iqtimer_pkt_dw30;

> +	uint32_t iqtimer_pkt_dw31;

> +	uint32_t reserved56;

> +	uint32_t reserved57;

> +	uint32_t reserved58;

> +	uint32_t set_resources_header;

> +	uint32_t set_resources_dw1;

> +	uint32_t set_resources_dw2;

> +	uint32_t set_resources_dw3;

> +	uint32_t set_resources_dw4;

> +	uint32_t set_resources_dw5;

> +	uint32_t set_resources_dw6;

> +	uint32_t set_resources_dw7;

> +	uint32_t reserved59;

> +	uint32_t reserved60;

> +	uint32_t reserved61;

> +	uint32_t reserved62;

> +	uint32_t reserved63;

> +	uint32_t reserved64;

> +	uint32_t reserved65;

> +	uint32_t reserved66;

> +	uint32_t reserved67;

> +	uint32_t reserved68;

> +	uint32_t reserved69;

> +	uint32_t reserved70;

> +	uint32_t reserved71;

> +	uint32_t reserved72;

> +	uint32_t reserved73;

> +	uint32_t reserved74;

> +	uint32_t reserved75;

> +	uint32_t reserved76;

> +	uint32_t reserved77;

> +	uint32_t reserved78;

> +	uint32_t reserved_t[256];

> +};

> +

> +struct vi_mqd_allocation {

> +	struct vi_mqd mqd;

> +	uint32_t wptr_poll_mem;

> +	uint32_t rptr_report_mem;

> +	uint32_t dyamic_cu_mask;

> +	uint32_t dyamic_rb_mask;

> +};

> +

> +struct cz_mqd {

> +	uint32_t header;

> +	uint32_t compute_dispatch_initiator;

> +	uint32_t compute_dim_x;

> +	uint32_t compute_dim_y;

> +	uint32_t compute_dim_z;

> +	uint32_t compute_start_x;

> +	uint32_t compute_start_y;

> +	uint32_t compute_start_z;

> +	uint32_t compute_num_thread_x;

> +	uint32_t compute_num_thread_y;

> +	uint32_t compute_num_thread_z;

> +	uint32_t compute_pipelinestat_enable;

> +	uint32_t compute_perfcount_enable;

> +	uint32_t compute_pgm_lo;

> +	uint32_t compute_pgm_hi;

> +	uint32_t compute_tba_lo;

> +	uint32_t compute_tba_hi;

> +	uint32_t compute_tma_lo;

> +	uint32_t compute_tma_hi;

> +	uint32_t compute_pgm_rsrc1;

> +	uint32_t compute_pgm_rsrc2;

> +	uint32_t compute_vmid;

> +	uint32_t compute_resource_limits;

> +	uint32_t compute_static_thread_mgmt_se0;

> +	uint32_t compute_static_thread_mgmt_se1;

> +	uint32_t compute_tmpring_size;

> +	uint32_t compute_static_thread_mgmt_se2;

> +	uint32_t compute_static_thread_mgmt_se3;

> +	uint32_t compute_restart_x;

> +	uint32_t compute_restart_y;

> +	uint32_t compute_restart_z;

> +	uint32_t compute_thread_trace_enable;

> +	uint32_t compute_misc_reserved;

> +	uint32_t compute_dispatch_id;

> +	uint32_t compute_threadgroup_id;

> +	uint32_t compute_relaunch;

> +	uint32_t compute_wave_restore_addr_lo;

> +	uint32_t compute_wave_restore_addr_hi;

> +	uint32_t compute_wave_restore_control;

>  	uint32_t reserved_39;

>  	uint32_t reserved_40;

>  	uint32_t reserved_41;

> --

> 1.9.1

> 

> _______________________________________________

> amd-gfx mailing list

> amd-gfx@lists.freedesktop.org

> https://lists.freedesktop.org/mailman/listinfo/amd-gfx