drm/amdgpu: add a workaround for GDS ordered append hangs with compute queues

Submitted by Marek Olšák on Jan. 22, 2019, 8:05 p.m.

Details

Message ID 20190122200546.1575-1-maraeo@gmail.com
State New
Headers show
Series "drm/amdgpu: add a workaround for GDS ordered append hangs with compute queues" ( rev: 2 ) in AMD X.Org drivers

Not browsing as part of any series.

Commit Message

Marek Olšák Jan. 22, 2019, 8:05 p.m.
From: Marek Olšák <marek.olsak@amd.com>

I'm not increasing the DRM version because GDS isn't totally without bugs yet.

v2: update emit_ib_size

Signed-off-by: Marek Olšák <marek.olsak@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h |  2 ++
 drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c   | 19 +++++++++++-
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c   | 21 +++++++++++--
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c   | 40 +++++++++++++++++++++++--
 include/uapi/drm/amdgpu_drm.h           |  5 ++++
 5 files changed, 82 insertions(+), 5 deletions(-)

Patch hide | download patch | download mbox

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
index ecbcefe49a98..f89f5734d985 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
@@ -30,20 +30,22 @@  struct amdgpu_bo;
 struct amdgpu_gds_asic_info {
 	uint32_t	total_size;
 	uint32_t	gfx_partition_size;
 	uint32_t	cs_partition_size;
 };
 
 struct amdgpu_gds {
 	struct amdgpu_gds_asic_info	mem;
 	struct amdgpu_gds_asic_info	gws;
 	struct amdgpu_gds_asic_info	oa;
+	uint32_t			gds_compute_max_wave_id;
+
 	/* At present, GDS, GWS and OA resources for gfx (graphics)
 	 * is always pre-allocated and available for graphics operation.
 	 * Such resource is shared between all gfx clients.
 	 * TODO: move this operation to user space
 	 * */
 	struct amdgpu_bo*		gds_gfx_bo;
 	struct amdgpu_bo*		gws_gfx_bo;
 	struct amdgpu_bo*		oa_gfx_bo;
 };
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index 7984292f9282..a59e0fdf5a97 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -2257,20 +2257,36 @@  static void gfx_v7_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
 }
 
 static void gfx_v7_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
 					  struct amdgpu_job *job,
 					  struct amdgpu_ib *ib,
 					  uint32_t flags)
 {
 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
 	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
 
+	/* Currently, there is a high possibility to get wave ID mismatch
+	 * between ME and GDS, leading to a hw deadlock, because ME generates
+	 * different wave IDs than the GDS expects. This situation happens
+	 * randomly when at least 5 compute pipes use GDS ordered append.
+	 * The wave IDs generated by ME are also wrong after suspend/resume.
+	 * Those are probably bugs somewhere else in the kernel driver.
+	 *
+	 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
+	 * GDS to 0 for this ring (me/pipe).
+	 */
+	if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
+		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
+		amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID - PACKET3_SET_CONFIG_REG_START);
+		amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
+	}
+
 	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
 	amdgpu_ring_write(ring,
 #ifdef __BIG_ENDIAN
 					  (2 << 0) |
 #endif
 					  (ib->gpu_addr & 0xFFFFFFFC));
 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
 	amdgpu_ring_write(ring, control);
 }
 
@@ -4993,21 +5009,21 @@  static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = {
 	.get_rptr = gfx_v7_0_ring_get_rptr,
 	.get_wptr = gfx_v7_0_ring_get_wptr_compute,
 	.set_wptr = gfx_v7_0_ring_set_wptr_compute,
 	.emit_frame_size =
 		20 + /* gfx_v7_0_ring_emit_gds_switch */
 		7 + /* gfx_v7_0_ring_emit_hdp_flush */
 		5 + /* hdp invalidate */
 		7 + /* gfx_v7_0_ring_emit_pipeline_sync */
 		CIK_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v7_0_ring_emit_vm_flush */
 		7 + 7 + 7, /* gfx_v7_0_ring_emit_fence_compute x3 for user fence, vm fence */
-	.emit_ib_size =	4, /* gfx_v7_0_ring_emit_ib_compute */
+	.emit_ib_size =	7, /* gfx_v7_0_ring_emit_ib_compute */
 	.emit_ib = gfx_v7_0_ring_emit_ib_compute,
 	.emit_fence = gfx_v7_0_ring_emit_fence_compute,
 	.emit_pipeline_sync = gfx_v7_0_ring_emit_pipeline_sync,
 	.emit_vm_flush = gfx_v7_0_ring_emit_vm_flush,
 	.emit_gds_switch = gfx_v7_0_ring_emit_gds_switch,
 	.emit_hdp_flush = gfx_v7_0_ring_emit_hdp_flush,
 	.test_ring = gfx_v7_0_ring_test_ring,
 	.test_ib = gfx_v7_0_ring_test_ib,
 	.insert_nop = amdgpu_ring_insert_nop,
 	.pad_ib = amdgpu_ring_generic_pad_ib,
@@ -5050,20 +5066,21 @@  static void gfx_v7_0_set_irq_funcs(struct amdgpu_device *adev)
 	adev->gfx.priv_inst_irq.num_types = 1;
 	adev->gfx.priv_inst_irq.funcs = &gfx_v7_0_priv_inst_irq_funcs;
 }
 
 static void gfx_v7_0_set_gds_init(struct amdgpu_device *adev)
 {
 	/* init asci gds info */
 	adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
 	adev->gds.gws.total_size = 64;
 	adev->gds.oa.total_size = 16;
+	adev->gds.gds_compute_max_wave_id = RREG32(mmGDS_COMPUTE_MAX_WAVE_ID);
 
 	if (adev->gds.mem.total_size == 64 * 1024) {
 		adev->gds.mem.gfx_partition_size = 4096;
 		adev->gds.mem.cs_partition_size = 4096;
 
 		adev->gds.gws.gfx_partition_size = 4;
 		adev->gds.gws.cs_partition_size = 4;
 
 		adev->gds.oa.gfx_partition_size = 4;
 		adev->gds.oa.cs_partition_size = 1;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index a26747681ed6..b8e50a34bdb3 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -6077,20 +6077,36 @@  static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
 }
 
 static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
 					  struct amdgpu_job *job,
 					  struct amdgpu_ib *ib,
 					  uint32_t flags)
 {
 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
 	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
 
+	/* Currently, there is a high possibility to get wave ID mismatch
+	 * between ME and GDS, leading to a hw deadlock, because ME generates
+	 * different wave IDs than the GDS expects. This situation happens
+	 * randomly when at least 5 compute pipes use GDS ordered append.
+	 * The wave IDs generated by ME are also wrong after suspend/resume.
+	 * Those are probably bugs somewhere else in the kernel driver.
+	 *
+	 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
+	 * GDS to 0 for this ring (me/pipe).
+	 */
+	if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
+		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
+		amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID - PACKET3_SET_CONFIG_REG_START);
+		amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
+	}
+
 	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
 	amdgpu_ring_write(ring,
 #ifdef __BIG_ENDIAN
 				(2 << 0) |
 #endif
 				(ib->gpu_addr & 0xFFFFFFFC));
 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
 	amdgpu_ring_write(ring, control);
 }
 
@@ -6883,21 +6899,21 @@  static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
 	.get_rptr = gfx_v8_0_ring_get_rptr,
 	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
 	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
 	.emit_frame_size =
 		20 + /* gfx_v8_0_ring_emit_gds_switch */
 		7 + /* gfx_v8_0_ring_emit_hdp_flush */
 		5 + /* hdp_invalidate */
 		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
 		VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */
 		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
-	.emit_ib_size =	4, /* gfx_v8_0_ring_emit_ib_compute */
+	.emit_ib_size =	7, /* gfx_v8_0_ring_emit_ib_compute */
 	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
 	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
 	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
 	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
 	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
 	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
 	.test_ring = gfx_v8_0_ring_test_ring,
 	.test_ib = gfx_v8_0_ring_test_ib,
 	.insert_nop = amdgpu_ring_insert_nop,
 	.pad_ib = amdgpu_ring_generic_pad_ib,
@@ -6913,21 +6929,21 @@  static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
 	.get_rptr = gfx_v8_0_ring_get_rptr,
 	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
 	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
 	.emit_frame_size =
 		20 + /* gfx_v8_0_ring_emit_gds_switch */
 		7 + /* gfx_v8_0_ring_emit_hdp_flush */
 		5 + /* hdp_invalidate */
 		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
 		17 + /* gfx_v8_0_ring_emit_vm_flush */
 		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
-	.emit_ib_size =	4, /* gfx_v8_0_ring_emit_ib_compute */
+	.emit_ib_size =	7, /* gfx_v8_0_ring_emit_ib_compute */
 	.emit_fence = gfx_v8_0_ring_emit_fence_kiq,
 	.test_ring = gfx_v8_0_ring_test_ring,
 	.insert_nop = amdgpu_ring_insert_nop,
 	.pad_ib = amdgpu_ring_generic_pad_ib,
 	.emit_rreg = gfx_v8_0_ring_emit_rreg,
 	.emit_wreg = gfx_v8_0_ring_emit_wreg,
 };
 
 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
 {
@@ -6989,20 +7005,21 @@  static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
 {
 	adev->gfx.rlc.funcs = &iceland_rlc_funcs;
 }
 
 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
 {
 	/* init asci gds info */
 	adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
 	adev->gds.gws.total_size = 64;
 	adev->gds.oa.total_size = 16;
+	adev->gds.gds_compute_max_wave_id = RREG32(mmGDS_COMPUTE_MAX_WAVE_ID);
 
 	if (adev->gds.mem.total_size == 64 * 1024) {
 		adev->gds.mem.gfx_partition_size = 4096;
 		adev->gds.mem.cs_partition_size = 4096;
 
 		adev->gds.gws.gfx_partition_size = 4;
 		adev->gds.gws.cs_partition_size = 4;
 
 		adev->gds.oa.gfx_partition_size = 4;
 		adev->gds.oa.cs_partition_size = 1;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 262ee3cf6f1c..5533f6e4f4a4 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -4003,20 +4003,36 @@  static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
 }
 
 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
 					  struct amdgpu_job *job,
 					  struct amdgpu_ib *ib,
 					  uint32_t flags)
 {
 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
 	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
 
+	/* Currently, there is a high possibility to get wave ID mismatch
+	 * between ME and GDS, leading to a hw deadlock, because ME generates
+	 * different wave IDs than the GDS expects. This situation happens
+	 * randomly when at least 5 compute pipes use GDS ordered append.
+	 * The wave IDs generated by ME are also wrong after suspend/resume.
+	 * Those are probably bugs somewhere else in the kernel driver.
+	 *
+	 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
+	 * GDS to 0 for this ring (me/pipe).
+	 */
+	if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
+		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
+		amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
+		amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
+	}
+
 	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
 	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
 	amdgpu_ring_write(ring,
 #ifdef __BIG_ENDIAN
 				(2 << 0) |
 #endif
 				lower_32_bits(ib->gpu_addr));
 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
 	amdgpu_ring_write(ring, control);
 }
@@ -4722,21 +4738,21 @@  static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
 	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
 	.emit_frame_size =
 		20 + /* gfx_v9_0_ring_emit_gds_switch */
 		7 + /* gfx_v9_0_ring_emit_hdp_flush */
 		5 + /* hdp invalidate */
 		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
 		2 + /* gfx_v9_0_ring_emit_vm_flush */
 		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
-	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_compute */
+	.emit_ib_size =	7, /* gfx_v9_0_ring_emit_ib_compute */
 	.emit_ib = gfx_v9_0_ring_emit_ib_compute,
 	.emit_fence = gfx_v9_0_ring_emit_fence,
 	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
 	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
 	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
 	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
 	.test_ring = gfx_v9_0_ring_test_ring,
 	.test_ib = gfx_v9_0_ring_test_ib,
 	.insert_nop = amdgpu_ring_insert_nop,
 	.pad_ib = amdgpu_ring_generic_pad_ib,
@@ -4757,21 +4773,21 @@  static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
 	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
 	.emit_frame_size =
 		20 + /* gfx_v9_0_ring_emit_gds_switch */
 		7 + /* gfx_v9_0_ring_emit_hdp_flush */
 		5 + /* hdp invalidate */
 		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
 		2 + /* gfx_v9_0_ring_emit_vm_flush */
 		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
-	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_compute */
+	.emit_ib_size =	7, /* gfx_v9_0_ring_emit_ib_compute */
 	.emit_fence = gfx_v9_0_ring_emit_fence_kiq,
 	.test_ring = gfx_v9_0_ring_test_ring,
 	.insert_nop = amdgpu_ring_insert_nop,
 	.pad_ib = amdgpu_ring_generic_pad_ib,
 	.emit_rreg = gfx_v9_0_ring_emit_rreg,
 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
 };
 
@@ -4839,20 +4855,40 @@  static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
 		adev->gds.mem.total_size = 0x10000;
 		break;
 	case CHIP_RAVEN:
 		adev->gds.mem.total_size = 0x1000;
 		break;
 	default:
 		adev->gds.mem.total_size = 0x10000;
 		break;
 	}
 
+	switch (adev->asic_type) {
+	case CHIP_VEGA10:
+	case CHIP_VEGA20:
+		adev->gds.gds_compute_max_wave_id = 0x7ff;
+		break;
+	case CHIP_VEGA12:
+		adev->gds.gds_compute_max_wave_id = 0x27f;
+		break;
+	case CHIP_RAVEN:
+		if (adev->rev_id >= 0x8)
+			adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
+		else
+			adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
+		break;
+	default:
+		/* this really depends on the chip */
+		adev->gds.gds_compute_max_wave_id = 0x7ff;
+		break;
+	}
+
 	adev->gds.gws.total_size = 64;
 	adev->gds.oa.total_size = 16;
 
 	if (adev->gds.mem.total_size == 64 * 1024) {
 		adev->gds.mem.gfx_partition_size = 4096;
 		adev->gds.mem.cs_partition_size = 4096;
 
 		adev->gds.gws.gfx_partition_size = 4;
 		adev->gds.gws.cs_partition_size = 4;
 
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index faaad04814e4..662d379ea624 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -561,20 +561,25 @@  union drm_amdgpu_cs {
 /* Preamble flag, which means the IB could be dropped if no context switch */
 #define AMDGPU_IB_FLAG_PREAMBLE (1<<1)
 
 /* Preempt flag, IB should set Pre_enb bit if PREEMPT flag detected */
 #define AMDGPU_IB_FLAG_PREEMPT (1<<2)
 
 /* The IB fence should do the L2 writeback but not invalidate any shader
  * caches (L2/vL1/sL1/I$). */
 #define AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE (1 << 3)
 
+/* Set GDS_COMPUTE_MAX_WAVE_ID = DEFAULT before PACKET3_INDIRECT_BUFFER.
+ * This will reset wave ID counters for the IB.
+ */
+#define AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID (1 << 4)
+
 struct drm_amdgpu_cs_chunk_ib {
 	__u32 _pad;
 	/** AMDGPU_IB_FLAG_* */
 	__u32 flags;
 	/** Virtual address to begin IB execution */
 	__u64 va_start;
 	/** Size of submission */
 	__u32 ib_bytes;
 	/** HW IP to submit to */
 	__u32 ip_type;

Comments

Ping

On Tue, Jan 22, 2019 at 3:05 PM Marek Olšák <maraeo@gmail.com> wrote:

> From: Marek Olšák <marek.olsak@amd.com>
>
> I'm not increasing the DRM version because GDS isn't totally without bugs
> yet.
>
> v2: update emit_ib_size
>
> Signed-off-by: Marek Olšák <marek.olsak@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h |  2 ++
>  drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c   | 19 +++++++++++-
>  drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c   | 21 +++++++++++--
>  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c   | 40 +++++++++++++++++++++++--
>  include/uapi/drm/amdgpu_drm.h           |  5 ++++
>  5 files changed, 82 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
> index ecbcefe49a98..f89f5734d985 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
> @@ -30,20 +30,22 @@ struct amdgpu_bo;
>  struct amdgpu_gds_asic_info {
>         uint32_t        total_size;
>         uint32_t        gfx_partition_size;
>         uint32_t        cs_partition_size;
>  };
>
>  struct amdgpu_gds {
>         struct amdgpu_gds_asic_info     mem;
>         struct amdgpu_gds_asic_info     gws;
>         struct amdgpu_gds_asic_info     oa;
> +       uint32_t                        gds_compute_max_wave_id;
> +
>         /* At present, GDS, GWS and OA resources for gfx (graphics)
>          * is always pre-allocated and available for graphics operation.
>          * Such resource is shared between all gfx clients.
>          * TODO: move this operation to user space
>          * */
>         struct amdgpu_bo*               gds_gfx_bo;
>         struct amdgpu_bo*               gws_gfx_bo;
>         struct amdgpu_bo*               oa_gfx_bo;
>  };
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
> b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
> index 7984292f9282..a59e0fdf5a97 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
> @@ -2257,20 +2257,36 @@ static void gfx_v7_0_ring_emit_ib_gfx(struct
> amdgpu_ring *ring,
>  }
>
>  static void gfx_v7_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
>                                           struct amdgpu_job *job,
>                                           struct amdgpu_ib *ib,
>                                           uint32_t flags)
>  {
>         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
>         u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
>
> +       /* Currently, there is a high possibility to get wave ID mismatch
> +        * between ME and GDS, leading to a hw deadlock, because ME
> generates
> +        * different wave IDs than the GDS expects. This situation happens
> +        * randomly when at least 5 compute pipes use GDS ordered append.
> +        * The wave IDs generated by ME are also wrong after
> suspend/resume.
> +        * Those are probably bugs somewhere else in the kernel driver.
> +        *
> +        * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME
> and
> +        * GDS to 0 for this ring (me/pipe).
> +        */
> +       if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
> +               amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG,
> 1));
> +               amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID -
> PACKET3_SET_CONFIG_REG_START);
> +               amdgpu_ring_write(ring,
> ring->adev->gds.gds_compute_max_wave_id);
> +       }
> +
>         amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
>         amdgpu_ring_write(ring,
>  #ifdef __BIG_ENDIAN
>                                           (2 << 0) |
>  #endif
>                                           (ib->gpu_addr & 0xFFFFFFFC));
>         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
>         amdgpu_ring_write(ring, control);
>  }
>
> @@ -4993,21 +5009,21 @@ static const struct amdgpu_ring_funcs
> gfx_v7_0_ring_funcs_compute = {
>         .get_rptr = gfx_v7_0_ring_get_rptr,
>         .get_wptr = gfx_v7_0_ring_get_wptr_compute,
>         .set_wptr = gfx_v7_0_ring_set_wptr_compute,
>         .emit_frame_size =
>                 20 + /* gfx_v7_0_ring_emit_gds_switch */
>                 7 + /* gfx_v7_0_ring_emit_hdp_flush */
>                 5 + /* hdp invalidate */
>                 7 + /* gfx_v7_0_ring_emit_pipeline_sync */
>                 CIK_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /*
> gfx_v7_0_ring_emit_vm_flush */
>                 7 + 7 + 7, /* gfx_v7_0_ring_emit_fence_compute x3 for user
> fence, vm fence */
> -       .emit_ib_size = 4, /* gfx_v7_0_ring_emit_ib_compute */
> +       .emit_ib_size = 7, /* gfx_v7_0_ring_emit_ib_compute */
>         .emit_ib = gfx_v7_0_ring_emit_ib_compute,
>         .emit_fence = gfx_v7_0_ring_emit_fence_compute,
>         .emit_pipeline_sync = gfx_v7_0_ring_emit_pipeline_sync,
>         .emit_vm_flush = gfx_v7_0_ring_emit_vm_flush,
>         .emit_gds_switch = gfx_v7_0_ring_emit_gds_switch,
>         .emit_hdp_flush = gfx_v7_0_ring_emit_hdp_flush,
>         .test_ring = gfx_v7_0_ring_test_ring,
>         .test_ib = gfx_v7_0_ring_test_ib,
>         .insert_nop = amdgpu_ring_insert_nop,
>         .pad_ib = amdgpu_ring_generic_pad_ib,
> @@ -5050,20 +5066,21 @@ static void gfx_v7_0_set_irq_funcs(struct
> amdgpu_device *adev)
>         adev->gfx.priv_inst_irq.num_types = 1;
>         adev->gfx.priv_inst_irq.funcs = &gfx_v7_0_priv_inst_irq_funcs;
>  }
>
>  static void gfx_v7_0_set_gds_init(struct amdgpu_device *adev)
>  {
>         /* init asci gds info */
>         adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
>         adev->gds.gws.total_size = 64;
>         adev->gds.oa.total_size = 16;
> +       adev->gds.gds_compute_max_wave_id =
> RREG32(mmGDS_COMPUTE_MAX_WAVE_ID);
>
>         if (adev->gds.mem.total_size == 64 * 1024) {
>                 adev->gds.mem.gfx_partition_size = 4096;
>                 adev->gds.mem.cs_partition_size = 4096;
>
>                 adev->gds.gws.gfx_partition_size = 4;
>                 adev->gds.gws.cs_partition_size = 4;
>
>                 adev->gds.oa.gfx_partition_size = 4;
>                 adev->gds.oa.cs_partition_size = 1;
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> index a26747681ed6..b8e50a34bdb3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> @@ -6077,20 +6077,36 @@ static void gfx_v8_0_ring_emit_ib_gfx(struct
> amdgpu_ring *ring,
>  }
>
>  static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
>                                           struct amdgpu_job *job,
>                                           struct amdgpu_ib *ib,
>                                           uint32_t flags)
>  {
>         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
>         u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
>
> +       /* Currently, there is a high possibility to get wave ID mismatch
> +        * between ME and GDS, leading to a hw deadlock, because ME
> generates
> +        * different wave IDs than the GDS expects. This situation happens
> +        * randomly when at least 5 compute pipes use GDS ordered append.
> +        * The wave IDs generated by ME are also wrong after
> suspend/resume.
> +        * Those are probably bugs somewhere else in the kernel driver.
> +        *
> +        * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME
> and
> +        * GDS to 0 for this ring (me/pipe).
> +        */
> +       if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
> +               amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG,
> 1));
> +               amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID -
> PACKET3_SET_CONFIG_REG_START);
> +               amdgpu_ring_write(ring,
> ring->adev->gds.gds_compute_max_wave_id);
> +       }
> +
>         amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
>         amdgpu_ring_write(ring,
>  #ifdef __BIG_ENDIAN
>                                 (2 << 0) |
>  #endif
>                                 (ib->gpu_addr & 0xFFFFFFFC));
>         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
>         amdgpu_ring_write(ring, control);
>  }
>
> @@ -6883,21 +6899,21 @@ static const struct amdgpu_ring_funcs
> gfx_v8_0_ring_funcs_compute = {
>         .get_rptr = gfx_v8_0_ring_get_rptr,
>         .get_wptr = gfx_v8_0_ring_get_wptr_compute,
>         .set_wptr = gfx_v8_0_ring_set_wptr_compute,
>         .emit_frame_size =
>                 20 + /* gfx_v8_0_ring_emit_gds_switch */
>                 7 + /* gfx_v8_0_ring_emit_hdp_flush */
>                 5 + /* hdp_invalidate */
>                 7 + /* gfx_v8_0_ring_emit_pipeline_sync */
>                 VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /*
> gfx_v8_0_ring_emit_vm_flush */
>                 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user
> fence, vm fence */
> -       .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
> +       .emit_ib_size = 7, /* gfx_v8_0_ring_emit_ib_compute */
>         .emit_ib = gfx_v8_0_ring_emit_ib_compute,
>         .emit_fence = gfx_v8_0_ring_emit_fence_compute,
>         .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
>         .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
>         .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
>         .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
>         .test_ring = gfx_v8_0_ring_test_ring,
>         .test_ib = gfx_v8_0_ring_test_ib,
>         .insert_nop = amdgpu_ring_insert_nop,
>         .pad_ib = amdgpu_ring_generic_pad_ib,
> @@ -6913,21 +6929,21 @@ static const struct amdgpu_ring_funcs
> gfx_v8_0_ring_funcs_kiq = {
>         .get_rptr = gfx_v8_0_ring_get_rptr,
>         .get_wptr = gfx_v8_0_ring_get_wptr_compute,
>         .set_wptr = gfx_v8_0_ring_set_wptr_compute,
>         .emit_frame_size =
>                 20 + /* gfx_v8_0_ring_emit_gds_switch */
>                 7 + /* gfx_v8_0_ring_emit_hdp_flush */
>                 5 + /* hdp_invalidate */
>                 7 + /* gfx_v8_0_ring_emit_pipeline_sync */
>                 17 + /* gfx_v8_0_ring_emit_vm_flush */
>                 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user
> fence, vm fence */
> -       .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
> +       .emit_ib_size = 7, /* gfx_v8_0_ring_emit_ib_compute */
>         .emit_fence = gfx_v8_0_ring_emit_fence_kiq,
>         .test_ring = gfx_v8_0_ring_test_ring,
>         .insert_nop = amdgpu_ring_insert_nop,
>         .pad_ib = amdgpu_ring_generic_pad_ib,
>         .emit_rreg = gfx_v8_0_ring_emit_rreg,
>         .emit_wreg = gfx_v8_0_ring_emit_wreg,
>  };
>
>  static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
>  {
> @@ -6989,20 +7005,21 @@ static void gfx_v8_0_set_rlc_funcs(struct
> amdgpu_device *adev)
>  {
>         adev->gfx.rlc.funcs = &iceland_rlc_funcs;
>  }
>
>  static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
>  {
>         /* init asci gds info */
>         adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
>         adev->gds.gws.total_size = 64;
>         adev->gds.oa.total_size = 16;
> +       adev->gds.gds_compute_max_wave_id =
> RREG32(mmGDS_COMPUTE_MAX_WAVE_ID);
>
>         if (adev->gds.mem.total_size == 64 * 1024) {
>                 adev->gds.mem.gfx_partition_size = 4096;
>                 adev->gds.mem.cs_partition_size = 4096;
>
>                 adev->gds.gws.gfx_partition_size = 4;
>                 adev->gds.gws.cs_partition_size = 4;
>
>                 adev->gds.oa.gfx_partition_size = 4;
>                 adev->gds.oa.cs_partition_size = 1;
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> index 262ee3cf6f1c..5533f6e4f4a4 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> @@ -4003,20 +4003,36 @@ static void gfx_v9_0_ring_emit_ib_gfx(struct
> amdgpu_ring *ring,
>  }
>
>  static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
>                                           struct amdgpu_job *job,
>                                           struct amdgpu_ib *ib,
>                                           uint32_t flags)
>  {
>         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
>         u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
>
> +       /* Currently, there is a high possibility to get wave ID mismatch
> +        * between ME and GDS, leading to a hw deadlock, because ME
> generates
> +        * different wave IDs than the GDS expects. This situation happens
> +        * randomly when at least 5 compute pipes use GDS ordered append.
> +        * The wave IDs generated by ME are also wrong after
> suspend/resume.
> +        * Those are probably bugs somewhere else in the kernel driver.
> +        *
> +        * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME
> and
> +        * GDS to 0 for this ring (me/pipe).
> +        */
> +       if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
> +               amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG,
> 1));
> +               amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
> +               amdgpu_ring_write(ring,
> ring->adev->gds.gds_compute_max_wave_id);
> +       }
> +
>         amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
>         BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
>         amdgpu_ring_write(ring,
>  #ifdef __BIG_ENDIAN
>                                 (2 << 0) |
>  #endif
>                                 lower_32_bits(ib->gpu_addr));
>         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
>         amdgpu_ring_write(ring, control);
>  }
> @@ -4722,21 +4738,21 @@ static const struct amdgpu_ring_funcs
> gfx_v9_0_ring_funcs_compute = {
>         .set_wptr = gfx_v9_0_ring_set_wptr_compute,
>         .emit_frame_size =
>                 20 + /* gfx_v9_0_ring_emit_gds_switch */
>                 7 + /* gfx_v9_0_ring_emit_hdp_flush */
>                 5 + /* hdp invalidate */
>                 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
>                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
>                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
>                 2 + /* gfx_v9_0_ring_emit_vm_flush */
>                 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence,
> vm fence */
> -       .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_compute */
> +       .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
>         .emit_ib = gfx_v9_0_ring_emit_ib_compute,
>         .emit_fence = gfx_v9_0_ring_emit_fence,
>         .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
>         .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
>         .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
>         .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
>         .test_ring = gfx_v9_0_ring_test_ring,
>         .test_ib = gfx_v9_0_ring_test_ib,
>         .insert_nop = amdgpu_ring_insert_nop,
>         .pad_ib = amdgpu_ring_generic_pad_ib,
> @@ -4757,21 +4773,21 @@ static const struct amdgpu_ring_funcs
> gfx_v9_0_ring_funcs_kiq = {
>         .set_wptr = gfx_v9_0_ring_set_wptr_compute,
>         .emit_frame_size =
>                 20 + /* gfx_v9_0_ring_emit_gds_switch */
>                 7 + /* gfx_v9_0_ring_emit_hdp_flush */
>                 5 + /* hdp invalidate */
>                 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
>                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
>                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
>                 2 + /* gfx_v9_0_ring_emit_vm_flush */
>                 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user
> fence, vm fence */
> -       .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_compute */
> +       .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
>         .emit_fence = gfx_v9_0_ring_emit_fence_kiq,
>         .test_ring = gfx_v9_0_ring_test_ring,
>         .insert_nop = amdgpu_ring_insert_nop,
>         .pad_ib = amdgpu_ring_generic_pad_ib,
>         .emit_rreg = gfx_v9_0_ring_emit_rreg,
>         .emit_wreg = gfx_v9_0_ring_emit_wreg,
>         .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
>         .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
>  };
>
> @@ -4839,20 +4855,40 @@ static void gfx_v9_0_set_gds_init(struct
> amdgpu_device *adev)
>                 adev->gds.mem.total_size = 0x10000;
>                 break;
>         case CHIP_RAVEN:
>                 adev->gds.mem.total_size = 0x1000;
>                 break;
>         default:
>                 adev->gds.mem.total_size = 0x10000;
>                 break;
>         }
>
> +       switch (adev->asic_type) {
> +       case CHIP_VEGA10:
> +       case CHIP_VEGA20:
> +               adev->gds.gds_compute_max_wave_id = 0x7ff;
> +               break;
> +       case CHIP_VEGA12:
> +               adev->gds.gds_compute_max_wave_id = 0x27f;
> +               break;
> +       case CHIP_RAVEN:
> +               if (adev->rev_id >= 0x8)
> +                       adev->gds.gds_compute_max_wave_id = 0x77; /*
> raven2 */
> +               else
> +                       adev->gds.gds_compute_max_wave_id = 0x15f; /*
> raven1 */
> +               break;
> +       default:
> +               /* this really depends on the chip */
> +               adev->gds.gds_compute_max_wave_id = 0x7ff;
> +               break;
> +       }
> +
>         adev->gds.gws.total_size = 64;
>         adev->gds.oa.total_size = 16;
>
>         if (adev->gds.mem.total_size == 64 * 1024) {
>                 adev->gds.mem.gfx_partition_size = 4096;
>                 adev->gds.mem.cs_partition_size = 4096;
>
>                 adev->gds.gws.gfx_partition_size = 4;
>                 adev->gds.gws.cs_partition_size = 4;
>
> diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
> index faaad04814e4..662d379ea624 100644
> --- a/include/uapi/drm/amdgpu_drm.h
> +++ b/include/uapi/drm/amdgpu_drm.h
> @@ -561,20 +561,25 @@ union drm_amdgpu_cs {
>  /* Preamble flag, which means the IB could be dropped if no context
> switch */
>  #define AMDGPU_IB_FLAG_PREAMBLE (1<<1)
>
>  /* Preempt flag, IB should set Pre_enb bit if PREEMPT flag detected */
>  #define AMDGPU_IB_FLAG_PREEMPT (1<<2)
>
>  /* The IB fence should do the L2 writeback but not invalidate any shader
>   * caches (L2/vL1/sL1/I$). */
>  #define AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE (1 << 3)
>
> +/* Set GDS_COMPUTE_MAX_WAVE_ID = DEFAULT before PACKET3_INDIRECT_BUFFER.
> + * This will reset wave ID counters for the IB.
> + */
> +#define AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID (1 << 4)
> +
>  struct drm_amdgpu_cs_chunk_ib {
>         __u32 _pad;
>         /** AMDGPU_IB_FLAG_* */
>         __u32 flags;
>         /** Virtual address to begin IB execution */
>         __u64 va_start;
>         /** Size of submission */
>         __u32 ib_bytes;
>         /** HW IP to submit to */
>         __u32 ip_type;
> --
> 2.17.1
>
>
At least from coding style, backward compatibility etc., this looks sane
to me, so feel free to add an Acked-by.

But I absolutely can't judge if that is correct from the hardware point 
of view or not.

And I think that somebody else looking at this is mandatory for it to be 
committed.

Christian.

Am 28.01.19 um 18:25 schrieb Marek Olšák:
> Ping
>
> On Tue, Jan 22, 2019 at 3:05 PM Marek Olšák <maraeo@gmail.com 
> <mailto:maraeo@gmail.com>> wrote:
>
>     From: Marek Olšák <marek.olsak@amd.com <mailto:marek.olsak@amd.com>>
>
>     I'm not increasing the DRM version because GDS isn't totally
>     without bugs yet.
>
>     v2: update emit_ib_size
>
>     Signed-off-by: Marek Olšák <marek.olsak@amd.com
>     <mailto:marek.olsak@amd.com>>
>     ---
>      drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h |  2 ++
>      drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c   | 19 +++++++++++-
>      drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c   | 21 +++++++++++--
>      drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c   | 40
>     +++++++++++++++++++++++--
>      include/uapi/drm/amdgpu_drm.h           |  5 ++++
>      5 files changed, 82 insertions(+), 5 deletions(-)
>
>     diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
>     b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
>     index ecbcefe49a98..f89f5734d985 100644
>     --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
>     +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
>     @@ -30,20 +30,22 @@ struct amdgpu_bo;
>      struct amdgpu_gds_asic_info {
>             uint32_t        total_size;
>             uint32_t        gfx_partition_size;
>             uint32_t        cs_partition_size;
>      };
>
>      struct amdgpu_gds {
>             struct amdgpu_gds_asic_info     mem;
>             struct amdgpu_gds_asic_info     gws;
>             struct amdgpu_gds_asic_info     oa;
>     +       uint32_t gds_compute_max_wave_id;
>     +
>             /* At present, GDS, GWS and OA resources for gfx (graphics)
>              * is always pre-allocated and available for graphics
>     operation.
>              * Such resource is shared between all gfx clients.
>              * TODO: move this operation to user space
>              * */
>             struct amdgpu_bo*               gds_gfx_bo;
>             struct amdgpu_bo*               gws_gfx_bo;
>             struct amdgpu_bo*               oa_gfx_bo;
>      };
>
>     diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
>     b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
>     index 7984292f9282..a59e0fdf5a97 100644
>     --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
>     +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
>     @@ -2257,20 +2257,36 @@ static void
>     gfx_v7_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
>      }
>
>      static void gfx_v7_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
>                                               struct amdgpu_job *job,
>                                               struct amdgpu_ib *ib,
>                                               uint32_t flags)
>      {
>             unsigned vmid = AMDGPU_JOB_GET_VMID(job);
>             u32 control = INDIRECT_BUFFER_VALID | ib->length_dw |
>     (vmid << 24);
>
>     +       /* Currently, there is a high possibility to get wave ID
>     mismatch
>     +        * between ME and GDS, leading to a hw deadlock, because
>     ME generates
>     +        * different wave IDs than the GDS expects. This situation
>     happens
>     +        * randomly when at least 5 compute pipes use GDS ordered
>     append.
>     +        * The wave IDs generated by ME are also wrong after
>     suspend/resume.
>     +        * Those are probably bugs somewhere else in the kernel
>     driver.
>     +        *
>     +        * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters
>     in ME and
>     +        * GDS to 0 for this ring (me/pipe).
>     +        */
>     +       if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
>     +               amdgpu_ring_write(ring,
>     PACKET3(PACKET3_SET_CONFIG_REG, 1));
>     +               amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID
>     - PACKET3_SET_CONFIG_REG_START);
>     +               amdgpu_ring_write(ring,
>     ring->adev->gds.gds_compute_max_wave_id);
>     +       }
>     +
>             amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
>             amdgpu_ring_write(ring,
>      #ifdef __BIG_ENDIAN
>                                               (2 << 0) |
>      #endif
>                                               (ib->gpu_addr &
>     0xFFFFFFFC));
>             amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
>             amdgpu_ring_write(ring, control);
>      }
>
>     @@ -4993,21 +5009,21 @@ static const struct amdgpu_ring_funcs
>     gfx_v7_0_ring_funcs_compute = {
>             .get_rptr = gfx_v7_0_ring_get_rptr,
>             .get_wptr = gfx_v7_0_ring_get_wptr_compute,
>             .set_wptr = gfx_v7_0_ring_set_wptr_compute,
>             .emit_frame_size =
>                     20 + /* gfx_v7_0_ring_emit_gds_switch */
>                     7 + /* gfx_v7_0_ring_emit_hdp_flush */
>                     5 + /* hdp invalidate */
>                     7 + /* gfx_v7_0_ring_emit_pipeline_sync */
>                     CIK_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /*
>     gfx_v7_0_ring_emit_vm_flush */
>                     7 + 7 + 7, /* gfx_v7_0_ring_emit_fence_compute x3
>     for user fence, vm fence */
>     -       .emit_ib_size = 4, /* gfx_v7_0_ring_emit_ib_compute */
>     +       .emit_ib_size = 7, /* gfx_v7_0_ring_emit_ib_compute */
>             .emit_ib = gfx_v7_0_ring_emit_ib_compute,
>             .emit_fence = gfx_v7_0_ring_emit_fence_compute,
>             .emit_pipeline_sync = gfx_v7_0_ring_emit_pipeline_sync,
>             .emit_vm_flush = gfx_v7_0_ring_emit_vm_flush,
>             .emit_gds_switch = gfx_v7_0_ring_emit_gds_switch,
>             .emit_hdp_flush = gfx_v7_0_ring_emit_hdp_flush,
>             .test_ring = gfx_v7_0_ring_test_ring,
>             .test_ib = gfx_v7_0_ring_test_ib,
>             .insert_nop = amdgpu_ring_insert_nop,
>             .pad_ib = amdgpu_ring_generic_pad_ib,
>     @@ -5050,20 +5066,21 @@ static void gfx_v7_0_set_irq_funcs(struct
>     amdgpu_device *adev)
>             adev->gfx.priv_inst_irq.num_types = 1;
>             adev->gfx.priv_inst_irq.funcs = &gfx_v7_0_priv_inst_irq_funcs;
>      }
>
>      static void gfx_v7_0_set_gds_init(struct amdgpu_device *adev)
>      {
>             /* init asci gds info */
>             adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
>             adev->gds.gws.total_size = 64;
>             adev->gds.oa.total_size = 16;
>     +       adev->gds.gds_compute_max_wave_id =
>     RREG32(mmGDS_COMPUTE_MAX_WAVE_ID);
>
>             if (adev->gds.mem.total_size == 64 * 1024) {
>                     adev->gds.mem.gfx_partition_size = 4096;
>                     adev->gds.mem.cs_partition_size = 4096;
>
>                     adev->gds.gws.gfx_partition_size = 4;
>                     adev->gds.gws.cs_partition_size = 4;
>
>                     adev->gds.oa.gfx_partition_size = 4;
>                     adev->gds.oa.cs_partition_size = 1;
>     diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
>     b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
>     index a26747681ed6..b8e50a34bdb3 100644
>     --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
>     +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
>     @@ -6077,20 +6077,36 @@ static void
>     gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
>      }
>
>      static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
>                                               struct amdgpu_job *job,
>                                               struct amdgpu_ib *ib,
>                                               uint32_t flags)
>      {
>             unsigned vmid = AMDGPU_JOB_GET_VMID(job);
>             u32 control = INDIRECT_BUFFER_VALID | ib->length_dw |
>     (vmid << 24);
>
>     +       /* Currently, there is a high possibility to get wave ID
>     mismatch
>     +        * between ME and GDS, leading to a hw deadlock, because
>     ME generates
>     +        * different wave IDs than the GDS expects. This situation
>     happens
>     +        * randomly when at least 5 compute pipes use GDS ordered
>     append.
>     +        * The wave IDs generated by ME are also wrong after
>     suspend/resume.
>     +        * Those are probably bugs somewhere else in the kernel
>     driver.
>     +        *
>     +        * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters
>     in ME and
>     +        * GDS to 0 for this ring (me/pipe).
>     +        */
>     +       if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
>     +               amdgpu_ring_write(ring,
>     PACKET3(PACKET3_SET_CONFIG_REG, 1));
>     +               amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID
>     - PACKET3_SET_CONFIG_REG_START);
>     +               amdgpu_ring_write(ring,
>     ring->adev->gds.gds_compute_max_wave_id);
>     +       }
>     +
>             amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
>             amdgpu_ring_write(ring,
>      #ifdef __BIG_ENDIAN
>                                     (2 << 0) |
>      #endif
>                                     (ib->gpu_addr & 0xFFFFFFFC));
>             amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
>             amdgpu_ring_write(ring, control);
>      }
>
>     @@ -6883,21 +6899,21 @@ static const struct amdgpu_ring_funcs
>     gfx_v8_0_ring_funcs_compute = {
>             .get_rptr = gfx_v8_0_ring_get_rptr,
>             .get_wptr = gfx_v8_0_ring_get_wptr_compute,
>             .set_wptr = gfx_v8_0_ring_set_wptr_compute,
>             .emit_frame_size =
>                     20 + /* gfx_v8_0_ring_emit_gds_switch */
>                     7 + /* gfx_v8_0_ring_emit_hdp_flush */
>                     5 + /* hdp_invalidate */
>                     7 + /* gfx_v8_0_ring_emit_pipeline_sync */
>                     VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /*
>     gfx_v8_0_ring_emit_vm_flush */
>                     7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3
>     for user fence, vm fence */
>     -       .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
>     +       .emit_ib_size = 7, /* gfx_v8_0_ring_emit_ib_compute */
>             .emit_ib = gfx_v8_0_ring_emit_ib_compute,
>             .emit_fence = gfx_v8_0_ring_emit_fence_compute,
>             .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
>             .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
>             .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
>             .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
>             .test_ring = gfx_v8_0_ring_test_ring,
>             .test_ib = gfx_v8_0_ring_test_ib,
>             .insert_nop = amdgpu_ring_insert_nop,
>             .pad_ib = amdgpu_ring_generic_pad_ib,
>     @@ -6913,21 +6929,21 @@ static const struct amdgpu_ring_funcs
>     gfx_v8_0_ring_funcs_kiq = {
>             .get_rptr = gfx_v8_0_ring_get_rptr,
>             .get_wptr = gfx_v8_0_ring_get_wptr_compute,
>             .set_wptr = gfx_v8_0_ring_set_wptr_compute,
>             .emit_frame_size =
>                     20 + /* gfx_v8_0_ring_emit_gds_switch */
>                     7 + /* gfx_v8_0_ring_emit_hdp_flush */
>                     5 + /* hdp_invalidate */
>                     7 + /* gfx_v8_0_ring_emit_pipeline_sync */
>                     17 + /* gfx_v8_0_ring_emit_vm_flush */
>                     7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for
>     user fence, vm fence */
>     -       .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
>     +       .emit_ib_size = 7, /* gfx_v8_0_ring_emit_ib_compute */
>             .emit_fence = gfx_v8_0_ring_emit_fence_kiq,
>             .test_ring = gfx_v8_0_ring_test_ring,
>             .insert_nop = amdgpu_ring_insert_nop,
>             .pad_ib = amdgpu_ring_generic_pad_ib,
>             .emit_rreg = gfx_v8_0_ring_emit_rreg,
>             .emit_wreg = gfx_v8_0_ring_emit_wreg,
>      };
>
>      static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
>      {
>     @@ -6989,20 +7005,21 @@ static void gfx_v8_0_set_rlc_funcs(struct
>     amdgpu_device *adev)
>      {
>             adev->gfx.rlc.funcs = &iceland_rlc_funcs;
>      }
>
>      static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
>      {
>             /* init asci gds info */
>             adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
>             adev->gds.gws.total_size = 64;
>             adev->gds.oa.total_size = 16;
>     +       adev->gds.gds_compute_max_wave_id =
>     RREG32(mmGDS_COMPUTE_MAX_WAVE_ID);
>
>             if (adev->gds.mem.total_size == 64 * 1024) {
>                     adev->gds.mem.gfx_partition_size = 4096;
>                     adev->gds.mem.cs_partition_size = 4096;
>
>                     adev->gds.gws.gfx_partition_size = 4;
>                     adev->gds.gws.cs_partition_size = 4;
>
>                     adev->gds.oa.gfx_partition_size = 4;
>                     adev->gds.oa.cs_partition_size = 1;
>     diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>     b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>     index 262ee3cf6f1c..5533f6e4f4a4 100644
>     --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>     +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>     @@ -4003,20 +4003,36 @@ static void
>     gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
>      }
>
>      static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
>                                               struct amdgpu_job *job,
>                                               struct amdgpu_ib *ib,
>                                               uint32_t flags)
>      {
>             unsigned vmid = AMDGPU_JOB_GET_VMID(job);
>             u32 control = INDIRECT_BUFFER_VALID | ib->length_dw |
>     (vmid << 24);
>
>     +       /* Currently, there is a high possibility to get wave ID
>     mismatch
>     +        * between ME and GDS, leading to a hw deadlock, because
>     ME generates
>     +        * different wave IDs than the GDS expects. This situation
>     happens
>     +        * randomly when at least 5 compute pipes use GDS ordered
>     append.
>     +        * The wave IDs generated by ME are also wrong after
>     suspend/resume.
>     +        * Those are probably bugs somewhere else in the kernel
>     driver.
>     +        *
>     +        * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters
>     in ME and
>     +        * GDS to 0 for this ring (me/pipe).
>     +        */
>     +       if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
>     +               amdgpu_ring_write(ring,
>     PACKET3(PACKET3_SET_CONFIG_REG, 1));
>     +               amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
>     +               amdgpu_ring_write(ring,
>     ring->adev->gds.gds_compute_max_wave_id);
>     +       }
>     +
>             amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
>             BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
>             amdgpu_ring_write(ring,
>      #ifdef __BIG_ENDIAN
>                                     (2 << 0) |
>      #endif
>     lower_32_bits(ib->gpu_addr));
>             amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
>             amdgpu_ring_write(ring, control);
>      }
>     @@ -4722,21 +4738,21 @@ static const struct amdgpu_ring_funcs
>     gfx_v9_0_ring_funcs_compute = {
>             .set_wptr = gfx_v9_0_ring_set_wptr_compute,
>             .emit_frame_size =
>                     20 + /* gfx_v9_0_ring_emit_gds_switch */
>                     7 + /* gfx_v9_0_ring_emit_hdp_flush */
>                     5 + /* hdp invalidate */
>                     7 + /* gfx_v9_0_ring_emit_pipeline_sync */
>                     SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
>                     SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
>                     2 + /* gfx_v9_0_ring_emit_vm_flush */
>                     8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user
>     fence, vm fence */
>     -       .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_compute */
>     +       .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
>             .emit_ib = gfx_v9_0_ring_emit_ib_compute,
>             .emit_fence = gfx_v9_0_ring_emit_fence,
>             .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
>             .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
>             .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
>             .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
>             .test_ring = gfx_v9_0_ring_test_ring,
>             .test_ib = gfx_v9_0_ring_test_ib,
>             .insert_nop = amdgpu_ring_insert_nop,
>             .pad_ib = amdgpu_ring_generic_pad_ib,
>     @@ -4757,21 +4773,21 @@ static const struct amdgpu_ring_funcs
>     gfx_v9_0_ring_funcs_kiq = {
>             .set_wptr = gfx_v9_0_ring_set_wptr_compute,
>             .emit_frame_size =
>                     20 + /* gfx_v9_0_ring_emit_gds_switch */
>                     7 + /* gfx_v9_0_ring_emit_hdp_flush */
>                     5 + /* hdp invalidate */
>                     7 + /* gfx_v9_0_ring_emit_pipeline_sync */
>                     SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
>                     SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
>                     2 + /* gfx_v9_0_ring_emit_vm_flush */
>                     8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for
>     user fence, vm fence */
>     -       .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_compute */
>     +       .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
>             .emit_fence = gfx_v9_0_ring_emit_fence_kiq,
>             .test_ring = gfx_v9_0_ring_test_ring,
>             .insert_nop = amdgpu_ring_insert_nop,
>             .pad_ib = amdgpu_ring_generic_pad_ib,
>             .emit_rreg = gfx_v9_0_ring_emit_rreg,
>             .emit_wreg = gfx_v9_0_ring_emit_wreg,
>             .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
>             .emit_reg_write_reg_wait =
>     gfx_v9_0_ring_emit_reg_write_reg_wait,
>      };
>
>     @@ -4839,20 +4855,40 @@ static void gfx_v9_0_set_gds_init(struct
>     amdgpu_device *adev)
>                     adev->gds.mem.total_size = 0x10000;
>                     break;
>             case CHIP_RAVEN:
>                     adev->gds.mem.total_size = 0x1000;
>                     break;
>             default:
>                     adev->gds.mem.total_size = 0x10000;
>                     break;
>             }
>
>     +       switch (adev->asic_type) {
>     +       case CHIP_VEGA10:
>     +       case CHIP_VEGA20:
>     +               adev->gds.gds_compute_max_wave_id = 0x7ff;
>     +               break;
>     +       case CHIP_VEGA12:
>     +               adev->gds.gds_compute_max_wave_id = 0x27f;
>     +               break;
>     +       case CHIP_RAVEN:
>     +               if (adev->rev_id >= 0x8)
>     +                       adev->gds.gds_compute_max_wave_id = 0x77;
>     /* raven2 */
>     +               else
>     +                       adev->gds.gds_compute_max_wave_id = 0x15f;
>     /* raven1 */
>     +               break;
>     +       default:
>     +               /* this really depends on the chip */
>     +               adev->gds.gds_compute_max_wave_id = 0x7ff;
>     +               break;
>     +       }
>     +
>             adev->gds.gws.total_size = 64;
>             adev->gds.oa.total_size = 16;
>
>             if (adev->gds.mem.total_size == 64 * 1024) {
>                     adev->gds.mem.gfx_partition_size = 4096;
>                     adev->gds.mem.cs_partition_size = 4096;
>
>                     adev->gds.gws.gfx_partition_size = 4;
>                     adev->gds.gws.cs_partition_size = 4;
>
>     diff --git a/include/uapi/drm/amdgpu_drm.h
>     b/include/uapi/drm/amdgpu_drm.h
>     index faaad04814e4..662d379ea624 100644
>     --- a/include/uapi/drm/amdgpu_drm.h
>     +++ b/include/uapi/drm/amdgpu_drm.h
>     @@ -561,20 +561,25 @@ union drm_amdgpu_cs {
>      /* Preamble flag, which means the IB could be dropped if no
>     context switch */
>      #define AMDGPU_IB_FLAG_PREAMBLE (1<<1)
>
>      /* Preempt flag, IB should set Pre_enb bit if PREEMPT flag
>     detected */
>      #define AMDGPU_IB_FLAG_PREEMPT (1<<2)
>
>      /* The IB fence should do the L2 writeback but not invalidate any
>     shader
>       * caches (L2/vL1/sL1/I$). */
>      #define AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE (1 << 3)
>
>     +/* Set GDS_COMPUTE_MAX_WAVE_ID = DEFAULT before
>     PACKET3_INDIRECT_BUFFER.
>     + * This will reset wave ID counters for the IB.
>     + */
>     +#define AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID (1 << 4)
>     +
>      struct drm_amdgpu_cs_chunk_ib {
>             __u32 _pad;
>             /** AMDGPU_IB_FLAG_* */
>             __u32 flags;
>             /** Virtual address to begin IB execution */
>             __u64 va_start;
>             /** Size of submission */
>             __u32 ib_bytes;
>             /** HW IP to submit to */
>             __u32 ip_type;
>     -- 
>     2.17.1
>
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
On Mon, Feb 4, 2019 at 7:42 AM Christian König <
ckoenig.leichtzumerken@gmail.com> wrote:

> At least from coding style, backward compatibility etc., this looks sane
> to me, so feel free to add an Acked-by.
>
> But I absolutely can't judge if that is correct from the hardware point of
> view or not.
>

Our GDS docs say that writing the register resets the wave counter.


>
> And I think that somebody else looking at this is mandatory for it to be
> committed.
>

There won't be anybody else. Nobody here really understands GDS, nobody
here really cares about GDS, and hw guys weren't helpful with the hangs. If
I didn't discover this by luck, GDS OA would be unusable on Linux.

Marek
FYI, when I push this, I'll bump the DRM version.

Marek

On Mon, Feb 4, 2019 at 10:55 AM Marek Olšák <maraeo@gmail.com> wrote:

> On Mon, Feb 4, 2019 at 7:42 AM Christian König <
> ckoenig.leichtzumerken@gmail.com> wrote:
>
>> At least from coding style, backward compatibility etc., this looks sane
>> to me, so feel free to add an Acked-by.
>>
>> But I absolutely can't judge if that is correct from the hardware point
>> of view or not.
>>
>
> Our GDS docs say that writing the register resets the wave counter.
>
>
>>
>> And I think that somebody else looking at this is mandatory for it to be
>> committed.
>>
>
> There won't be anybody else. Nobody here really understands GDS, nobody
> here really cares about GDS, and hw guys weren't helpful with the hangs. If
> I didn't discover this by luck, GDS OA would be unusable on Linux.
>
> Marek
>
Am 04.02.19 um 16:55 schrieb Marek Olšák:
On Mon, Feb 4, 2019 at 7:42 AM Christian König <ckoenig.leichtzumerken@gmail.com<mailto:ckoenig.leichtzumerken@gmail.com>> wrote:
At least from coding style, backward compatibility etc., this looks sane to me, so feel free to add an Acked-by.

But I absolutely can't judge if that is correct from the hardware point of view or not.

Our GDS docs say that writing the register resets the wave counter.


And I think that somebody else looking at this is mandatory for it to be committed.

There won't be anybody else. Nobody here really understands GDS, nobody here really cares about GDS, and hw guys weren't helpful with the hangs. If I didn't discover this by luck, GDS OA would be unusable on Linux.

Great, as long as Alex is ok with this feel free to go ahead.

Christian.


Marek
On Mon, Feb 4, 2019 at 11:09 AM Koenig, Christian
<Christian.Koenig@amd.com> wrote:
>
> Am 04.02.19 um 16:55 schrieb Marek Olšák:
>
> On Mon, Feb 4, 2019 at 7:42 AM Christian König <ckoenig.leichtzumerken@gmail.com> wrote:
>>
>> At least from coding style, backward compatibility etc., this looks sane to me, so feel free to add an Acked-by.
>>
>> But I absolutely can't judge if that is correct from the hardware point of view or not.
>
>
> Our GDS docs say that writing the register resets the wave counter.
>
>>
>>
>> And I think that somebody else looking at this is mandatory for it to be committed.
>
>
> There won't be anybody else. Nobody here really understands GDS, nobody here really cares about GDS, and hw guys weren't helpful with the hangs. If I didn't discover this by luck, GDS OA would be unusable on Linux.
>
>
> Great, as long as Alex is ok with this feel free to go ahead.

Acked-by: Alex Deucher <alexander.deucher@amd.com>

>
> Christian.
>
>
> Marek
>
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx