[04/10] drm/msm/gpu: Add per-submission statistics

Submitted by Jordan Crouse on Nov. 2, 2018, 3:25 p.m.

Details

Message ID 20181102152526.23854-5-jcrouse@codeaurora.org
State New
Headers show
Series "v2: drm/msm/gpu updates for 4.21" ( rev: 1 ) in Freedreno

Not browsing as part of any series.

Commit Message

Jordan Crouse Nov. 2, 2018, 3:25 p.m.
Add infrastructure to track statistics for GPU submissions
by sampling certain perfcounters before and after a submission.

To store the statistics, the per-ring memptrs region is
expanded to include room for up to 64 entries - this should
cover a reasonable amount of inflight submissions without
worrying about losing data. The target specific code inserts
PM4 commands to sample the counters before and after
submission and store them in the data region. The CPU can
access the data after the submission retires to make sense
of the statistics and communicate them to the user.

Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>
---
 drivers/gpu/drm/msm/adreno/a6xx_gmu.c | 20 +++++++++-------
 drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 34 ++++++++++++++++++++-------
 drivers/gpu/drm/msm/msm_ringbuffer.h  | 16 +++++++++++++
 3 files changed, 54 insertions(+), 16 deletions(-)

Patch hide | download patch | download mbox

diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
index 0a0ceb76e2ba..e816947ac7d8 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
@@ -346,16 +346,20 @@  static int a6xx_rpmh_start(struct a6xx_gmu *gmu)
 	ret = gmu_poll_timeout(gmu, REG_A6XX_RSCC_SEQ_BUSY_DRV0, val,
 		!val, 100, 10000);
 
-	if (!ret) {
-		gmu_write(gmu, REG_A6XX_GMU_RSCC_CONTROL_REQ, 0);
-
-		/* Re-enable the power counter */
-		gmu_write(gmu, REG_A6XX_GMU_CX_GMU_POWER_COUNTER_ENABLE, 1);
-		return 0;
+	if (ret) {
+		DRM_DEV_ERROR(gmu->dev, "GPU RSC sequence stuck while waking up the GPU\n");
+		return ret;
 	}
 
-	DRM_DEV_ERROR(gmu->dev, "GPU RSC sequence stuck while waking up the GPU\n");
-	return ret;
+	gmu_write(gmu, REG_A6XX_GMU_RSCC_CONTROL_REQ, 0);
+
+	/* Set up CX GMU counter 0 to count busy ticks */
+	gmu_write(gmu, REG_A6XX_GPU_GMU_AO_GPU_CX_BUSY_MASK, 0xff000000);
+	gmu_rmw(gmu, REG_A6XX_GMU_CX_GMU_POWER_COUNTER_SELECT_0, 0xff, 0x20);
+
+	/* Enable the power counter */
+	gmu_write(gmu, REG_A6XX_GMU_CX_GMU_POWER_COUNTER_ENABLE, 1);
+	return 0;
 }
 
 static void a6xx_rpmh_stop(struct a6xx_gmu *gmu)
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
index 38b7a5a92bfb..cf66edfb5246 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
@@ -67,13 +67,34 @@  static void a6xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
 	gpu_write(gpu, REG_A6XX_CP_RB_WPTR, wptr);
 }
 
+static void get_stats_counter(struct msm_ringbuffer *ring, u32 counter,
+		u64 iova)
+{
+	OUT_PKT7(ring, CP_REG_TO_MEM, 3);
+	OUT_RING(ring, counter | (1 << 30) | (2 << 18));
+	OUT_RING(ring, lower_32_bits(iova));
+	OUT_RING(ring, upper_32_bits(iova));
+}
+
 static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
 	struct msm_file_private *ctx)
 {
+	unsigned int index = submit->seqno % MSM_GPU_SUBMIT_STATS_COUNT;
 	struct msm_drm_private *priv = gpu->dev->dev_private;
 	struct msm_ringbuffer *ring = submit->ring;
 	unsigned int i;
 
+	get_stats_counter(ring, REG_A6XX_RBBM_PERFCTR_CP_0_LO,
+		rbmemptr_stats(ring, index, cpcycles_start));
+
+	/*
+	 * For PM4 the GMU register offsets are calculated from the base of the
+	 * GPU registers so we need to add 0x1a800 to the register value on A630
+	 * to get the right value from PM4.
+	 */
+	get_stats_counter(ring, REG_A6XX_GMU_ALWAYS_ON_COUNTER_L + 0x1a800,
+		rbmemptr_stats(ring, index, alwayson_start));
+
 	/* Invalidate CCU depth and color */
 	OUT_PKT7(ring, CP_EVENT_WRITE, 1);
 	OUT_RING(ring, PC_CCU_INVALIDATE_DEPTH);
@@ -98,6 +119,11 @@  static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
 		}
 	}
 
+	get_stats_counter(ring, REG_A6XX_RBBM_PERFCTR_CP_0_LO,
+		rbmemptr_stats(ring, index, cpcycles_end));
+	get_stats_counter(ring, REG_A6XX_GMU_ALWAYS_ON_COUNTER_L + 0x1a800,
+		rbmemptr_stats(ring, index, alwayson_end));
+
 	/* Write the fence to the scratch register */
 	OUT_PKT4(ring, REG_A6XX_CP_SCRATCH_REG(2), 1);
 	OUT_RING(ring, submit->seqno);
@@ -387,14 +413,6 @@  static int a6xx_hw_init(struct msm_gpu *gpu)
 	/* Select CP0 to always count cycles */
 	gpu_write(gpu, REG_A6XX_CP_PERFCTR_CP_SEL_0, PERF_CP_ALWAYS_COUNT);
 
-	/* FIXME: not sure if this should live here or in a6xx_gmu.c */
-	gmu_write(&a6xx_gpu->gmu,  REG_A6XX_GPU_GMU_AO_GPU_CX_BUSY_MASK,
-		0xff000000);
-	gmu_rmw(&a6xx_gpu->gmu, REG_A6XX_GMU_CX_GMU_POWER_COUNTER_SELECT_0,
-		0xff, 0x20);
-	gmu_write(&a6xx_gpu->gmu, REG_A6XX_GMU_CX_GMU_POWER_COUNTER_ENABLE,
-		0x01);
-
 	gpu_write(gpu, REG_A6XX_RB_NC_MODE_CNTL, 2 << 1);
 	gpu_write(gpu, REG_A6XX_TPL1_NC_MODE_CNTL, 2 << 1);
 	gpu_write(gpu, REG_A6XX_SP_NC_MODE_CNTL, 2 << 1);
diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.h b/drivers/gpu/drm/msm/msm_ringbuffer.h
index cffce094aecb..6434ebb13136 100644
--- a/drivers/gpu/drm/msm/msm_ringbuffer.h
+++ b/drivers/gpu/drm/msm/msm_ringbuffer.h
@@ -23,9 +23,25 @@ 
 #define rbmemptr(ring, member)  \
 	((ring)->memptrs_iova + offsetof(struct msm_rbmemptrs, member))
 
+#define rbmemptr_stats(ring, index, member) \
+	(rbmemptr((ring), stats) + \
+	 ((index) * sizeof(struct msm_gpu_submit_stats)) + \
+	 offsetof(struct msm_gpu_submit_stats, member))
+
+struct msm_gpu_submit_stats {
+	u64 cpcycles_start;
+	u64 cpcycles_end;
+	u64 alwayson_start;
+	u64 alwayson_end;
+};
+
+#define MSM_GPU_SUBMIT_STATS_COUNT 64
+
 struct msm_rbmemptrs {
 	volatile uint32_t rptr;
 	volatile uint32_t fence;
+
+	volatile struct msm_gpu_submit_stats stats[MSM_GPU_SUBMIT_STATS_COUNT];
 };
 
 struct msm_ringbuffer {