[v2] radeonsi: emit_spi_map packets optimization

Submitted by Jiang, Sonny on July 18, 2018, 9:48 p.m.

Details

Message ID 1531950530-12852-1-git-send-email-sonny.jiang@amd.com
State New
Headers show
Series "radeonsi: emit_spi_map packets optimization" ( rev: 1 ) in Mesa

Not browsing as part of any series.

Commit Message

Jiang, Sonny July 18, 2018, 9:48 p.m.
Signed-off-by: Sonny Jiang <sonny.jiang@amd.com>
---
 src/gallium/drivers/radeonsi/si_build_pm4.h     | 24 ++++++++++++++++++++++++
 src/gallium/drivers/radeonsi/si_gfx_cs.c        |  3 +++
 src/gallium/drivers/radeonsi/si_state.h         |  1 +
 src/gallium/drivers/radeonsi/si_state_shaders.c | 21 +++++++++++++--------
 4 files changed, 41 insertions(+), 8 deletions(-)

Patch hide | download patch | download mbox

diff --git a/src/gallium/drivers/radeonsi/si_build_pm4.h b/src/gallium/drivers/radeonsi/si_build_pm4.h
index b339cd5..8fc08f7 100644
--- a/src/gallium/drivers/radeonsi/si_build_pm4.h
+++ b/src/gallium/drivers/radeonsi/si_build_pm4.h
@@ -214,4 +214,28 @@  static inline void radeon_opt_set_context_reg4(struct si_context *sctx, unsigned
 	}
 }
 
+/**
+ * Set consecutive registers if any registers value is different.
+ */
+static inline void radeon_opt_set_context_regn(struct si_context *sctx, unsigned offset,
+					       unsigned * value, unsigned * saved_val,
+					       unsigned num)
+{
+	struct radeon_cmdbuf *cs = sctx->gfx_cs;
+	int i, j;
+
+	for (i = 0; i < num; i++) {
+		if (saved_val[i] != value[i]) {
+			radeon_set_context_reg_seq(cs, offset, num);
+			for (j = 0; j < num; j++) {
+				radeon_emit(cs, value[j]);
+			}
+
+			memcpy(saved_val, value, sizeof(uint32_t) * num);
+
+			break;
+		}
+	}
+}
+
 #endif
diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c b/src/gallium/drivers/radeonsi/si_gfx_cs.c
index 628b6c5..16aa4f9 100644
--- a/src/gallium/drivers/radeonsi/si_gfx_cs.c
+++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c
@@ -353,4 +353,7 @@  void si_begin_new_gfx_cs(struct si_context *ctx)
 		/* Set all saved registers state to unknown. */
 		ctx->tracked_regs.reg_saved = 0;
 	}
+
+	/* 0xffffffff is a impossible value to register SPI_PS_INPUT_CNTL_n */
+	memset(ctx->tracked_regs.reg_val_seq, 0xff, sizeof(uint32_t) * 32);
 }
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index 71056c7..c2d0287 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -287,6 +287,7 @@  enum si_tracked_reg {
 struct si_tracked_regs {
 	uint32_t		reg_saved;
 	uint32_t		reg_value[SI_NUM_TRACKED_REGS];
+	uint32_t		reg_val_seq[32];
 };
 
 /* Private read-write buffer slots. */
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index ffc8821..a903df8 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -2634,27 +2634,25 @@  static unsigned si_get_ps_input_cntl(struct si_context *sctx,
 
 static void si_emit_spi_map(struct si_context *sctx)
 {
-	struct radeon_cmdbuf *cs = sctx->gfx_cs;
 	struct si_shader *ps = sctx->ps_shader.current;
 	struct si_shader *vs = si_get_vs_state(sctx);
 	struct tgsi_shader_info *psinfo = ps ? &ps->selector->info : NULL;
 	unsigned i, num_interp, num_written = 0, bcol_interp[2];
+	unsigned spi_ps_input_cntl[32];
 
 	if (!ps || !ps->selector->info.num_inputs)
 		return;
 
 	num_interp = si_get_ps_num_interp(ps);
 	assert(num_interp > 0);
-	radeon_set_context_reg_seq(cs, R_028644_SPI_PS_INPUT_CNTL_0, num_interp);
 
 	for (i = 0; i < psinfo->num_inputs; i++) {
 		unsigned name = psinfo->input_semantic_name[i];
 		unsigned index = psinfo->input_semantic_index[i];
 		unsigned interpolate = psinfo->input_interpolate[i];
 
-		radeon_emit(cs, si_get_ps_input_cntl(sctx, vs, name, index,
-						     interpolate));
-		num_written++;
+		spi_ps_input_cntl[num_written++] = si_get_ps_input_cntl(sctx, vs, name,
+							    index, interpolate);
 
 		if (name == TGSI_SEMANTIC_COLOR) {
 			assert(index < ARRAY_SIZE(bcol_interp));
@@ -2669,12 +2667,19 @@  static void si_emit_spi_map(struct si_context *sctx)
 			if (!(psinfo->colors_read & (0xf << (i * 4))))
 				continue;
 
-			radeon_emit(cs, si_get_ps_input_cntl(sctx, vs, bcol,
-							     i, bcol_interp[i]));
-			num_written++;
+			spi_ps_input_cntl[num_written++] =
+			  si_get_ps_input_cntl(sctx, vs, bcol, i, bcol_interp[i]);
+
 		}
 	}
 	assert(num_interp == num_written);
+
+	/* R_028644_SPI_PS_INPUT_CNTL_0 */
+	/* Dota 2: Only ~16% of SPI map updates set different values. */
+	/* Talos: Only ~9% of SPI map updates set different values. */
+	radeon_opt_set_context_regn(sctx, R_028644_SPI_PS_INPUT_CNTL_0,
+				    spi_ps_input_cntl,
+				    sctx->tracked_regs.reg_val_seq, num_interp);
 }
 
 /**

Comments

Thanks. I've pushed this and made some changes to it. See below for
the changes I made.

On Wed, Jul 18, 2018 at 5:48 PM, Sonny Jiang <sonny.jiang@amd.com> wrote:
> Signed-off-by: Sonny Jiang <sonny.jiang@amd.com>
> ---
>  src/gallium/drivers/radeonsi/si_build_pm4.h     | 24 ++++++++++++++++++++++++
>  src/gallium/drivers/radeonsi/si_gfx_cs.c        |  3 +++
>  src/gallium/drivers/radeonsi/si_state.h         |  1 +
>  src/gallium/drivers/radeonsi/si_state_shaders.c | 21 +++++++++++++--------
>  4 files changed, 41 insertions(+), 8 deletions(-)
>
> diff --git a/src/gallium/drivers/radeonsi/si_build_pm4.h b/src/gallium/drivers/radeonsi/si_build_pm4.h
> index b339cd5..8fc08f7 100644
> --- a/src/gallium/drivers/radeonsi/si_build_pm4.h
> +++ b/src/gallium/drivers/radeonsi/si_build_pm4.h
> @@ -214,4 +214,28 @@ static inline void radeon_opt_set_context_reg4(struct si_context *sctx, unsigned
>         }
>  }
>
> +/**
> + * Set consecutive registers if any registers value is different.
> + */
> +static inline void radeon_opt_set_context_regn(struct si_context *sctx, unsigned offset,
> +                                              unsigned * value, unsigned * saved_val,

I removed the space between * and the variable names, because * applies to
each declared variable individually, not to the type.
Example: int i, *a, *b, *c, x, **y;

> +                                              unsigned num)
> +{
> +       struct radeon_cmdbuf *cs = sctx->gfx_cs;
> +       int i, j;
> +
> +       for (i = 0; i < num; i++) {
> +               if (saved_val[i] != value[i]) {
> +                       radeon_set_context_reg_seq(cs, offset, num);
> +                       for (j = 0; j < num; j++) {
> +                               radeon_emit(cs, value[j]);
> +                       }

I removed { and } for the single-line block.


> +
> +                       memcpy(saved_val, value, sizeof(uint32_t) * num);
> +
> +                       break;

I removed the empty line.

> +               }
> +       }
> +}
> +
>  #endif
> diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c b/src/gallium/drivers/radeonsi/si_gfx_cs.c
> index 628b6c5..16aa4f9 100644
> --- a/src/gallium/drivers/radeonsi/si_gfx_cs.c
> +++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c
> @@ -353,4 +353,7 @@ void si_begin_new_gfx_cs(struct si_context *ctx)
>                 /* Set all saved registers state to unknown. */
>                 ctx->tracked_regs.reg_saved = 0;
>         }
> +
> +       /* 0xffffffff is a impossible value to register SPI_PS_INPUT_CNTL_n */
> +       memset(ctx->tracked_regs.reg_val_seq, 0xff, sizeof(uint32_t) * 32);
>  }
> diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
> index 71056c7..c2d0287 100644
> --- a/src/gallium/drivers/radeonsi/si_state.h
> +++ b/src/gallium/drivers/radeonsi/si_state.h
> @@ -287,6 +287,7 @@ enum si_tracked_reg {
>  struct si_tracked_regs {
>         uint32_t                reg_saved;
>         uint32_t                reg_value[SI_NUM_TRACKED_REGS];
> +       uint32_t                reg_val_seq[32];

I renamed reg_val_seq to spi_ps_input_cntl, because the variable only
stores those registers and no others.

Marek