[1/6] radeonsi: optimizing SET_CONTEXT_REG for shaders ES

Submitted by Jiang, Sonny on Sept. 18, 2018, 8:21 p.m.

Details

Message ID 20180918202115.9125-1-sonny.jiang@amd.com
State New
Headers show
Series "Series without cover letter" ( rev: 1 ) in Mesa

Not browsing as part of any series.

Commit Message

Jiang, Sonny Sept. 18, 2018, 8:21 p.m.
Signed-off-by: Sonny Jiang <sonny.jiang@amd.com>
---
 src/gallium/drivers/radeonsi/si_gfx_cs.c      |  1 +
 src/gallium/drivers/radeonsi/si_pm4.c         |  3 +++
 src/gallium/drivers/radeonsi/si_pm4.h         | 11 ++++++++++
 src/gallium/drivers/radeonsi/si_state.h       |  9 ++------
 .../drivers/radeonsi/si_state_shaders.c       | 21 ++++++++++++++++---
 5 files changed, 35 insertions(+), 10 deletions(-)

Patch hide | download patch | download mbox

diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c b/src/gallium/drivers/radeonsi/si_gfx_cs.c
index 38b85ce624..99376c4847 100644
--- a/src/gallium/drivers/radeonsi/si_gfx_cs.c
+++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c
@@ -351,6 +351,7 @@  void si_begin_new_gfx_cs(struct si_context *ctx)
 		ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_GB_HORZ_CLIP_ADJ]	= 0x3f800000;
 		ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_GB_HORZ_DISC_ADJ]	= 0x3f800000;
 		ctx->tracked_regs.reg_value[SI_TRACKED_PA_SC_CLIPRECT_RULE]	= 0xffff;
+		ctx->tracked_regs.reg_value[SI_TRACKED_VGT_ESGS_RING_ITEMSIZE]  = 0x00000000;
 
 		/* Set all saved registers state to saved. */
 		ctx->tracked_regs.reg_saved = 0xffffffff;
diff --git a/src/gallium/drivers/radeonsi/si_pm4.c b/src/gallium/drivers/radeonsi/si_pm4.c
index 446edea49a..1e686d8060 100644
--- a/src/gallium/drivers/radeonsi/si_pm4.c
+++ b/src/gallium/drivers/radeonsi/si_pm4.c
@@ -144,6 +144,9 @@  void si_pm4_emit(struct si_context *sctx, struct si_pm4_state *state)
 		radeon_emit(cs, ib->gpu_address >> 32);
 		radeon_emit(cs, (ib->b.b.width0 >> 2) & 0xfffff);
 	}
+
+	if (state->atom.emit)
+		state->atom.emit(sctx);
 }
 
 void si_pm4_reset_emitted(struct si_context *sctx)
diff --git a/src/gallium/drivers/radeonsi/si_pm4.h b/src/gallium/drivers/radeonsi/si_pm4.h
index 020da7a390..08e37714e4 100644
--- a/src/gallium/drivers/radeonsi/si_pm4.h
+++ b/src/gallium/drivers/radeonsi/si_pm4.h
@@ -33,8 +33,17 @@ 
 // forward defines
 struct si_context;
 
+/* State atoms are callbacks which write a sequence of packets into a GPU
+ * command buffer (AKA indirect buffer, AKA IB, AKA command stream, AKA CS).
+ */
+struct si_atom {
+	void (*emit)(struct si_context *ctx);
+};
+
 struct si_pm4_state
 {
+	struct si_shader *shader;
+
 	/* optional indirect buffer */
 	struct r600_resource	*indirect_buffer;
 
@@ -52,6 +61,8 @@  struct si_pm4_state
 	struct r600_resource	*bo[SI_PM4_MAX_BO];
 	enum radeon_bo_usage	bo_usage[SI_PM4_MAX_BO];
 	enum radeon_bo_priority	bo_priority[SI_PM4_MAX_BO];
+
+	struct si_atom atom;
 };
 
 void si_pm4_cmd_begin(struct si_pm4_state *state, unsigned opcode);
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index 89bb5b64a3..7fb09ca953 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -45,13 +45,6 @@  struct si_shader_selector;
 struct si_texture;
 struct si_qbo_state;
 
-/* State atoms are callbacks which write a sequence of packets into a GPU
- * command buffer (AKA indirect buffer, AKA IB, AKA command stream, AKA CS).
- */
-struct si_atom {
-	void (*emit)(struct si_context *ctx);
-};
-
 struct si_state_blend {
 	struct si_pm4_state	pm4;
 	uint32_t		cb_target_mask;
@@ -284,6 +277,8 @@  enum si_tracked_reg {
 
 	SI_TRACKED_PA_SC_CLIPRECT_RULE,
 
+	SI_TRACKED_VGT_ESGS_RING_ITEMSIZE,
+
 	SI_NUM_TRACKED_REGS,
 };
 
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 157a0e37eb..0716021d85 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -452,7 +452,13 @@  static struct si_pm4_state *si_get_shader_pm4_state(struct si_shader *shader)
 	else
 		shader->pm4 = CALLOC_STRUCT(si_pm4_state);
 
-	return shader->pm4;
+	if (shader->pm4) {
+		shader->pm4->shader = shader;
+		return shader->pm4;
+	} else {
+		fprintf(stderr, "radeonsi: Failed to create pm4 state.\n");
+		return NULL;
+	}
 }
 
 static unsigned si_get_num_vs_user_sgprs(unsigned num_always_on_user_sgprs)
@@ -552,6 +558,16 @@  static void si_shader_hs(struct si_screen *sscreen, struct si_shader *shader)
 	}
 }
 
+static void si_emit_shader_es(struct si_context *sctx)
+{
+	struct si_shader *shader = sctx->queued.named.es->shader;
+
+	if (shader)
+		radeon_opt_set_context_reg(sctx, R_028AAC_VGT_ESGS_RING_ITEMSIZE,
+					   SI_TRACKED_VGT_ESGS_RING_ITEMSIZE,
+					   shader->selector->esgs_itemsize / 4);
+}
+
 static void si_shader_es(struct si_screen *sscreen, struct si_shader *shader)
 {
 	struct si_pm4_state *pm4;
@@ -566,6 +582,7 @@  static void si_shader_es(struct si_screen *sscreen, struct si_shader *shader)
 	if (!pm4)
 		return;
 
+	pm4->atom.emit = si_emit_shader_es;
 	va = shader->bo->gpu_address;
 	si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_BINARY);
 
@@ -581,8 +598,6 @@  static void si_shader_es(struct si_screen *sscreen, struct si_shader *shader)
 
 	oc_lds_en = shader->selector->type == PIPE_SHADER_TESS_EVAL ? 1 : 0;
 
-	si_pm4_set_reg(pm4, R_028AAC_VGT_ESGS_RING_ITEMSIZE,
-		       shader->selector->esgs_itemsize / 4);
 	si_pm4_set_reg(pm4, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8);
 	si_pm4_set_reg(pm4, R_00B324_SPI_SHADER_PGM_HI_ES, S_00B324_MEM_BASE(va >> 40));
 	si_pm4_set_reg(pm4, R_00B328_SPI_SHADER_PGM_RSRC1_ES,

Comments

On Tue, Sep 18, 2018 at 4:21 PM, Sonny Jiang <sonny.jiang@amd.com> wrote:
> Signed-off-by: Sonny Jiang <sonny.jiang@amd.com>
> ---
>  src/gallium/drivers/radeonsi/si_gfx_cs.c      |  1 +
>  src/gallium/drivers/radeonsi/si_pm4.c         |  3 +++
>  src/gallium/drivers/radeonsi/si_pm4.h         | 11 ++++++++++
>  src/gallium/drivers/radeonsi/si_state.h       |  9 ++------
>  .../drivers/radeonsi/si_state_shaders.c       | 21 ++++++++++++++++---
>  5 files changed, 35 insertions(+), 10 deletions(-)
>
> diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c b/src/gallium/drivers/radeonsi/si_gfx_cs.c
> index 38b85ce624..99376c4847 100644
> --- a/src/gallium/drivers/radeonsi/si_gfx_cs.c
> +++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c
> @@ -351,6 +351,7 @@ void si_begin_new_gfx_cs(struct si_context *ctx)
>                 ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_GB_HORZ_CLIP_ADJ]  = 0x3f800000;
>                 ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_GB_HORZ_DISC_ADJ]  = 0x3f800000;
>                 ctx->tracked_regs.reg_value[SI_TRACKED_PA_SC_CLIPRECT_RULE]     = 0xffff;
> +               ctx->tracked_regs.reg_value[SI_TRACKED_VGT_ESGS_RING_ITEMSIZE]  = 0x00000000;
>
>                 /* Set all saved registers state to saved. */
>                 ctx->tracked_regs.reg_saved = 0xffffffff;
> diff --git a/src/gallium/drivers/radeonsi/si_pm4.c b/src/gallium/drivers/radeonsi/si_pm4.c
> index 446edea49a..1e686d8060 100644
> --- a/src/gallium/drivers/radeonsi/si_pm4.c
> +++ b/src/gallium/drivers/radeonsi/si_pm4.c
> @@ -144,6 +144,9 @@ void si_pm4_emit(struct si_context *sctx, struct si_pm4_state *state)
>                 radeon_emit(cs, ib->gpu_address >> 32);
>                 radeon_emit(cs, (ib->b.b.width0 >> 2) & 0xfffff);
>         }
> +
> +       if (state->atom.emit)
> +               state->atom.emit(sctx);
>  }
>
>  void si_pm4_reset_emitted(struct si_context *sctx)
> diff --git a/src/gallium/drivers/radeonsi/si_pm4.h b/src/gallium/drivers/radeonsi/si_pm4.h
> index 020da7a390..08e37714e4 100644
> --- a/src/gallium/drivers/radeonsi/si_pm4.h
> +++ b/src/gallium/drivers/radeonsi/si_pm4.h
> @@ -33,8 +33,17 @@
>  // forward defines
>  struct si_context;
>
> +/* State atoms are callbacks which write a sequence of packets into a GPU
> + * command buffer (AKA indirect buffer, AKA IB, AKA command stream, AKA CS).
> + */
> +struct si_atom {
> +       void (*emit)(struct si_context *ctx);
> +};
> +
>  struct si_pm4_state
>  {
> +       struct si_shader *shader;
> +
>         /* optional indirect buffer */
>         struct r600_resource    *indirect_buffer;
>
> @@ -52,6 +61,8 @@ struct si_pm4_state
>         struct r600_resource    *bo[SI_PM4_MAX_BO];
>         enum radeon_bo_usage    bo_usage[SI_PM4_MAX_BO];
>         enum radeon_bo_priority bo_priority[SI_PM4_MAX_BO];
> +
> +       struct si_atom atom;

This is good, but I'd like both "atom" and "shader" to be next to each
other, along with a comment why "shader" is in si_pm4_state (i.e. it's
there for shader states only).

>  };
>
>  void si_pm4_cmd_begin(struct si_pm4_state *state, unsigned opcode);
> diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
> index 89bb5b64a3..7fb09ca953 100644
> --- a/src/gallium/drivers/radeonsi/si_state.h
> +++ b/src/gallium/drivers/radeonsi/si_state.h
> @@ -45,13 +45,6 @@ struct si_shader_selector;
>  struct si_texture;
>  struct si_qbo_state;
>
> -/* State atoms are callbacks which write a sequence of packets into a GPU
> - * command buffer (AKA indirect buffer, AKA IB, AKA command stream, AKA CS).
> - */
> -struct si_atom {
> -       void (*emit)(struct si_context *ctx);
> -};
> -
>  struct si_state_blend {
>         struct si_pm4_state     pm4;
>         uint32_t                cb_target_mask;
> @@ -284,6 +277,8 @@ enum si_tracked_reg {
>
>         SI_TRACKED_PA_SC_CLIPRECT_RULE,
>
> +       SI_TRACKED_VGT_ESGS_RING_ITEMSIZE,
> +
>         SI_NUM_TRACKED_REGS,
>  };
>
> diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
> index 157a0e37eb..0716021d85 100644
> --- a/src/gallium/drivers/radeonsi/si_state_shaders.c
> +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
> @@ -452,7 +452,13 @@ static struct si_pm4_state *si_get_shader_pm4_state(struct si_shader *shader)
>         else
>                 shader->pm4 = CALLOC_STRUCT(si_pm4_state);
>
> -       return shader->pm4;
> +       if (shader->pm4) {
> +               shader->pm4->shader = shader;
> +               return shader->pm4;
> +       } else {
> +               fprintf(stderr, "radeonsi: Failed to create pm4 state.\n");
> +               return NULL;
> +       }
>  }
>
>  static unsigned si_get_num_vs_user_sgprs(unsigned num_always_on_user_sgprs)
> @@ -552,6 +558,16 @@ static void si_shader_hs(struct si_screen *sscreen, struct si_shader *shader)
>         }
>  }
>
> +static void si_emit_shader_es(struct si_context *sctx)
> +{
> +       struct si_shader *shader = sctx->queued.named.es->shader;
> +
> +       if (shader)
> +               radeon_opt_set_context_reg(sctx, R_028AAC_VGT_ESGS_RING_ITEMSIZE,
> +                                          SI_TRACKED_VGT_ESGS_RING_ITEMSIZE,
> +                                          shader->selector->esgs_itemsize / 4);

Please use this instead:

if (!shader)
        return;

...

Marek