[v2,23/40] drm/i915/tgl: Register state context definition for Gen12

Submitted by Lucas De Marchi on Aug. 17, 2019, 9:38 a.m.

Details

Message ID 20190817093902.2171-24-lucas.demarchi@intel.com
State New
Headers show
Series "Tiger Lake batch 3" ( rev: 4 3 2 ) in Intel GFX

Not browsing as part of any series.

Commit Message

Lucas De Marchi Aug. 17, 2019, 9:38 a.m.
From: Michel Thierry <michel.thierry@intel.com>

Gen12 has subtle changes in the reg state context offsets (some fields
are gone, some are in a different location), compared to previous Gens.

The simplest approach seems to be keeping Gen12 (and future platform)
changes apart from the previous gens, while keeping the registers that
are contiguous in functions we can reuse.

Bspec: 20202
Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Signed-off-by: Michel Thierry <michel.thierry@intel.com>
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_lrc.c     | 156 +++++++++++++++++-------
 drivers/gpu/drm/i915/gt/intel_lrc.h     |   2 +
 drivers/gpu/drm/i915/gt/intel_lrc_reg.h |  30 ++++-
 3 files changed, 143 insertions(+), 45 deletions(-)

Patch hide | download patch | download mbox

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index e30d2a892f29..1fe83736f064 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -3068,28 +3068,12 @@  static u32 intel_lr_indirect_ctx_offset(struct intel_engine_cs *engine)
 	return indirect_ctx_offset;
 }
 
-static void execlists_init_reg_state(u32 *regs,
-				     struct intel_context *ce,
-				     struct intel_engine_cs *engine,
-				     struct intel_ring *ring)
+static void init_common_reg_state(u32 *regs,
+				  struct intel_engine_cs *engine,
+				  struct intel_ring *ring)
 {
-	struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(ce->vm);
-	bool rcs = engine->class == RENDER_CLASS;
 	u32 base = engine->mmio_base;
 
-	/*
-	 * A context is actually a big batch buffer with several
-	 * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The
-	 * values we are setting here are only for the first context restore:
-	 * on a subsequent save, the GPU will recreate this batchbuffer with new
-	 * values (including all the missing MI_LOAD_REGISTER_IMM commands that
-	 * we are not initializing here).
-	 *
-	 * Must keep consistent with virtual_update_register_offsets().
-	 */
-	regs[CTX_LRI_HEADER_0] = MI_LOAD_REGISTER_IMM(rcs ? 14 : 11) |
-				 MI_LRI_FORCE_POSTED;
-
 	CTX_REG(regs, CTX_CONTEXT_CONTROL, RING_CONTEXT_CONTROL(base),
 		_MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT) |
 		_MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH));
@@ -3106,38 +3090,44 @@  static void execlists_init_reg_state(u32 *regs,
 	CTX_REG(regs, CTX_BB_HEAD_U, RING_BBADDR_UDW(base), 0);
 	CTX_REG(regs, CTX_BB_HEAD_L, RING_BBADDR(base), 0);
 	CTX_REG(regs, CTX_BB_STATE, RING_BBSTATE(base), RING_BB_PPGTT);
-	CTX_REG(regs, CTX_SECOND_BB_HEAD_U, RING_SBBADDR_UDW(base), 0);
-	CTX_REG(regs, CTX_SECOND_BB_HEAD_L, RING_SBBADDR(base), 0);
-	CTX_REG(regs, CTX_SECOND_BB_STATE, RING_SBBSTATE(base), 0);
-	if (rcs) {
-		struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
-
-		CTX_REG(regs, CTX_RCS_INDIRECT_CTX, RING_INDIRECT_CTX(base), 0);
-		CTX_REG(regs, CTX_RCS_INDIRECT_CTX_OFFSET,
-			RING_INDIRECT_CTX_OFFSET(base), 0);
-		if (wa_ctx->indirect_ctx.size) {
-			u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
+}
 
-			regs[CTX_RCS_INDIRECT_CTX + 1] =
-				(ggtt_offset + wa_ctx->indirect_ctx.offset) |
-				(wa_ctx->indirect_ctx.size / CACHELINE_BYTES);
+static void init_wa_bb_reg_state(u32 *regs,
+				 struct intel_engine_cs *engine,
+				 u32 pos_bb_per_ctx)
+{
+	struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
+	u32 base = engine->mmio_base;
+	u32 pos_indirect_ctx = pos_bb_per_ctx + 2;
+	u32 pos_indirect_ctx_offset = pos_indirect_ctx + 2;
 
-			regs[CTX_RCS_INDIRECT_CTX_OFFSET + 1] =
-				intel_lr_indirect_ctx_offset(engine) << 6;
-		}
+	GEM_BUG_ON(engine->id != RCS0);
+	CTX_REG(regs, pos_indirect_ctx, RING_INDIRECT_CTX(base), 0);
+	CTX_REG(regs, pos_indirect_ctx_offset,
+		RING_INDIRECT_CTX_OFFSET(base), 0);
+	if (wa_ctx->indirect_ctx.size) {
+		u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
 
-		CTX_REG(regs, CTX_BB_PER_CTX_PTR, RING_BB_PER_CTX_PTR(base), 0);
-		if (wa_ctx->per_ctx.size) {
-			u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
+		regs[pos_indirect_ctx + 1] =
+			(ggtt_offset + wa_ctx->indirect_ctx.offset) |
+			(wa_ctx->indirect_ctx.size / CACHELINE_BYTES);
 
-			regs[CTX_BB_PER_CTX_PTR + 1] =
-				(ggtt_offset + wa_ctx->per_ctx.offset) | 0x01;
-		}
+		regs[pos_indirect_ctx_offset + 1] =
+			intel_lr_indirect_ctx_offset(engine) << 6;
 	}
 
-	regs[CTX_LRI_HEADER_1] = MI_LOAD_REGISTER_IMM(9) | MI_LRI_FORCE_POSTED;
+	CTX_REG(regs, pos_bb_per_ctx, RING_BB_PER_CTX_PTR(base), 0);
+	if (wa_ctx->per_ctx.size) {
+		u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
 
-	CTX_REG(regs, CTX_CTX_TIMESTAMP, RING_CTX_TIMESTAMP(base), 0);
+		regs[pos_bb_per_ctx + 1] =
+			(ggtt_offset + wa_ctx->per_ctx.offset) | 0x01;
+	}
+}
+
+static void init_ppgtt_reg_state(u32 *regs, u32 base,
+				 struct i915_ppgtt *ppgtt)
+{
 	/* PDP values well be assigned later if needed */
 	CTX_REG(regs, CTX_PDP3_UDW, GEN8_RING_PDP_UDW(base, 3), 0);
 	CTX_REG(regs, CTX_PDP3_LDW, GEN8_RING_PDP_LDW(base, 3), 0);
@@ -3160,6 +3150,32 @@  static void execlists_init_reg_state(u32 *regs,
 		ASSIGN_CTX_PDP(ppgtt, regs, 1);
 		ASSIGN_CTX_PDP(ppgtt, regs, 0);
 	}
+}
+
+static void gen8_init_reg_state(u32 *regs,
+				struct intel_context *ce,
+				struct intel_engine_cs *engine,
+				struct intel_ring *ring)
+{
+	struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(ce->vm);
+	bool rcs = engine->class == RENDER_CLASS;
+	u32 base = engine->mmio_base;
+
+	regs[CTX_LRI_HEADER_0] = MI_LOAD_REGISTER_IMM(rcs ? 14 : 11) |
+				 MI_LRI_FORCE_POSTED;
+
+	init_common_reg_state(regs, engine, ring);
+	CTX_REG(regs, CTX_SECOND_BB_HEAD_U, RING_SBBADDR_UDW(base), 0);
+	CTX_REG(regs, CTX_SECOND_BB_HEAD_L, RING_SBBADDR(base), 0);
+	CTX_REG(regs, CTX_SECOND_BB_STATE, RING_SBBSTATE(base), 0);
+	if (rcs)
+		init_wa_bb_reg_state(regs, engine, CTX_BB_PER_CTX_PTR);
+
+	regs[CTX_LRI_HEADER_1] = MI_LOAD_REGISTER_IMM(9) | MI_LRI_FORCE_POSTED;
+
+	CTX_REG(regs, CTX_CTX_TIMESTAMP, RING_CTX_TIMESTAMP(base), 0);
+
+	init_ppgtt_reg_state(regs, base, ppgtt);
 
 	if (rcs) {
 		regs[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1);
@@ -3171,6 +3187,58 @@  static void execlists_init_reg_state(u32 *regs,
 		regs[CTX_END] |= BIT(0);
 }
 
+static void gen12_init_reg_state(u32 *regs,
+				 struct intel_context *ce,
+				 struct intel_engine_cs *engine,
+				 struct intel_ring *ring)
+{
+	struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(ce->vm);
+	bool rcs = engine->class == RENDER_CLASS;
+	u32 base = engine->mmio_base;
+
+	GEM_DEBUG_EXEC(DRM_INFO_ONCE("Using GEN12 Register State Context\n"));
+
+	regs[GEN12_CTX_LRI_HEADER_0] = MI_LOAD_REGISTER_IMM(13) |
+				       MI_LRI_FORCE_POSTED;
+
+	init_common_reg_state(regs, engine, ring);
+	if (rcs)
+		init_wa_bb_reg_state(regs, engine, GEN12_CTX_BB_PER_CTX_PTR);
+
+	regs[GEN12_CTX_LRI_HEADER_1] = MI_LOAD_REGISTER_IMM(9) |
+				       MI_LRI_FORCE_POSTED;
+
+	CTX_REG(regs, GEN12_CTX_CTX_TIMESTAMP, RING_CTX_TIMESTAMP(base), 0);
+
+	init_ppgtt_reg_state(regs, base, ppgtt);
+
+	if (rcs) {
+		regs[GEN12_CTX_LRI_HEADER_3] = MI_LOAD_REGISTER_IMM(1);
+		CTX_REG(regs, GEN12_CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE,
+			0);
+
+		/* TODO: oa_init_reg_state ? */
+	}
+}
+
+static void execlists_init_reg_state(u32 *regs,
+				     struct intel_context *ce,
+				     struct intel_engine_cs *engine,
+				     struct intel_ring *ring)
+{
+	/* A context is actually a big batch buffer with several
+	 * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The
+	 * values we are setting here are only for the first context restore:
+	 * on a subsequent save, the GPU will recreate this batchbuffer with new
+	 * values (including all the missing MI_LOAD_REGISTER_IMM commands that
+	 * we are not initializing here).
+	 */
+	if (INTEL_GEN(engine->i915) >= 12)
+		gen12_init_reg_state(regs, ce, engine, ring);
+	else
+		gen8_init_reg_state(regs, ce, engine, ring);
+}
+
 static int
 populate_lr_context(struct intel_context *ce,
 		    struct drm_i915_gem_object *ctx_obj,
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h
index c2bba82bcc16..69285d354d9b 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.h
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.h
@@ -49,6 +49,8 @@  struct intel_engine_cs;
 
 #define	  EL_CTRL_LOAD				(1 << 0)
 
+#define GEN12_ENGINE_SEMAPHORE_TOKEN(engine)	_MMIO((engine)->mmio_base + 0x2b4)
+
 /* The docs specify that the write pointer wraps around after 5h, "After status
  * is written out to the last available status QW at offset 5h, this pointer
  * wraps to 0."
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
index 6bf34738b4e5..915824ebaf17 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
+++ b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
@@ -9,7 +9,7 @@ 
 
 #include <linux/types.h>
 
-/* GEN8+ Reg State Context */
+/* GEN8 to GEN11 Reg State Context */
 #define CTX_LRI_HEADER_0		0x01
 #define CTX_CONTEXT_CONTROL		0x02
 #define CTX_RING_HEAD			0x04
@@ -39,6 +39,34 @@ 
 #define CTX_R_PWR_CLK_STATE		0x42
 #define CTX_END				0x44
 
+/* GEN12+ Reg State Context */
+#define GEN12_CTX_LRI_HEADER_0			CTX_LRI_HEADER_0
+#define GEN12_CTX_CONTEXT_CONTROL		CTX_CONTEXT_CONTROL
+#define GEN12_CTX_RING_HEAD			CTX_RING_HEAD
+#define GEN12_CTX_RING_TAIL			CTX_RING_TAIL
+#define GEN12_CTX_RING_BUFFER_START		CTX_RING_BUFFER_START
+#define GEN12_CTX_RING_BUFFER_CONTROL		CTX_RING_BUFFER_CONTROL
+#define GEN12_CTX_BB_HEAD_U			CTX_BB_HEAD_U
+#define GEN12_CTX_BB_HEAD_L			CTX_BB_HEAD_L
+#define GEN12_CTX_BB_STATE			CTX_BB_STATE
+#define GEN12_CTX_BB_PER_CTX_PTR		0x12
+#define GEN12_CTX_RCS_INDIRECT_CTX		0x14
+#define GEN12_CTX_RCS_INDIRECT_CTX_OFFSET	0x16
+#define GEN12_CTX_LRI_HEADER_1			CTX_LRI_HEADER_1
+#define GEN12_CTX_CTX_TIMESTAMP			CTX_CTX_TIMESTAMP
+#define GEN12_CTX_PDP3_UDW			CTX_PDP3_UDW
+#define GEN12_CTX_PDP3_LDW			CTX_PDP3_LDW
+#define GEN12_CTX_PDP2_UDW			CTX_PDP2_UDW
+#define GEN12_CTX_PDP2_LDW			CTX_PDP2_LDW
+#define GEN12_CTX_PDP1_UDW			CTX_PDP1_UDW
+#define GEN12_CTX_PDP1_LDW			CTX_PDP1_LDW
+#define GEN12_CTX_PDP0_UDW			CTX_PDP0_UDW
+#define GEN12_CTX_PDP0_LDW			CTX_PDP0_LDW
+#define GEN12_CTX_LRI_HEADER_2			0x34
+#define GEN12_CTX_LRI_HEADER_3			0x41
+#define GEN12_CTX_R_PWR_CLK_STATE		0x42
+#define GEN12_CTX_GPGPU_CSR_BASE_ADDRESS	0x44
+
 #define CTX_REG(reg_state, pos, reg, val) do { \
 	u32 *reg_state__ = (reg_state); \
 	const u32 pos__ = (pos); \

Comments

On 8/17/19 2:38 AM, Lucas De Marchi wrote:
> From: Michel Thierry <michel.thierry@intel.com>
> 
> Gen12 has subtle changes in the reg state context offsets (some fields
> are gone, some are in a different location), compared to previous Gens.
> 
> The simplest approach seems to be keeping Gen12 (and future platform)
> changes apart from the previous gens, while keeping the registers that
> are contiguous in functions we can reuse.
> 
> Bspec: 20202

I'd use 46255 instead as reference

> Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
> Signed-off-by: Michel Thierry <michel.thierry@intel.com>
> Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
> ---
>   drivers/gpu/drm/i915/gt/intel_lrc.c     | 156 +++++++++++++++++-------
>   drivers/gpu/drm/i915/gt/intel_lrc.h     |   2 +
>   drivers/gpu/drm/i915/gt/intel_lrc_reg.h |  30 ++++-
>   3 files changed, 143 insertions(+), 45 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index e30d2a892f29..1fe83736f064 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -3068,28 +3068,12 @@ static u32 intel_lr_indirect_ctx_offset(struct intel_engine_cs *engine)
>   	return indirect_ctx_offset;
>   }
>   
> -static void execlists_init_reg_state(u32 *regs,
> -				     struct intel_context *ce,
> -				     struct intel_engine_cs *engine,
> -				     struct intel_ring *ring)
> +static void init_common_reg_state(u32 *regs,
> +				  struct intel_engine_cs *engine,
> +				  struct intel_ring *ring)
>   {
> -	struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(ce->vm);
> -	bool rcs = engine->class == RENDER_CLASS;
>   	u32 base = engine->mmio_base;
>   
> -	/*
> -	 * A context is actually a big batch buffer with several
> -	 * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The
> -	 * values we are setting here are only for the first context restore:
> -	 * on a subsequent save, the GPU will recreate this batchbuffer with new
> -	 * values (including all the missing MI_LOAD_REGISTER_IMM commands that
> -	 * we are not initializing here).
> -	 *
> -	 * Must keep consistent with virtual_update_register_offsets().

As the comment here says, virtual_update_register_offsets() needs 
updating as well (unless we switch to relative MMIOs, which are 
supported gen11+).

> -	 */
> -	regs[CTX_LRI_HEADER_0] = MI_LOAD_REGISTER_IMM(rcs ? 14 : 11) |
> -				 MI_LRI_FORCE_POSTED;
> -
>   	CTX_REG(regs, CTX_CONTEXT_CONTROL, RING_CONTEXT_CONTROL(base),
>   		_MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT) |
>   		_MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH));
> @@ -3106,38 +3090,44 @@ static void execlists_init_reg_state(u32 *regs,
>   	CTX_REG(regs, CTX_BB_HEAD_U, RING_BBADDR_UDW(base), 0);
>   	CTX_REG(regs, CTX_BB_HEAD_L, RING_BBADDR(base), 0);
>   	CTX_REG(regs, CTX_BB_STATE, RING_BBSTATE(base), RING_BB_PPGTT);
> -	CTX_REG(regs, CTX_SECOND_BB_HEAD_U, RING_SBBADDR_UDW(base), 0);
> -	CTX_REG(regs, CTX_SECOND_BB_HEAD_L, RING_SBBADDR(base), 0);
> -	CTX_REG(regs, CTX_SECOND_BB_STATE, RING_SBBSTATE(base), 0);
> -	if (rcs) {
> -		struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
> -
> -		CTX_REG(regs, CTX_RCS_INDIRECT_CTX, RING_INDIRECT_CTX(base), 0);
> -		CTX_REG(regs, CTX_RCS_INDIRECT_CTX_OFFSET,
> -			RING_INDIRECT_CTX_OFFSET(base), 0);
> -		if (wa_ctx->indirect_ctx.size) {
> -			u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
> +}
>   
> -			regs[CTX_RCS_INDIRECT_CTX + 1] =
> -				(ggtt_offset + wa_ctx->indirect_ctx.offset) |
> -				(wa_ctx->indirect_ctx.size / CACHELINE_BYTES);
> +static void init_wa_bb_reg_state(u32 *regs,
> +				 struct intel_engine_cs *engine,
> +				 u32 pos_bb_per_ctx)
> +{
> +	struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
> +	u32 base = engine->mmio_base;
> +	u32 pos_indirect_ctx = pos_bb_per_ctx + 2;
> +	u32 pos_indirect_ctx_offset = pos_indirect_ctx + 2;
>   
> -			regs[CTX_RCS_INDIRECT_CTX_OFFSET + 1] =
> -				intel_lr_indirect_ctx_offset(engine) << 6;
> -		}
> +	GEM_BUG_ON(engine->id != RCS0);

IMO better to use engine->class here

> +	CTX_REG(regs, pos_indirect_ctx, RING_INDIRECT_CTX(base), 0);
> +	CTX_REG(regs, pos_indirect_ctx_offset,
> +		RING_INDIRECT_CTX_OFFSET(base), 0);
> +	if (wa_ctx->indirect_ctx.size) {
> +		u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
>   
> -		CTX_REG(regs, CTX_BB_PER_CTX_PTR, RING_BB_PER_CTX_PTR(base), 0);
> -		if (wa_ctx->per_ctx.size) {
> -			u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
> +		regs[pos_indirect_ctx + 1] =
> +			(ggtt_offset + wa_ctx->indirect_ctx.offset) |
> +			(wa_ctx->indirect_ctx.size / CACHELINE_BYTES);
>   
> -			regs[CTX_BB_PER_CTX_PTR + 1] =
> -				(ggtt_offset + wa_ctx->per_ctx.offset) | 0x01;
> -		}
> +		regs[pos_indirect_ctx_offset + 1] =
> +			intel_lr_indirect_ctx_offset(engine) << 6;
>   	}
>   
> -	regs[CTX_LRI_HEADER_1] = MI_LOAD_REGISTER_IMM(9) | MI_LRI_FORCE_POSTED;
> +	CTX_REG(regs, pos_bb_per_ctx, RING_BB_PER_CTX_PTR(base), 0);
> +	if (wa_ctx->per_ctx.size) {
> +		u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
>   
> -	CTX_REG(regs, CTX_CTX_TIMESTAMP, RING_CTX_TIMESTAMP(base), 0);
> +		regs[pos_bb_per_ctx + 1] =
> +			(ggtt_offset + wa_ctx->per_ctx.offset) | 0x01;
> +	}
> +}
> +
> +static void init_ppgtt_reg_state(u32 *regs, u32 base,
> +				 struct i915_ppgtt *ppgtt)
> +{
>   	/* PDP values well be assigned later if needed */
>   	CTX_REG(regs, CTX_PDP3_UDW, GEN8_RING_PDP_UDW(base, 3), 0);
>   	CTX_REG(regs, CTX_PDP3_LDW, GEN8_RING_PDP_LDW(base, 3), 0);
> @@ -3160,6 +3150,32 @@ static void execlists_init_reg_state(u32 *regs,
>   		ASSIGN_CTX_PDP(ppgtt, regs, 1);
>   		ASSIGN_CTX_PDP(ppgtt, regs, 0);
>   	}
> +}
> +
> +static void gen8_init_reg_state(u32 *regs,
> +				struct intel_context *ce,
> +				struct intel_engine_cs *engine,
> +				struct intel_ring *ring)
> +{
> +	struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(ce->vm);
> +	bool rcs = engine->class == RENDER_CLASS;
> +	u32 base = engine->mmio_base;
> +
> +	regs[CTX_LRI_HEADER_0] = MI_LOAD_REGISTER_IMM(rcs ? 14 : 11) |
> +				 MI_LRI_FORCE_POSTED;
> +
> +	init_common_reg_state(regs, engine, ring);
> +	CTX_REG(regs, CTX_SECOND_BB_HEAD_U, RING_SBBADDR_UDW(base), 0);
> +	CTX_REG(regs, CTX_SECOND_BB_HEAD_L, RING_SBBADDR(base), 0);
> +	CTX_REG(regs, CTX_SECOND_BB_STATE, RING_SBBSTATE(base), 0);
> +	if (rcs)
> +		init_wa_bb_reg_state(regs, engine, CTX_BB_PER_CTX_PTR);
> +
> +	regs[CTX_LRI_HEADER_1] = MI_LOAD_REGISTER_IMM(9) | MI_LRI_FORCE_POSTED;
> +
> +	CTX_REG(regs, CTX_CTX_TIMESTAMP, RING_CTX_TIMESTAMP(base), 0);
> +
> +	init_ppgtt_reg_state(regs, base, ppgtt);
>   
>   	if (rcs) {
>   		regs[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1);
> @@ -3171,6 +3187,58 @@ static void execlists_init_reg_state(u32 *regs,
>   		regs[CTX_END] |= BIT(0);
>   }
>   
> +static void gen12_init_reg_state(u32 *regs,
> +				 struct intel_context *ce,
> +				 struct intel_engine_cs *engine,
> +				 struct intel_ring *ring)
> +{
> +	struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(ce->vm);
> +	bool rcs = engine->class == RENDER_CLASS;
> +	u32 base = engine->mmio_base;
> +
> +	GEM_DEBUG_EXEC(DRM_INFO_ONCE("Using GEN12 Register State Context\n"));
> +
> +	regs[GEN12_CTX_LRI_HEADER_0] = MI_LOAD_REGISTER_IMM(13) |

We're currently not writing 13 regs, but we'll have to write the 13th on 
all engines for the new semaphore stuff, so ack in using 13 now to be 
future proof.

> +				       MI_LRI_FORCE_POSTED;
> +
> +	init_common_reg_state(regs, engine, ring);
> +	if (rcs)
> +		init_wa_bb_reg_state(regs, engine, GEN12_CTX_BB_PER_CTX_PTR);
> +
> +	regs[GEN12_CTX_LRI_HEADER_1] = MI_LOAD_REGISTER_IMM(9) |
> +				       MI_LRI_FORCE_POSTED;
> +
> +	CTX_REG(regs, GEN12_CTX_CTX_TIMESTAMP, RING_CTX_TIMESTAMP(base), 0);
> +
> +	init_ppgtt_reg_state(regs, base, ppgtt);
> +
> +	if (rcs) {
> +		regs[GEN12_CTX_LRI_HEADER_3] = MI_LOAD_REGISTER_IMM(1);
> +		CTX_REG(regs, GEN12_CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE,
> +			0);
> +
> +		/* TODO: oa_init_reg_state ? */
> +	}
> +}
> +
> +static void execlists_init_reg_state(u32 *regs,
> +				     struct intel_context *ce,
> +				     struct intel_engine_cs *engine,
> +				     struct intel_ring *ring)
> +{
> +	/* A context is actually a big batch buffer with several
> +	 * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The
> +	 * values we are setting here are only for the first context restore:
> +	 * on a subsequent save, the GPU will recreate this batchbuffer with new
> +	 * values (including all the missing MI_LOAD_REGISTER_IMM commands that
> +	 * we are not initializing here).
> +	 */
> +	if (INTEL_GEN(engine->i915) >= 12)
> +		gen12_init_reg_state(regs, ce, engine, ring);
> +	else
> +		gen8_init_reg_state(regs, ce, engine, ring);
> +}
> +
>   static int
>   populate_lr_context(struct intel_context *ce,
>   		    struct drm_i915_gem_object *ctx_obj,
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h
> index c2bba82bcc16..69285d354d9b 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.h
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.h
> @@ -49,6 +49,8 @@ struct intel_engine_cs;
>   
>   #define	  EL_CTRL_LOAD				(1 << 0)
>   
> +#define GEN12_ENGINE_SEMAPHORE_TOKEN(engine)	_MMIO((engine)->mmio_base + 0x2b4)

defining this without using it or defining its position in the context 
(0x1A) feels a bit confusing IMO.

> +
>   /* The docs specify that the write pointer wraps around after 5h, "After status
>    * is written out to the last available status QW at offset 5h, this pointer
>    * wraps to 0."
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
> index 6bf34738b4e5..915824ebaf17 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
> @@ -9,7 +9,7 @@
>   
>   #include <linux/types.h>
>   
> -/* GEN8+ Reg State Context */
> +/* GEN8 to GEN11 Reg State Context */
>   #define CTX_LRI_HEADER_0		0x01
>   #define CTX_CONTEXT_CONTROL		0x02
>   #define CTX_RING_HEAD			0x04
> @@ -39,6 +39,34 @@
>   #define CTX_R_PWR_CLK_STATE		0x42
>   #define CTX_END				0x44
>   
> +/* GEN12+ Reg State Context */
> +#define GEN12_CTX_LRI_HEADER_0			CTX_LRI_HEADER_0
> +#define GEN12_CTX_CONTEXT_CONTROL		CTX_CONTEXT_CONTROL
> +#define GEN12_CTX_RING_HEAD			CTX_RING_HEAD
> +#define GEN12_CTX_RING_TAIL			CTX_RING_TAIL
> +#define GEN12_CTX_RING_BUFFER_START		CTX_RING_BUFFER_START
> +#define GEN12_CTX_RING_BUFFER_CONTROL		CTX_RING_BUFFER_CONTROL
> +#define GEN12_CTX_BB_HEAD_U			CTX_BB_HEAD_U
> +#define GEN12_CTX_BB_HEAD_L			CTX_BB_HEAD_L
> +#define GEN12_CTX_BB_STATE			CTX_BB_STATE
> +#define GEN12_CTX_BB_PER_CTX_PTR		0x12
> +#define GEN12_CTX_RCS_INDIRECT_CTX		0x14
> +#define GEN12_CTX_RCS_INDIRECT_CTX_OFFSET	0x16

Matches the specs

> +#define GEN12_CTX_LRI_HEADER_1			CTX_LRI_HEADER_1
> +#define GEN12_CTX_CTX_TIMESTAMP			CTX_CTX_TIMESTAMP
> +#define GEN12_CTX_PDP3_UDW			CTX_PDP3_UDW
> +#define GEN12_CTX_PDP3_LDW			CTX_PDP3_LDW
> +#define GEN12_CTX_PDP2_UDW			CTX_PDP2_UDW
> +#define GEN12_CTX_PDP2_LDW			CTX_PDP2_LDW
> +#define GEN12_CTX_PDP1_UDW			CTX_PDP1_UDW
> +#define GEN12_CTX_PDP1_LDW			CTX_PDP1_LDW
> +#define GEN12_CTX_PDP0_UDW			CTX_PDP0_UDW
> +#define GEN12_CTX_PDP0_LDW			CTX_PDP0_LDW
> +#define GEN12_CTX_LRI_HEADER_2			0x34
> +#define GEN12_CTX_LRI_HEADER_3			0x41

The LRI offsets are unchanged, we just didn't define one of the LRIs in 
the legacy defs because we don't set up any of the relevant registers. 
We will need it in TGL, but for consistency it might be worth updating 
the legacy defs as well to not give the illusion that stuff has moved 
around.

> +#define GEN12_CTX_R_PWR_CLK_STATE		0x42
> +#define GEN12_CTX_GPGPU_CSR_BASE_ADDRESS	0x44

Both of these are part of the legacy context as well. we do have a 
legacy define for CTX_R_PWR_CLK_STATE, while AFAICS 
CTX_GPGPU_CSR_BASE_ADDRESS is not a register to program so not sure we 
need the define.

Daniele

> +
>   #define CTX_REG(reg_state, pos, reg, val) do { \
>   	u32 *reg_state__ = (reg_state); \
>   	const u32 pos__ = (pos); \
>
Lucas De Marchi <lucas.demarchi@intel.com> writes:

> From: Michel Thierry <michel.thierry@intel.com>
>
> Gen12 has subtle changes in the reg state context offsets (some fields
> are gone, some are in a different location), compared to previous Gens.
>
> The simplest approach seems to be keeping Gen12 (and future platform)
> changes apart from the previous gens, while keeping the registers that
> are contiguous in functions we can reuse.
>
> Bspec: 20202
> Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
> Signed-off-by: Michel Thierry <michel.thierry@intel.com>
> Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
> ---
>  drivers/gpu/drm/i915/gt/intel_lrc.c     | 156 +++++++++++++++++-------
>  drivers/gpu/drm/i915/gt/intel_lrc.h     |   2 +
>  drivers/gpu/drm/i915/gt/intel_lrc_reg.h |  30 ++++-
>  3 files changed, 143 insertions(+), 45 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index e30d2a892f29..1fe83736f064 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -3068,28 +3068,12 @@ static u32 intel_lr_indirect_ctx_offset(struct intel_engine_cs *engine)
>  	return indirect_ctx_offset;
>  }
>  
> -static void execlists_init_reg_state(u32 *regs,
> -				     struct intel_context *ce,
> -				     struct intel_engine_cs *engine,
> -				     struct intel_ring *ring)
> +static void init_common_reg_state(u32 *regs,
> +				  struct intel_engine_cs *engine,
> +				  struct intel_ring *ring)
>  {
> -	struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(ce->vm);
> -	bool rcs = engine->class == RENDER_CLASS;
>  	u32 base = engine->mmio_base;
>  
> -	/*
> -	 * A context is actually a big batch buffer with several
> -	 * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The
> -	 * values we are setting here are only for the first context restore:
> -	 * on a subsequent save, the GPU will recreate this batchbuffer with new
> -	 * values (including all the missing MI_LOAD_REGISTER_IMM commands that
> -	 * we are not initializing here).
> -	 *
> -	 * Must keep consistent with virtual_update_register_offsets().
> -	 */
> -	regs[CTX_LRI_HEADER_0] = MI_LOAD_REGISTER_IMM(rcs ? 14 : 11) |
> -				 MI_LRI_FORCE_POSTED;
> -
>  	CTX_REG(regs, CTX_CONTEXT_CONTROL, RING_CONTEXT_CONTROL(base),
>  		_MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT) |
>  		_MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH));
> @@ -3106,38 +3090,44 @@ static void execlists_init_reg_state(u32 *regs,
>  	CTX_REG(regs, CTX_BB_HEAD_U, RING_BBADDR_UDW(base), 0);
>  	CTX_REG(regs, CTX_BB_HEAD_L, RING_BBADDR(base), 0);
>  	CTX_REG(regs, CTX_BB_STATE, RING_BBSTATE(base), RING_BB_PPGTT);
> -	CTX_REG(regs, CTX_SECOND_BB_HEAD_U, RING_SBBADDR_UDW(base), 0);
> -	CTX_REG(regs, CTX_SECOND_BB_HEAD_L, RING_SBBADDR(base), 0);
> -	CTX_REG(regs, CTX_SECOND_BB_STATE, RING_SBBSTATE(base), 0);
> -	if (rcs) {
> -		struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
> -
> -		CTX_REG(regs, CTX_RCS_INDIRECT_CTX, RING_INDIRECT_CTX(base), 0);
> -		CTX_REG(regs, CTX_RCS_INDIRECT_CTX_OFFSET,
> -			RING_INDIRECT_CTX_OFFSET(base), 0);
> -		if (wa_ctx->indirect_ctx.size) {
> -			u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
> +}
>  
> -			regs[CTX_RCS_INDIRECT_CTX + 1] =
> -				(ggtt_offset + wa_ctx->indirect_ctx.offset) |
> -				(wa_ctx->indirect_ctx.size / CACHELINE_BYTES);
> +static void init_wa_bb_reg_state(u32 *regs,
> +				 struct intel_engine_cs *engine,
> +				 u32 pos_bb_per_ctx)
> +{
> +	struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
> +	u32 base = engine->mmio_base;
> +	u32 pos_indirect_ctx = pos_bb_per_ctx + 2;
> +	u32 pos_indirect_ctx_offset = pos_indirect_ctx + 2;
>  
> -			regs[CTX_RCS_INDIRECT_CTX_OFFSET + 1] =
> -				intel_lr_indirect_ctx_offset(engine) << 6;
> -		}
> +	GEM_BUG_ON(engine->id != RCS0);
> +	CTX_REG(regs, pos_indirect_ctx, RING_INDIRECT_CTX(base), 0);
> +	CTX_REG(regs, pos_indirect_ctx_offset,
> +		RING_INDIRECT_CTX_OFFSET(base), 0);
> +	if (wa_ctx->indirect_ctx.size) {
> +		u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
>  
> -		CTX_REG(regs, CTX_BB_PER_CTX_PTR, RING_BB_PER_CTX_PTR(base), 0);
> -		if (wa_ctx->per_ctx.size) {
> -			u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
> +		regs[pos_indirect_ctx + 1] =
> +			(ggtt_offset + wa_ctx->indirect_ctx.offset) |
> +			(wa_ctx->indirect_ctx.size / CACHELINE_BYTES);
>  
> -			regs[CTX_BB_PER_CTX_PTR + 1] =
> -				(ggtt_offset + wa_ctx->per_ctx.offset) | 0x01;
> -		}
> +		regs[pos_indirect_ctx_offset + 1] =
> +			intel_lr_indirect_ctx_offset(engine) << 6;
>  	}
>  
> -	regs[CTX_LRI_HEADER_1] = MI_LOAD_REGISTER_IMM(9) | MI_LRI_FORCE_POSTED;
> +	CTX_REG(regs, pos_bb_per_ctx, RING_BB_PER_CTX_PTR(base), 0);
> +	if (wa_ctx->per_ctx.size) {
> +		u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
>  
> -	CTX_REG(regs, CTX_CTX_TIMESTAMP, RING_CTX_TIMESTAMP(base), 0);
> +		regs[pos_bb_per_ctx + 1] =
> +			(ggtt_offset + wa_ctx->per_ctx.offset) | 0x01;
> +	}
> +}
> +
> +static void init_ppgtt_reg_state(u32 *regs, u32 base,
> +				 struct i915_ppgtt *ppgtt)
> +{
>  	/* PDP values well be assigned later if needed */
>  	CTX_REG(regs, CTX_PDP3_UDW, GEN8_RING_PDP_UDW(base, 3), 0);
>  	CTX_REG(regs, CTX_PDP3_LDW, GEN8_RING_PDP_LDW(base, 3), 0);
> @@ -3160,6 +3150,32 @@ static void execlists_init_reg_state(u32 *regs,
>  		ASSIGN_CTX_PDP(ppgtt, regs, 1);
>  		ASSIGN_CTX_PDP(ppgtt, regs, 0);
>  	}
> +}
> +
> +static void gen8_init_reg_state(u32 *regs,
> +				struct intel_context *ce,
> +				struct intel_engine_cs *engine,
> +				struct intel_ring *ring)
> +{
> +	struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(ce->vm);
> +	bool rcs = engine->class == RENDER_CLASS;
> +	u32 base = engine->mmio_base;
> +
> +	regs[CTX_LRI_HEADER_0] = MI_LOAD_REGISTER_IMM(rcs ? 14 : 11) |
> +				 MI_LRI_FORCE_POSTED;

It could be better that the imm batch size would be contained
in the actual helper which does it. But as this would change
the imm sizes, let's not go there, atleast not yet until
everything is green on CI.

But the confusion I have is with the virtual engines.
According to comments the virtual engine reg state setup
has to mimic the execlist one exactly. With this
gen12/gen8 split, we break the symmetry. Do we
need same split in there?

-Mika


> +
> +	init_common_reg_state(regs, engine, ring);
> +	CTX_REG(regs, CTX_SECOND_BB_HEAD_U, RING_SBBADDR_UDW(base), 0);
> +	CTX_REG(regs, CTX_SECOND_BB_HEAD_L, RING_SBBADDR(base), 0);
> +	CTX_REG(regs, CTX_SECOND_BB_STATE, RING_SBBSTATE(base), 0);
> +	if (rcs)
> +		init_wa_bb_reg_state(regs, engine, CTX_BB_PER_CTX_PTR);
> +
> +	regs[CTX_LRI_HEADER_1] = MI_LOAD_REGISTER_IMM(9) | MI_LRI_FORCE_POSTED;
> +
> +	CTX_REG(regs, CTX_CTX_TIMESTAMP, RING_CTX_TIMESTAMP(base), 0);
> +
> +	init_ppgtt_reg_state(regs, base, ppgtt);
>  
>  	if (rcs) {
>  		regs[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1);
> @@ -3171,6 +3187,58 @@ static void execlists_init_reg_state(u32 *regs,
>  		regs[CTX_END] |= BIT(0);
>  }
>  
> +static void gen12_init_reg_state(u32 *regs,
> +				 struct intel_context *ce,
> +				 struct intel_engine_cs *engine,
> +				 struct intel_ring *ring)
> +{
> +	struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(ce->vm);
> +	bool rcs = engine->class == RENDER_CLASS;
> +	u32 base = engine->mmio_base;
> +
> +	GEM_DEBUG_EXEC(DRM_INFO_ONCE("Using GEN12 Register State Context\n"));
> +
> +	regs[GEN12_CTX_LRI_HEADER_0] = MI_LOAD_REGISTER_IMM(13) |
> +				       MI_LRI_FORCE_POSTED;
> +
> +	init_common_reg_state(regs, engine, ring);
> +	if (rcs)
> +		init_wa_bb_reg_state(regs, engine, GEN12_CTX_BB_PER_CTX_PTR);
> +
> +	regs[GEN12_CTX_LRI_HEADER_1] = MI_LOAD_REGISTER_IMM(9) |
> +				       MI_LRI_FORCE_POSTED;
> +
> +	CTX_REG(regs, GEN12_CTX_CTX_TIMESTAMP, RING_CTX_TIMESTAMP(base), 0);
> +
> +	init_ppgtt_reg_state(regs, base, ppgtt);
> +
> +	if (rcs) {
> +		regs[GEN12_CTX_LRI_HEADER_3] = MI_LOAD_REGISTER_IMM(1);
> +		CTX_REG(regs, GEN12_CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE,
> +			0);
> +
> +		/* TODO: oa_init_reg_state ? */
> +	}
> +}
> +
> +static void execlists_init_reg_state(u32 *regs,
> +				     struct intel_context *ce,
> +				     struct intel_engine_cs *engine,
> +				     struct intel_ring *ring)
> +{
> +	/* A context is actually a big batch buffer with several
> +	 * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The
> +	 * values we are setting here are only for the first context restore:
> +	 * on a subsequent save, the GPU will recreate this batchbuffer with new
> +	 * values (including all the missing MI_LOAD_REGISTER_IMM commands that
> +	 * we are not initializing here).
> +	 */
> +	if (INTEL_GEN(engine->i915) >= 12)
> +		gen12_init_reg_state(regs, ce, engine, ring);
> +	else
> +		gen8_init_reg_state(regs, ce, engine, ring);
> +}
> +
>  static int
>  populate_lr_context(struct intel_context *ce,
>  		    struct drm_i915_gem_object *ctx_obj,
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h
> index c2bba82bcc16..69285d354d9b 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.h
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.h
> @@ -49,6 +49,8 @@ struct intel_engine_cs;
>  
>  #define	  EL_CTRL_LOAD				(1 << 0)
>  
> +#define GEN12_ENGINE_SEMAPHORE_TOKEN(engine)	_MMIO((engine)->mmio_base + 0x2b4)
> +
>  /* The docs specify that the write pointer wraps around after 5h, "After status
>   * is written out to the last available status QW at offset 5h, this pointer
>   * wraps to 0."
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
> index 6bf34738b4e5..915824ebaf17 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
> @@ -9,7 +9,7 @@
>  
>  #include <linux/types.h>
>  
> -/* GEN8+ Reg State Context */
> +/* GEN8 to GEN11 Reg State Context */
>  #define CTX_LRI_HEADER_0		0x01
>  #define CTX_CONTEXT_CONTROL		0x02
>  #define CTX_RING_HEAD			0x04
> @@ -39,6 +39,34 @@
>  #define CTX_R_PWR_CLK_STATE		0x42
>  #define CTX_END				0x44
>  
> +/* GEN12+ Reg State Context */
> +#define GEN12_CTX_LRI_HEADER_0			CTX_LRI_HEADER_0
> +#define GEN12_CTX_CONTEXT_CONTROL		CTX_CONTEXT_CONTROL
> +#define GEN12_CTX_RING_HEAD			CTX_RING_HEAD
> +#define GEN12_CTX_RING_TAIL			CTX_RING_TAIL
> +#define GEN12_CTX_RING_BUFFER_START		CTX_RING_BUFFER_START
> +#define GEN12_CTX_RING_BUFFER_CONTROL		CTX_RING_BUFFER_CONTROL
> +#define GEN12_CTX_BB_HEAD_U			CTX_BB_HEAD_U
> +#define GEN12_CTX_BB_HEAD_L			CTX_BB_HEAD_L
> +#define GEN12_CTX_BB_STATE			CTX_BB_STATE
> +#define GEN12_CTX_BB_PER_CTX_PTR		0x12
> +#define GEN12_CTX_RCS_INDIRECT_CTX		0x14
> +#define GEN12_CTX_RCS_INDIRECT_CTX_OFFSET	0x16
> +#define GEN12_CTX_LRI_HEADER_1			CTX_LRI_HEADER_1
> +#define GEN12_CTX_CTX_TIMESTAMP			CTX_CTX_TIMESTAMP
> +#define GEN12_CTX_PDP3_UDW			CTX_PDP3_UDW
> +#define GEN12_CTX_PDP3_LDW			CTX_PDP3_LDW
> +#define GEN12_CTX_PDP2_UDW			CTX_PDP2_UDW
> +#define GEN12_CTX_PDP2_LDW			CTX_PDP2_LDW
> +#define GEN12_CTX_PDP1_UDW			CTX_PDP1_UDW
> +#define GEN12_CTX_PDP1_LDW			CTX_PDP1_LDW
> +#define GEN12_CTX_PDP0_UDW			CTX_PDP0_UDW
> +#define GEN12_CTX_PDP0_LDW			CTX_PDP0_LDW
> +#define GEN12_CTX_LRI_HEADER_2			0x34
> +#define GEN12_CTX_LRI_HEADER_3			0x41
> +#define GEN12_CTX_R_PWR_CLK_STATE		0x42
> +#define GEN12_CTX_GPGPU_CSR_BASE_ADDRESS	0x44
> +
>  #define CTX_REG(reg_state, pos, reg, val) do { \
>  	u32 *reg_state__ = (reg_state); \
>  	const u32 pos__ = (pos); \
> -- 
> 2.21.0
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Quoting Mika Kuoppala (2019-08-22 14:31:53)
> Lucas De Marchi <lucas.demarchi@intel.com> writes:
> > +static void gen8_init_reg_state(u32 *regs,
> > +                             struct intel_context *ce,
> > +                             struct intel_engine_cs *engine,
> > +                             struct intel_ring *ring)
> > +{
> > +     struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(ce->vm);
> > +     bool rcs = engine->class == RENDER_CLASS;
> > +     u32 base = engine->mmio_base;
> > +
> > +     regs[CTX_LRI_HEADER_0] = MI_LOAD_REGISTER_IMM(rcs ? 14 : 11) |
> > +                              MI_LRI_FORCE_POSTED;
> 
> It could be better that the imm batch size would be contained
> in the actual helper which does it. But as this would change
> the imm sizes, let's not go there, atleast not yet until
> everything is green on CI.
> 
> But the confusion I have is with the virtual engines.
> According to comments the virtual engine reg state setup
> has to mimic the execlist one exactly. With this
> gen12/gen8 split, we break the symmetry. Do we
> need same split in there?

If the layout of the context image changes, yes we do as we just update
the register offsets in situ.
-Chris