[2/4] drm/i915/tgl: s/ss/eu fuse reading support

Submitted by Mika Kuoppala on Sept. 12, 2019, 1:38 p.m.

Details

Message ID 20190912133816.1382-2-mika.kuoppala@linux.intel.com
State New
Headers show
Series "Series without cover letter" ( rev: 1 ) in Intel GFX

Not browsing as part of any series.

Commit Message

Mika Kuoppala Sept. 12, 2019, 1:38 p.m.
From: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>

Gen12 has dual-subslices (DSS), which compared to gen11 subslices have
some duplicated resources/paths. Although DSS behave similarly to 2
subslices, instead of splitting this and presenting userspace with bits
not directly representative of hardware resources, present userspace
with a subslice_mask made up of DSS bits instead.

Bspec: 29547
Bspec: 12247
Cc: Kelvin Gardiner <kelvin.gardiner@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
CC: Radhakrishna Sripada <radhakrishna.sripada@intel.com>
Cc: Michel Thierry <michel.thierry@intel.com> #v1
Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Cc: José Roberto de Souza <jose.souza@intel.com>
Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Signed-off-by: James Ausmus <james.ausmus@intel.com>
Signed-off-by: Oscar Mateo <oscar.mateo@intel.com>
Signed-off-by: Sudeep Dutt <sudeep.dutt@intel.com>
Signed-off-by: Stuart Summers <stuart.summers@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_sseu.h     |  9 +--
 drivers/gpu/drm/i915/i915_debugfs.c      |  3 +-
 drivers/gpu/drm/i915/i915_reg.h          |  2 +
 drivers/gpu/drm/i915/intel_device_info.c | 87 ++++++++++++++++++------
 include/uapi/drm/i915_drm.h              |  6 +-
 5 files changed, 76 insertions(+), 31 deletions(-)

Patch hide | download patch | download mbox

diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.h b/drivers/gpu/drm/i915/gt/intel_sseu.h
index 4070f6ff1db6..d1d225204f09 100644
--- a/drivers/gpu/drm/i915/gt/intel_sseu.h
+++ b/drivers/gpu/drm/i915/gt/intel_sseu.h
@@ -18,12 +18,13 @@  struct drm_i915_private;
 #define GEN_MAX_SUBSLICES	(8) /* ICL upper bound */
 #define GEN_SSEU_STRIDE(max_entries) DIV_ROUND_UP(max_entries, BITS_PER_BYTE)
 #define GEN_MAX_SUBSLICE_STRIDE GEN_SSEU_STRIDE(GEN_MAX_SUBSLICES)
-#define GEN_MAX_EUS		(10) /* HSW upper bound */
+#define GEN_MAX_EUS		(16) /* TGL upper bound */
 #define GEN_MAX_EU_STRIDE GEN_SSEU_STRIDE(GEN_MAX_EUS)
 
 struct sseu_dev_info {
 	u8 slice_mask;
 	u8 subslice_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICE_STRIDE];
+	u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES * GEN_MAX_EU_STRIDE];
 	u16 eu_total;
 	u8 eu_per_subslice;
 	u8 min_eu_in_pool;
@@ -40,12 +41,6 @@  struct sseu_dev_info {
 
 	u8 ss_stride;
 	u8 eu_stride;
-
-	/* We don't have more than 8 eus per subslice at the moment and as we
-	 * store eus enabled using bits, no need to multiply by eus per
-	 * subslice.
-	 */
-	u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES];
 };
 
 /*
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index e5835337f022..f2b92be44adf 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -3820,7 +3820,8 @@  static void gen10_sseu_device_status(struct drm_i915_private *dev_priv,
 		for (ss = 0; ss < info->sseu.max_subslices; ss++) {
 			unsigned int eu_cnt;
 
-			if (!(s_reg[s] & (GEN9_PGCTL_SS_ACK(ss))))
+			if (info->sseu.has_subslice_pg &&
+			    !(s_reg[s] & (GEN9_PGCTL_SS_ACK(ss))))
 				/* skip disabled subslice */
 				continue;
 
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index bf37ecebc82f..47847135a11f 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -2956,6 +2956,8 @@  static inline bool i915_mmio_reg_valid(i915_reg_t reg)
 
 #define GEN11_GT_SUBSLICE_DISABLE _MMIO(0x913C)
 
+#define GEN12_GT_DSS_ENABLE _MMIO(0x913C)
+
 #define GEN6_BSD_SLEEP_PSMI_CONTROL	_MMIO(0x12050)
 #define   GEN6_BSD_SLEEP_MSG_DISABLE	(1 << 0)
 #define   GEN6_BSD_SLEEP_FLUSH_DISABLE	(1 << 2)
diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c
index d9b5baaef5d0..792ca3202073 100644
--- a/drivers/gpu/drm/i915/intel_device_info.c
+++ b/drivers/gpu/drm/i915/intel_device_info.c
@@ -182,13 +182,73 @@  static u16 compute_eu_total(const struct sseu_dev_info *sseu)
 	return total;
 }
 
+static void gen11_compute_sseu_info(struct sseu_dev_info *sseu,
+				    u8 s_en, u32 ss_en, u16 eu_en)
+{
+	int s, ss;
+
+	/* ss_en represents entire subslice mask across all slices */
+	if (sseu->max_slices * sseu->max_subslices >
+	    sizeof(ss_en) * BITS_PER_BYTE) {
+		DRM_ERROR("Invalid topology, max_slices: %d, max_subslices %d\n",
+			  sseu->max_slices, sseu->max_subslices);
+		return;
+	}
+
+	for (s = 0; s < sseu->max_slices; s++) {
+		if ((s_en & BIT(s)) == 0)
+			continue;
+
+		sseu->slice_mask |= BIT(s);
+
+		intel_sseu_set_subslices(sseu, s, ss_en);
+
+		for (ss = 0; ss < sseu->max_subslices; ss++)
+			if (intel_sseu_has_subslice(sseu, s, ss))
+				sseu_set_eus(sseu, s, ss, eu_en);
+	}
+	sseu->eu_per_subslice = hweight16(eu_en);
+	sseu->eu_total = compute_eu_total(sseu);
+}
+
+static void gen12_sseu_info_init(struct drm_i915_private *dev_priv)
+{
+	struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu;
+	u8 s_en;
+	u32 dss_en;
+	u16 eu_en = 0;
+	u8 eu_en_fuse;
+	int eu;
+
+	/*
+	 * Gen12 has Dual-Subslices, which behave similarly to 2 gen11 SS.
+	 * Instead of splitting these, provide userspace with an array
+	 * of DSS to more closely represent the hardware resource.
+	 */
+	intel_sseu_set_info(sseu, 1, 6, 16);
+
+	s_en = I915_READ(GEN11_GT_SLICE_ENABLE) & GEN11_GT_S_ENA_MASK;
+
+	dss_en = I915_READ(GEN12_GT_DSS_ENABLE);
+
+	/* one bit per pair of EUs */
+	eu_en_fuse = ~(I915_READ(GEN11_EU_DISABLE) & GEN11_EU_DIS_MASK);
+	for (eu = 0; eu < sseu->max_eus_per_subslice / 2; eu++)
+		if (eu_en_fuse & BIT(eu))
+			eu_en |= BIT(eu * 2) | BIT(eu * 2 + 1);
+
+	gen11_compute_sseu_info(sseu, s_en, dss_en, eu_en);
+
+	/* TGL only supports slice-level power gating */
+	sseu->has_slice_pg = 1;
+}
+
 static void gen11_sseu_info_init(struct drm_i915_private *dev_priv)
 {
 	struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu;
 	u8 s_en;
-	u32 ss_en, ss_en_mask;
+	u32 ss_en;
 	u8 eu_en;
-	int s;
 
 	if (IS_ELKHARTLAKE(dev_priv))
 		intel_sseu_set_info(sseu, 1, 4, 8);
@@ -197,26 +257,9 @@  static void gen11_sseu_info_init(struct drm_i915_private *dev_priv)
 
 	s_en = I915_READ(GEN11_GT_SLICE_ENABLE) & GEN11_GT_S_ENA_MASK;
 	ss_en = ~I915_READ(GEN11_GT_SUBSLICE_DISABLE);
-	ss_en_mask = BIT(sseu->max_subslices) - 1;
 	eu_en = ~(I915_READ(GEN11_EU_DISABLE) & GEN11_EU_DIS_MASK);
 
-	for (s = 0; s < sseu->max_slices; s++) {
-		if (s_en & BIT(s)) {
-			int ss_idx = sseu->max_subslices * s;
-			int ss;
-
-			sseu->slice_mask |= BIT(s);
-
-			intel_sseu_set_subslices(sseu, s, (ss_en >> ss_idx) &
-							  ss_en_mask);
-
-			for (ss = 0; ss < sseu->max_subslices; ss++)
-				if (intel_sseu_has_subslice(sseu, s, ss))
-					sseu_set_eus(sseu, s, ss, eu_en);
-		}
-	}
-	sseu->eu_per_subslice = hweight8(eu_en);
-	sseu->eu_total = compute_eu_total(sseu);
+	gen11_compute_sseu_info(sseu, s_en, ss_en, eu_en);
 
 	/* ICL has no power gating restrictions. */
 	sseu->has_slice_pg = 1;
@@ -959,8 +1002,10 @@  void intel_device_info_runtime_init(struct drm_i915_private *dev_priv)
 		gen9_sseu_info_init(dev_priv);
 	else if (IS_GEN(dev_priv, 10))
 		gen10_sseu_info_init(dev_priv);
-	else if (INTEL_GEN(dev_priv) >= 11)
+	else if (IS_GEN(dev_priv, 11))
 		gen11_sseu_info_init(dev_priv);
+	else if (INTEL_GEN(dev_priv) >= 12)
+		gen12_sseu_info_init(dev_priv);
 
 	if (IS_GEN(dev_priv, 6) && intel_vtd_active()) {
 		DRM_INFO("Disabling ppGTT for VT-d support\n");
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 469dc512cca3..30c542144016 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -2033,8 +2033,10 @@  struct drm_i915_query {
  *           (data[X / 8] >> (X % 8)) & 1
  *
  * - the subslice mask for each slice with one bit per subslice telling
- *   whether a subslice is available. The availability of subslice Y in slice
- *   X can be queried with the following formula :
+ *   whether a subslice is available. Gen12 has dual-subslices, which are
+ *   similar to two gen11 subslices. For gen12, this array represents dual-
+ *   subslices. The availability of subslice Y in slice X can be queried
+ *   with the following formula :
  *
  *           (data[subslice_offset +
  *                 X * subslice_stride +

Comments

On 12/09/2019 16:38, Mika Kuoppala wrote:
> From: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
>
> Gen12 has dual-subslices (DSS), which compared to gen11 subslices have
> some duplicated resources/paths. Although DSS behave similarly to 2
> subslices, instead of splitting this and presenting userspace with bits
> not directly representative of hardware resources, present userspace
> with a subslice_mask made up of DSS bits instead.
>
> Bspec: 29547
> Bspec: 12247
> Cc: Kelvin Gardiner <kelvin.gardiner@intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
> CC: Radhakrishna Sripada <radhakrishna.sripada@intel.com>
> Cc: Michel Thierry <michel.thierry@intel.com> #v1
> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
> Cc: José Roberto de Souza <jose.souza@intel.com>
> Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
> Signed-off-by: James Ausmus <james.ausmus@intel.com>
> Signed-off-by: Oscar Mateo <oscar.mateo@intel.com>
> Signed-off-by: Sudeep Dutt <sudeep.dutt@intel.com>
> Signed-off-by: Stuart Summers <stuart.summers@intel.com>
> ---
>   drivers/gpu/drm/i915/gt/intel_sseu.h     |  9 +--
>   drivers/gpu/drm/i915/i915_debugfs.c      |  3 +-
>   drivers/gpu/drm/i915/i915_reg.h          |  2 +
>   drivers/gpu/drm/i915/intel_device_info.c | 87 ++++++++++++++++++------
>   include/uapi/drm/i915_drm.h              |  6 +-
>   5 files changed, 76 insertions(+), 31 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.h b/drivers/gpu/drm/i915/gt/intel_sseu.h
> index 4070f6ff1db6..d1d225204f09 100644
> --- a/drivers/gpu/drm/i915/gt/intel_sseu.h
> +++ b/drivers/gpu/drm/i915/gt/intel_sseu.h
> @@ -18,12 +18,13 @@ struct drm_i915_private;
>   #define GEN_MAX_SUBSLICES	(8) /* ICL upper bound */
>   #define GEN_SSEU_STRIDE(max_entries) DIV_ROUND_UP(max_entries, BITS_PER_BYTE)
>   #define GEN_MAX_SUBSLICE_STRIDE GEN_SSEU_STRIDE(GEN_MAX_SUBSLICES)
> -#define GEN_MAX_EUS		(10) /* HSW upper bound */
> +#define GEN_MAX_EUS		(16) /* TGL upper bound */
>   #define GEN_MAX_EU_STRIDE GEN_SSEU_STRIDE(GEN_MAX_EUS)
>   
>   struct sseu_dev_info {
>   	u8 slice_mask;
>   	u8 subslice_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICE_STRIDE];
> +	u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES * GEN_MAX_EU_STRIDE];
>   	u16 eu_total;
>   	u8 eu_per_subslice;
>   	u8 min_eu_in_pool;
> @@ -40,12 +41,6 @@ struct sseu_dev_info {
>   
>   	u8 ss_stride;
>   	u8 eu_stride;
> -
> -	/* We don't have more than 8 eus per subslice at the moment and as we
> -	 * store eus enabled using bits, no need to multiply by eus per
> -	 * subslice.
> -	 */
> -	u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES];
>   };
>   
>   /*
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index e5835337f022..f2b92be44adf 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -3820,7 +3820,8 @@ static void gen10_sseu_device_status(struct drm_i915_private *dev_priv,
>   		for (ss = 0; ss < info->sseu.max_subslices; ss++) {
>   			unsigned int eu_cnt;
>   
> -			if (!(s_reg[s] & (GEN9_PGCTL_SS_ACK(ss))))
> +			if (info->sseu.has_subslice_pg &&
> +			    !(s_reg[s] & (GEN9_PGCTL_SS_ACK(ss))))
>   				/* skip disabled subslice */
>   				continue;
>   
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index bf37ecebc82f..47847135a11f 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -2956,6 +2956,8 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
>   
>   #define GEN11_GT_SUBSLICE_DISABLE _MMIO(0x913C)
>   
> +#define GEN12_GT_DSS_ENABLE _MMIO(0x913C)
> +
>   #define GEN6_BSD_SLEEP_PSMI_CONTROL	_MMIO(0x12050)
>   #define   GEN6_BSD_SLEEP_MSG_DISABLE	(1 << 0)
>   #define   GEN6_BSD_SLEEP_FLUSH_DISABLE	(1 << 2)
> diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c
> index d9b5baaef5d0..792ca3202073 100644
> --- a/drivers/gpu/drm/i915/intel_device_info.c
> +++ b/drivers/gpu/drm/i915/intel_device_info.c
> @@ -182,13 +182,73 @@ static u16 compute_eu_total(const struct sseu_dev_info *sseu)
>   	return total;
>   }
>   
> +static void gen11_compute_sseu_info(struct sseu_dev_info *sseu,
> +				    u8 s_en, u32 ss_en, u16 eu_en)
> +{
> +	int s, ss;
> +
> +	/* ss_en represents entire subslice mask across all slices */
> +	if (sseu->max_slices * sseu->max_subslices >
> +	    sizeof(ss_en) * BITS_PER_BYTE) {
> +		DRM_ERROR("Invalid topology, max_slices: %d, max_subslices %d\n",
> +			  sseu->max_slices, sseu->max_subslices);


Don't you want a GEM_BUG_ON() here to match the rest of the code?

Seems like a driver bug if we reach that case.


Otherwise :


Acked-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>


Cheers,


-Lionel


> +		return;
> +	}
> +
> +	for (s = 0; s < sseu->max_slices; s++) {
> +		if ((s_en & BIT(s)) == 0)
> +			continue;
> +
> +		sseu->slice_mask |= BIT(s);
> +
> +		intel_sseu_set_subslices(sseu, s, ss_en);
> +
> +		for (ss = 0; ss < sseu->max_subslices; ss++)
> +			if (intel_sseu_has_subslice(sseu, s, ss))
> +				sseu_set_eus(sseu, s, ss, eu_en);
> +	}
> +	sseu->eu_per_subslice = hweight16(eu_en);
> +	sseu->eu_total = compute_eu_total(sseu);
> +}
> +
> +static void gen12_sseu_info_init(struct drm_i915_private *dev_priv)
> +{
> +	struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu;
> +	u8 s_en;
> +	u32 dss_en;
> +	u16 eu_en = 0;
> +	u8 eu_en_fuse;
> +	int eu;
> +
> +	/*
> +	 * Gen12 has Dual-Subslices, which behave similarly to 2 gen11 SS.
> +	 * Instead of splitting these, provide userspace with an array
> +	 * of DSS to more closely represent the hardware resource.
> +	 */
> +	intel_sseu_set_info(sseu, 1, 6, 16);
> +
> +	s_en = I915_READ(GEN11_GT_SLICE_ENABLE) & GEN11_GT_S_ENA_MASK;
> +
> +	dss_en = I915_READ(GEN12_GT_DSS_ENABLE);
> +
> +	/* one bit per pair of EUs */
> +	eu_en_fuse = ~(I915_READ(GEN11_EU_DISABLE) & GEN11_EU_DIS_MASK);
> +	for (eu = 0; eu < sseu->max_eus_per_subslice / 2; eu++)
> +		if (eu_en_fuse & BIT(eu))
> +			eu_en |= BIT(eu * 2) | BIT(eu * 2 + 1);
> +
> +	gen11_compute_sseu_info(sseu, s_en, dss_en, eu_en);
> +
> +	/* TGL only supports slice-level power gating */
> +	sseu->has_slice_pg = 1;
> +}
> +
>   static void gen11_sseu_info_init(struct drm_i915_private *dev_priv)
>   {
>   	struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu;
>   	u8 s_en;
> -	u32 ss_en, ss_en_mask;
> +	u32 ss_en;
>   	u8 eu_en;
> -	int s;
>   
>   	if (IS_ELKHARTLAKE(dev_priv))
>   		intel_sseu_set_info(sseu, 1, 4, 8);
> @@ -197,26 +257,9 @@ static void gen11_sseu_info_init(struct drm_i915_private *dev_priv)
>   
>   	s_en = I915_READ(GEN11_GT_SLICE_ENABLE) & GEN11_GT_S_ENA_MASK;
>   	ss_en = ~I915_READ(GEN11_GT_SUBSLICE_DISABLE);
> -	ss_en_mask = BIT(sseu->max_subslices) - 1;
>   	eu_en = ~(I915_READ(GEN11_EU_DISABLE) & GEN11_EU_DIS_MASK);
>   
> -	for (s = 0; s < sseu->max_slices; s++) {
> -		if (s_en & BIT(s)) {
> -			int ss_idx = sseu->max_subslices * s;
> -			int ss;
> -
> -			sseu->slice_mask |= BIT(s);
> -
> -			intel_sseu_set_subslices(sseu, s, (ss_en >> ss_idx) &
> -							  ss_en_mask);
> -
> -			for (ss = 0; ss < sseu->max_subslices; ss++)
> -				if (intel_sseu_has_subslice(sseu, s, ss))
> -					sseu_set_eus(sseu, s, ss, eu_en);
> -		}
> -	}
> -	sseu->eu_per_subslice = hweight8(eu_en);
> -	sseu->eu_total = compute_eu_total(sseu);
> +	gen11_compute_sseu_info(sseu, s_en, ss_en, eu_en);
>   
>   	/* ICL has no power gating restrictions. */
>   	sseu->has_slice_pg = 1;
> @@ -959,8 +1002,10 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv)
>   		gen9_sseu_info_init(dev_priv);
>   	else if (IS_GEN(dev_priv, 10))
>   		gen10_sseu_info_init(dev_priv);
> -	else if (INTEL_GEN(dev_priv) >= 11)
> +	else if (IS_GEN(dev_priv, 11))
>   		gen11_sseu_info_init(dev_priv);
> +	else if (INTEL_GEN(dev_priv) >= 12)
> +		gen12_sseu_info_init(dev_priv);
>   
>   	if (IS_GEN(dev_priv, 6) && intel_vtd_active()) {
>   		DRM_INFO("Disabling ppGTT for VT-d support\n");
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 469dc512cca3..30c542144016 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -2033,8 +2033,10 @@ struct drm_i915_query {
>    *           (data[X / 8] >> (X % 8)) & 1
>    *
>    * - the subslice mask for each slice with one bit per subslice telling
> - *   whether a subslice is available. The availability of subslice Y in slice
> - *   X can be queried with the following formula :
> + *   whether a subslice is available. Gen12 has dual-subslices, which are
> + *   similar to two gen11 subslices. For gen12, this array represents dual-
> + *   subslices. The availability of subslice Y in slice X can be queried
> + *   with the following formula :
>    *
>    *           (data[subslice_offset +
>    *                 X * subslice_stride +
On 12/09/2019 14:38, Mika Kuoppala wrote:
> From: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
> 
> Gen12 has dual-subslices (DSS), which compared to gen11 subslices have
> some duplicated resources/paths. Although DSS behave similarly to 2
> subslices, instead of splitting this and presenting userspace with bits
> not directly representative of hardware resources, present userspace
> with a subslice_mask made up of DSS bits instead.
> 
> Bspec: 29547
> Bspec: 12247
> Cc: Kelvin Gardiner <kelvin.gardiner@intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
> CC: Radhakrishna Sripada <radhakrishna.sripada@intel.com>
> Cc: Michel Thierry <michel.thierry@intel.com> #v1
> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
> Cc: José Roberto de Souza <jose.souza@intel.com>
> Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
> Signed-off-by: James Ausmus <james.ausmus@intel.com>
> Signed-off-by: Oscar Mateo <oscar.mateo@intel.com>
> Signed-off-by: Sudeep Dutt <sudeep.dutt@intel.com>
> Signed-off-by: Stuart Summers <stuart.summers@intel.com>
> ---
>   drivers/gpu/drm/i915/gt/intel_sseu.h     |  9 +--
>   drivers/gpu/drm/i915/i915_debugfs.c      |  3 +-
>   drivers/gpu/drm/i915/i915_reg.h          |  2 +
>   drivers/gpu/drm/i915/intel_device_info.c | 87 ++++++++++++++++++------
>   include/uapi/drm/i915_drm.h              |  6 +-
>   5 files changed, 76 insertions(+), 31 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.h b/drivers/gpu/drm/i915/gt/intel_sseu.h
> index 4070f6ff1db6..d1d225204f09 100644
> --- a/drivers/gpu/drm/i915/gt/intel_sseu.h
> +++ b/drivers/gpu/drm/i915/gt/intel_sseu.h
> @@ -18,12 +18,13 @@ struct drm_i915_private;
>   #define GEN_MAX_SUBSLICES	(8) /* ICL upper bound */
>   #define GEN_SSEU_STRIDE(max_entries) DIV_ROUND_UP(max_entries, BITS_PER_BYTE)
>   #define GEN_MAX_SUBSLICE_STRIDE GEN_SSEU_STRIDE(GEN_MAX_SUBSLICES)
> -#define GEN_MAX_EUS		(10) /* HSW upper bound */
> +#define GEN_MAX_EUS		(16) /* TGL upper bound */
>   #define GEN_MAX_EU_STRIDE GEN_SSEU_STRIDE(GEN_MAX_EUS)
>   
>   struct sseu_dev_info {
>   	u8 slice_mask;
>   	u8 subslice_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICE_STRIDE];
> +	u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES * GEN_MAX_EU_STRIDE];
>   	u16 eu_total;
>   	u8 eu_per_subslice;
>   	u8 min_eu_in_pool;
> @@ -40,12 +41,6 @@ struct sseu_dev_info {
>   
>   	u8 ss_stride;
>   	u8 eu_stride;
> -
> -	/* We don't have more than 8 eus per subslice at the moment and as we
> -	 * store eus enabled using bits, no need to multiply by eus per
> -	 * subslice.
> -	 */
> -	u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES];

Comment indeed looks obsolete even with old GEM_MAX_EUS.

Howewer more importantly, was the code broken before? Now we size 
considering the stride, but before GEN_MAX_EU_STRIDE was 
GEN_SSEU_STRIDE(GEN_MAX_EUS) = DIV_ROUND_UP(10, BITS_PER_BYTE) = 2, no? 
So wasn't the array too small?

P.S. Moving the position of the field is just noise?

>   };
>   
>   /*
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index e5835337f022..f2b92be44adf 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -3820,7 +3820,8 @@ static void gen10_sseu_device_status(struct drm_i915_private *dev_priv,
>   		for (ss = 0; ss < info->sseu.max_subslices; ss++) {
>   			unsigned int eu_cnt;
>   
> -			if (!(s_reg[s] & (GEN9_PGCTL_SS_ACK(ss))))
> +			if (info->sseu.has_subslice_pg &&
> +			    !(s_reg[s] & (GEN9_PGCTL_SS_ACK(ss))))
>   				/* skip disabled subslice */
>   				continue;
>   
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index bf37ecebc82f..47847135a11f 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -2956,6 +2956,8 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
>   
>   #define GEN11_GT_SUBSLICE_DISABLE _MMIO(0x913C)
>   
> +#define GEN12_GT_DSS_ENABLE _MMIO(0x913C)
> +
>   #define GEN6_BSD_SLEEP_PSMI_CONTROL	_MMIO(0x12050)
>   #define   GEN6_BSD_SLEEP_MSG_DISABLE	(1 << 0)
>   #define   GEN6_BSD_SLEEP_FLUSH_DISABLE	(1 << 2)
> diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c
> index d9b5baaef5d0..792ca3202073 100644
> --- a/drivers/gpu/drm/i915/intel_device_info.c
> +++ b/drivers/gpu/drm/i915/intel_device_info.c
> @@ -182,13 +182,73 @@ static u16 compute_eu_total(const struct sseu_dev_info *sseu)
>   	return total;
>   }
>   
> +static void gen11_compute_sseu_info(struct sseu_dev_info *sseu,
> +				    u8 s_en, u32 ss_en, u16 eu_en)
> +{
> +	int s, ss;
> +
> +	/* ss_en represents entire subslice mask across all slices */
> +	if (sseu->max_slices * sseu->max_subslices >
> +	    sizeof(ss_en) * BITS_PER_BYTE) {
> +		DRM_ERROR("Invalid topology, max_slices: %d, max_subslices %d\n",
> +			  sseu->max_slices, sseu->max_subslices);
> +		return;
> +	}
> +
> +	for (s = 0; s < sseu->max_slices; s++) {
> +		if ((s_en & BIT(s)) == 0)
> +			continue;
> +
> +		sseu->slice_mask |= BIT(s);
> +
> +		intel_sseu_set_subslices(sseu, s, ss_en);
> +
> +		for (ss = 0; ss < sseu->max_subslices; ss++)
> +			if (intel_sseu_has_subslice(sseu, s, ss))
> +				sseu_set_eus(sseu, s, ss, eu_en);
> +	}
> +	sseu->eu_per_subslice = hweight16(eu_en);
> +	sseu->eu_total = compute_eu_total(sseu);
> +}

Can we be kind to reviewers and extract this helper in a separate 
preceding patch? It even modifies the loop while extracting it so double 
reason to separate.

> +
> +static void gen12_sseu_info_init(struct drm_i915_private *dev_priv)
> +{
> +	struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu;
> +	u8 s_en;
> +	u32 dss_en;
> +	u16 eu_en = 0;
> +	u8 eu_en_fuse;
> +	int eu;
> +
> +	/*
> +	 * Gen12 has Dual-Subslices, which behave similarly to 2 gen11 SS.
> +	 * Instead of splitting these, provide userspace with an array
> +	 * of DSS to more closely represent the hardware resource.
> +	 */

My pet peeve about this code shouldnt' have been made to be about 
userspace at all... i915_query.c/query_topology_info is where userspace 
struct drm_i915_query_topology_info is populated :I Rant over.. Sorry 
Mika, not directed to you as the current shepherd of this patch.

> +	intel_sseu_set_info(sseu, 1, 6, 16);
> +
> +	s_en = I915_READ(GEN11_GT_SLICE_ENABLE) & GEN11_GT_S_ENA_MASK;
> +
> +	dss_en = I915_READ(GEN12_GT_DSS_ENABLE);
> +
> +	/* one bit per pair of EUs */
> +	eu_en_fuse = ~(I915_READ(GEN11_EU_DISABLE) & GEN11_EU_DIS_MASK);
> +	for (eu = 0; eu < sseu->max_eus_per_subslice / 2; eu++)
> +		if (eu_en_fuse & BIT(eu))
> +			eu_en |= BIT(eu * 2) | BIT(eu * 2 + 1);
> +
> +	gen11_compute_sseu_info(sseu, s_en, dss_en, eu_en);
> +
> +	/* TGL only supports slice-level power gating */
> +	sseu->has_slice_pg = 1;
> +}
> +
>   static void gen11_sseu_info_init(struct drm_i915_private *dev_priv)
>   {
>   	struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu;
>   	u8 s_en;
> -	u32 ss_en, ss_en_mask;
> +	u32 ss_en;
>   	u8 eu_en;
> -	int s;
>   
>   	if (IS_ELKHARTLAKE(dev_priv))
>   		intel_sseu_set_info(sseu, 1, 4, 8);
> @@ -197,26 +257,9 @@ static void gen11_sseu_info_init(struct drm_i915_private *dev_priv)
>   
>   	s_en = I915_READ(GEN11_GT_SLICE_ENABLE) & GEN11_GT_S_ENA_MASK;
>   	ss_en = ~I915_READ(GEN11_GT_SUBSLICE_DISABLE);
> -	ss_en_mask = BIT(sseu->max_subslices) - 1;
>   	eu_en = ~(I915_READ(GEN11_EU_DISABLE) & GEN11_EU_DIS_MASK);
>   
> -	for (s = 0; s < sseu->max_slices; s++) {
> -		if (s_en & BIT(s)) {
> -			int ss_idx = sseu->max_subslices * s;
> -			int ss;
> -
> -			sseu->slice_mask |= BIT(s);
> -
> -			intel_sseu_set_subslices(sseu, s, (ss_en >> ss_idx) &
> -							  ss_en_mask);
> -
> -			for (ss = 0; ss < sseu->max_subslices; ss++)
> -				if (intel_sseu_has_subslice(sseu, s, ss))
> -					sseu_set_eus(sseu, s, ss, eu_en);
> -		}
> -	}
> -	sseu->eu_per_subslice = hweight8(eu_en);
> -	sseu->eu_total = compute_eu_total(sseu);
> +	gen11_compute_sseu_info(sseu, s_en, ss_en, eu_en);
>   
>   	/* ICL has no power gating restrictions. */
>   	sseu->has_slice_pg = 1;
> @@ -959,8 +1002,10 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv)
>   		gen9_sseu_info_init(dev_priv);
>   	else if (IS_GEN(dev_priv, 10))
>   		gen10_sseu_info_init(dev_priv);
> -	else if (INTEL_GEN(dev_priv) >= 11)
> +	else if (IS_GEN(dev_priv, 11))
>   		gen11_sseu_info_init(dev_priv);
> +	else if (INTEL_GEN(dev_priv) >= 12)
> +		gen12_sseu_info_init(dev_priv);
>   
>   	if (IS_GEN(dev_priv, 6) && intel_vtd_active()) {
>   		DRM_INFO("Disabling ppGTT for VT-d support\n");
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 469dc512cca3..30c542144016 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -2033,8 +2033,10 @@ struct drm_i915_query {
>    *           (data[X / 8] >> (X % 8)) & 1
>    *
>    * - the subslice mask for each slice with one bit per subslice telling
> - *   whether a subslice is available. The availability of subslice Y in slice
> - *   X can be queried with the following formula :
> + *   whether a subslice is available. Gen12 has dual-subslices, which are
> + *   similar to two gen11 subslices. For gen12, this array represents dual-

It's ugly in user facing documentation if we cannot decide whether it is 
Gen12 or gen12. Gen12 special case also probably warrants to be in its 
own paragraph.

Maybe also be clearer by saying each *bit* in this array represents one 
dual-subslice in Gen12.

> + *   subslices. The availability of subslice Y in slice X can be queried
> + *   with the following formula :
>    *
>    *           (data[subslice_offset +
>    *                 X * subslice_stride +
> 

Just a casual read since I was copied, I am not assigning myself to be a 
reviewer. :)

Regards,

Tvrtko
Quoting Tvrtko Ursulin (2019-09-12 16:18:08)
> 
> On 12/09/2019 14:38, Mika Kuoppala wrote:
> > diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> > index 469dc512cca3..30c542144016 100644
> > --- a/include/uapi/drm/i915_drm.h
> > +++ b/include/uapi/drm/i915_drm.h
> > @@ -2033,8 +2033,10 @@ struct drm_i915_query {
> >    *           (data[X / 8] >> (X % 8)) & 1
> >    *
> >    * - the subslice mask for each slice with one bit per subslice telling
> > - *   whether a subslice is available. The availability of subslice Y in slice
> > - *   X can be queried with the following formula :
> > + *   whether a subslice is available. Gen12 has dual-subslices, which are
> > + *   similar to two gen11 subslices. For gen12, this array represents dual-
> 
> It's ugly in user facing documentation if we cannot decide whether it is 
> Gen12 or gen12. Gen12 special case also probably warrants to be in its 
> own paragraph.

Here it was using sentence capitalisation, which suits it if we are
treating it as an ordinary noun. If went with a proper noun, then Gen12
throughout. I might be wrong, but my impression is that we've
historically used ordinary nouns (gen5-8, gen11, etc).
-Chris
On 12/09/2019 16:35, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2019-09-12 16:18:08)
>>
>> On 12/09/2019 14:38, Mika Kuoppala wrote:
>>> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
>>> index 469dc512cca3..30c542144016 100644
>>> --- a/include/uapi/drm/i915_drm.h
>>> +++ b/include/uapi/drm/i915_drm.h
>>> @@ -2033,8 +2033,10 @@ struct drm_i915_query {
>>>     *           (data[X / 8] >> (X % 8)) & 1
>>>     *
>>>     * - the subslice mask for each slice with one bit per subslice telling
>>> - *   whether a subslice is available. The availability of subslice Y in slice
>>> - *   X can be queried with the following formula :
>>> + *   whether a subslice is available. Gen12 has dual-subslices, which are
>>> + *   similar to two gen11 subslices. For gen12, this array represents dual-
>>
>> It's ugly in user facing documentation if we cannot decide whether it is
>> Gen12 or gen12. Gen12 special case also probably warrants to be in its
>> own paragraph.
> 
> Here it was using sentence capitalisation, which suits it if we are
> treating it as an ordinary noun. If went with a proper noun, then Gen12
> throughout. I might be wrong, but my impression is that we've
> historically used ordinary nouns (gen5-8, gen11, etc).

My bad.

Regards,

Tvrtko