[2/2] drm/i915/tgl: s/ss/eu fuse reading support

Submitted by Chris Wilson on Sept. 13, 2019, 7:51 a.m.

Details

Message ID 20190913075137.18476-2-chris@chris-wilson.co.uk
State New
Headers show
Series "Series without cover letter" ( rev: 1 ) in Intel GFX

Not browsing as part of any series.

Commit Message

Chris Wilson Sept. 13, 2019, 7:51 a.m.
From: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>

Gen12 has dual-subslices (DSS), which, compared to gen11 subslices, have
some duplicated resources/paths. Although a DSS behaves similarly to two
subslices, do not split it and present userspace with bits that are not
directly representative of hardware resources; instead, present
userspace with a subslice_mask made up of DSS bits.

v2: GEM_BUG_ON on mask size (Lionel)

Bspec: 29547
Bspec: 12247
Cc: Kelvin Gardiner <kelvin.gardiner@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Cc: Radhakrishna Sripada <radhakrishna.sripada@intel.com>
Cc: Michel Thierry <michel.thierry@intel.com> #v1
Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Cc: José Roberto de Souza <jose.souza@intel.com>
Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Signed-off-by: James Ausmus <james.ausmus@intel.com>
Signed-off-by: Oscar Mateo <oscar.mateo@intel.com>
Signed-off-by: Sudeep Dutt <sudeep.dutt@intel.com>
Signed-off-by: Stuart Summers <stuart.summers@intel.com>
Signed-off-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Acked-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_sseu.h     |  9 +--
 drivers/gpu/drm/i915/i915_debugfs.c      |  3 +-
 drivers/gpu/drm/i915/i915_reg.h          |  2 +
 drivers/gpu/drm/i915/intel_device_info.c | 83 ++++++++++++++++++------
 include/uapi/drm/i915_drm.h              |  6 +-
 5 files changed, 72 insertions(+), 31 deletions(-)

Patch hide | download patch | download mbox

diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.h b/drivers/gpu/drm/i915/gt/intel_sseu.h
index 4070f6ff1db6..d1d225204f09 100644
--- a/drivers/gpu/drm/i915/gt/intel_sseu.h
+++ b/drivers/gpu/drm/i915/gt/intel_sseu.h
@@ -18,12 +18,13 @@  struct drm_i915_private;
 #define GEN_MAX_SUBSLICES	(8) /* ICL upper bound */
 #define GEN_SSEU_STRIDE(max_entries) DIV_ROUND_UP(max_entries, BITS_PER_BYTE)
 #define GEN_MAX_SUBSLICE_STRIDE GEN_SSEU_STRIDE(GEN_MAX_SUBSLICES)
-#define GEN_MAX_EUS		(10) /* HSW upper bound */
+#define GEN_MAX_EUS		(16) /* TGL upper bound */
 #define GEN_MAX_EU_STRIDE GEN_SSEU_STRIDE(GEN_MAX_EUS)
 
 struct sseu_dev_info {
 	u8 slice_mask;
 	u8 subslice_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICE_STRIDE];
+	u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES * GEN_MAX_EU_STRIDE];
 	u16 eu_total;
 	u8 eu_per_subslice;
 	u8 min_eu_in_pool;
@@ -40,12 +41,6 @@  struct sseu_dev_info {
 
 	u8 ss_stride;
 	u8 eu_stride;
-
-	/* We don't have more than 8 eus per subslice at the moment and as we
-	 * store eus enabled using bits, no need to multiply by eus per
-	 * subslice.
-	 */
-	u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES];
 };
 
 /*
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 43db50095257..b5b449a88cf1 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -3823,7 +3823,8 @@  static void gen10_sseu_device_status(struct drm_i915_private *dev_priv,
 		for (ss = 0; ss < info->sseu.max_subslices; ss++) {
 			unsigned int eu_cnt;
 
-			if (!(s_reg[s] & (GEN9_PGCTL_SS_ACK(ss))))
+			if (info->sseu.has_subslice_pg &&
+			    !(s_reg[s] & (GEN9_PGCTL_SS_ACK(ss))))
 				/* skip disabled subslice */
 				continue;
 
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index bf37ecebc82f..47847135a11f 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -2956,6 +2956,8 @@  static inline bool i915_mmio_reg_valid(i915_reg_t reg)
 
 #define GEN11_GT_SUBSLICE_DISABLE _MMIO(0x913C)
 
+#define GEN12_GT_DSS_ENABLE _MMIO(0x913C)
+
 #define GEN6_BSD_SLEEP_PSMI_CONTROL	_MMIO(0x12050)
 #define   GEN6_BSD_SLEEP_MSG_DISABLE	(1 << 0)
 #define   GEN6_BSD_SLEEP_FLUSH_DISABLE	(1 << 2)
diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c
index 50b05a5de53b..b91a960b037f 100644
--- a/drivers/gpu/drm/i915/intel_device_info.c
+++ b/drivers/gpu/drm/i915/intel_device_info.c
@@ -182,13 +182,69 @@  static u16 compute_eu_total(const struct sseu_dev_info *sseu)
 	return total;
 }
 
+static void gen11_compute_sseu_info(struct sseu_dev_info *sseu,
+				    u8 s_en, u32 ss_en, u16 eu_en)
+{
+	int s, ss;
+
+	/* ss_en represents entire subslice mask across all slices */
+	GEM_BUG_ON(sseu->max_slices * sseu->max_subslices >
+		   sizeof(ss_en) * BITS_PER_BYTE);
+
+	for (s = 0; s < sseu->max_slices; s++) {
+		if ((s_en & BIT(s)) == 0)
+			continue;
+
+		sseu->slice_mask |= BIT(s);
+
+		intel_sseu_set_subslices(sseu, s, ss_en);
+
+		for (ss = 0; ss < sseu->max_subslices; ss++)
+			if (intel_sseu_has_subslice(sseu, s, ss))
+				sseu_set_eus(sseu, s, ss, eu_en);
+	}
+	sseu->eu_per_subslice = hweight16(eu_en);
+	sseu->eu_total = compute_eu_total(sseu);
+}
+
+static void gen12_sseu_info_init(struct drm_i915_private *dev_priv)
+{
+	struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu;
+	u8 s_en;
+	u32 dss_en;
+	u16 eu_en = 0;
+	u8 eu_en_fuse;
+	int eu;
+
+	/*
+	 * Gen12 has Dual-Subslices, which behave similarly to 2 gen11 SS.
+	 * Instead of splitting these, provide userspace with an array
+	 * of DSS to more closely represent the hardware resource.
+	 */
+	intel_sseu_set_info(sseu, 1, 6, 16);
+
+	s_en = I915_READ(GEN11_GT_SLICE_ENABLE) & GEN11_GT_S_ENA_MASK;
+
+	dss_en = I915_READ(GEN12_GT_DSS_ENABLE);
+
+	/* one bit per pair of EUs */
+	eu_en_fuse = ~(I915_READ(GEN11_EU_DISABLE) & GEN11_EU_DIS_MASK);
+	for (eu = 0; eu < sseu->max_eus_per_subslice / 2; eu++)
+		if (eu_en_fuse & BIT(eu))
+			eu_en |= BIT(eu * 2) | BIT(eu * 2 + 1);
+
+	gen11_compute_sseu_info(sseu, s_en, dss_en, eu_en);
+
+	/* TGL only supports slice-level power gating */
+	sseu->has_slice_pg = 1;
+}
+
 static void gen11_sseu_info_init(struct drm_i915_private *dev_priv)
 {
 	struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu;
 	u8 s_en;
-	u32 ss_en, ss_en_mask;
+	u32 ss_en;
 	u8 eu_en;
-	int s;
 
 	if (IS_ELKHARTLAKE(dev_priv))
 		intel_sseu_set_info(sseu, 1, 4, 8);
@@ -197,26 +253,9 @@  static void gen11_sseu_info_init(struct drm_i915_private *dev_priv)
 
 	s_en = I915_READ(GEN11_GT_SLICE_ENABLE) & GEN11_GT_S_ENA_MASK;
 	ss_en = ~I915_READ(GEN11_GT_SUBSLICE_DISABLE);
-	ss_en_mask = BIT(sseu->max_subslices) - 1;
 	eu_en = ~(I915_READ(GEN11_EU_DISABLE) & GEN11_EU_DIS_MASK);
 
-	for (s = 0; s < sseu->max_slices; s++) {
-		if (s_en & BIT(s)) {
-			int ss_idx = sseu->max_subslices * s;
-			int ss;
-
-			sseu->slice_mask |= BIT(s);
-
-			intel_sseu_set_subslices(sseu, s, (ss_en >> ss_idx) &
-							  ss_en_mask);
-
-			for (ss = 0; ss < sseu->max_subslices; ss++)
-				if (intel_sseu_has_subslice(sseu, s, ss))
-					sseu_set_eus(sseu, s, ss, eu_en);
-		}
-	}
-	sseu->eu_per_subslice = hweight8(eu_en);
-	sseu->eu_total = compute_eu_total(sseu);
+	gen11_compute_sseu_info(sseu, s_en, ss_en, eu_en);
 
 	/* ICL has no power gating restrictions. */
 	sseu->has_slice_pg = 1;
@@ -959,8 +998,10 @@  void intel_device_info_runtime_init(struct drm_i915_private *dev_priv)
 		gen9_sseu_info_init(dev_priv);
 	else if (IS_GEN(dev_priv, 10))
 		gen10_sseu_info_init(dev_priv);
-	else if (INTEL_GEN(dev_priv) >= 11)
+	else if (IS_GEN(dev_priv, 11))
 		gen11_sseu_info_init(dev_priv);
+	else if (INTEL_GEN(dev_priv) >= 12)
+		gen12_sseu_info_init(dev_priv);
 
 	if (IS_GEN(dev_priv, 6) && intel_vtd_active()) {
 		DRM_INFO("Disabling ppGTT for VT-d support\n");
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 469dc512cca3..30c542144016 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -2033,8 +2033,10 @@  struct drm_i915_query {
  *           (data[X / 8] >> (X % 8)) & 1
  *
  * - the subslice mask for each slice with one bit per subslice telling
- *   whether a subslice is available. The availability of subslice Y in slice
- *   X can be queried with the following formula :
+ *   whether a subslice is available. Gen12 has dual-subslices, which are
+ *   similar to two gen11 subslices. For gen12, this array represents dual-
+ *   subslices. The availability of subslice Y in slice X can be queried
+ *   with the following formula :
  *
  *           (data[subslice_offset +
  *                 X * subslice_stride +

Comments

On Fri, Sep 13, 2019 at 12:51 AM Chris Wilson <chris@chris-wilson.co.uk> wrote:
>
> From: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
>
> Gen12 has dual-subslices (DSS), which compared to gen11 subslices have
> some duplicated resources/paths. Although DSS behave similarly to 2
> subslices, instead of splitting this and presenting userspace with bits
> not directly representative of hardware resources, present userspace
> with a subslice_mask made up of DSS bits instead.
>
> v2: GEM_BUG_ON on mask size (Lionel)
>
> Bspec: 29547
> Bspec: 12247
> Cc: Kelvin Gardiner <kelvin.gardiner@intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
> CC: Radhakrishna Sripada <radhakrishna.sripada@intel.com>
> Cc: Michel Thierry <michel.thierry@intel.com> #v1
> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
> Cc: José Roberto de Souza <jose.souza@intel.com>
> Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
> Signed-off-by: James Ausmus <james.ausmus@intel.com>
> Signed-off-by: Oscar Mateo <oscar.mateo@intel.com>
> Signed-off-by: Sudeep Dutt <sudeep.dutt@intel.com>
> Signed-off-by: Stuart Summers <stuart.summers@intel.com>
> Signed-off-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> Acked-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
> ---

After this I get the correct values for TGL:
-  Available Subslice Total: 2
-  Available Slice0 subslices: 2
-  Available EU Total: 16
-  Available EU Per Subslice: 8
+  Available Subslice Total: 6
+  Available Slice0 subslices: 6
+  Available EU Total: 96
+  Available EU Per Subslice: 16

Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>

Lucas De Marchi
>  drivers/gpu/drm/i915/gt/intel_sseu.h     |  9 +--
>  drivers/gpu/drm/i915/i915_debugfs.c      |  3 +-
>  drivers/gpu/drm/i915/i915_reg.h          |  2 +
>  drivers/gpu/drm/i915/intel_device_info.c | 83 ++++++++++++++++++------
>  include/uapi/drm/i915_drm.h              |  6 +-
>  5 files changed, 72 insertions(+), 31 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.h b/drivers/gpu/drm/i915/gt/intel_sseu.h
> index 4070f6ff1db6..d1d225204f09 100644
> --- a/drivers/gpu/drm/i915/gt/intel_sseu.h
> +++ b/drivers/gpu/drm/i915/gt/intel_sseu.h
> @@ -18,12 +18,13 @@ struct drm_i915_private;
>  #define GEN_MAX_SUBSLICES      (8) /* ICL upper bound */
>  #define GEN_SSEU_STRIDE(max_entries) DIV_ROUND_UP(max_entries, BITS_PER_BYTE)
>  #define GEN_MAX_SUBSLICE_STRIDE GEN_SSEU_STRIDE(GEN_MAX_SUBSLICES)
> -#define GEN_MAX_EUS            (10) /* HSW upper bound */
> +#define GEN_MAX_EUS            (16) /* TGL upper bound */
>  #define GEN_MAX_EU_STRIDE GEN_SSEU_STRIDE(GEN_MAX_EUS)
>
>  struct sseu_dev_info {
>         u8 slice_mask;
>         u8 subslice_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICE_STRIDE];
> +       u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES * GEN_MAX_EU_STRIDE];
>         u16 eu_total;
>         u8 eu_per_subslice;
>         u8 min_eu_in_pool;
> @@ -40,12 +41,6 @@ struct sseu_dev_info {
>
>         u8 ss_stride;
>         u8 eu_stride;
> -
> -       /* We don't have more than 8 eus per subslice at the moment and as we
> -        * store eus enabled using bits, no need to multiply by eus per
> -        * subslice.
> -        */
> -       u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES];
>  };
>
>  /*
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 43db50095257..b5b449a88cf1 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -3823,7 +3823,8 @@ static void gen10_sseu_device_status(struct drm_i915_private *dev_priv,
>                 for (ss = 0; ss < info->sseu.max_subslices; ss++) {
>                         unsigned int eu_cnt;
>
> -                       if (!(s_reg[s] & (GEN9_PGCTL_SS_ACK(ss))))
> +                       if (info->sseu.has_subslice_pg &&
> +                           !(s_reg[s] & (GEN9_PGCTL_SS_ACK(ss))))
>                                 /* skip disabled subslice */
>                                 continue;
>
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index bf37ecebc82f..47847135a11f 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -2956,6 +2956,8 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
>
>  #define GEN11_GT_SUBSLICE_DISABLE _MMIO(0x913C)
>
> +#define GEN12_GT_DSS_ENABLE _MMIO(0x913C)
> +
>  #define GEN6_BSD_SLEEP_PSMI_CONTROL    _MMIO(0x12050)
>  #define   GEN6_BSD_SLEEP_MSG_DISABLE   (1 << 0)
>  #define   GEN6_BSD_SLEEP_FLUSH_DISABLE (1 << 2)
> diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c
> index 50b05a5de53b..b91a960b037f 100644
> --- a/drivers/gpu/drm/i915/intel_device_info.c
> +++ b/drivers/gpu/drm/i915/intel_device_info.c
> @@ -182,13 +182,69 @@ static u16 compute_eu_total(const struct sseu_dev_info *sseu)
>         return total;
>  }
>
> +static void gen11_compute_sseu_info(struct sseu_dev_info *sseu,
> +                                   u8 s_en, u32 ss_en, u16 eu_en)
> +{
> +       int s, ss;
> +
> +       /* ss_en represents entire subslice mask across all slices */
> +       GEM_BUG_ON(sseu->max_slices * sseu->max_subslices >
> +                  sizeof(ss_en) * BITS_PER_BYTE);
> +
> +       for (s = 0; s < sseu->max_slices; s++) {
> +               if ((s_en & BIT(s)) == 0)
> +                       continue;
> +
> +               sseu->slice_mask |= BIT(s);
> +
> +               intel_sseu_set_subslices(sseu, s, ss_en);
> +
> +               for (ss = 0; ss < sseu->max_subslices; ss++)
> +                       if (intel_sseu_has_subslice(sseu, s, ss))
> +                               sseu_set_eus(sseu, s, ss, eu_en);
> +       }
> +       sseu->eu_per_subslice = hweight16(eu_en);
> +       sseu->eu_total = compute_eu_total(sseu);
> +}
> +
> +static void gen12_sseu_info_init(struct drm_i915_private *dev_priv)
> +{
> +       struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu;
> +       u8 s_en;
> +       u32 dss_en;
> +       u16 eu_en = 0;
> +       u8 eu_en_fuse;
> +       int eu;
> +
> +       /*
> +        * Gen12 has Dual-Subslices, which behave similarly to 2 gen11 SS.
> +        * Instead of splitting these, provide userspace with an array
> +        * of DSS to more closely represent the hardware resource.
> +        */
> +       intel_sseu_set_info(sseu, 1, 6, 16);
> +
> +       s_en = I915_READ(GEN11_GT_SLICE_ENABLE) & GEN11_GT_S_ENA_MASK;
> +
> +       dss_en = I915_READ(GEN12_GT_DSS_ENABLE);
> +
> +       /* one bit per pair of EUs */
> +       eu_en_fuse = ~(I915_READ(GEN11_EU_DISABLE) & GEN11_EU_DIS_MASK);
> +       for (eu = 0; eu < sseu->max_eus_per_subslice / 2; eu++)
> +               if (eu_en_fuse & BIT(eu))
> +                       eu_en |= BIT(eu * 2) | BIT(eu * 2 + 1);
> +
> +       gen11_compute_sseu_info(sseu, s_en, dss_en, eu_en);
> +
> +       /* TGL only supports slice-level power gating */
> +       sseu->has_slice_pg = 1;
> +}
> +
>  static void gen11_sseu_info_init(struct drm_i915_private *dev_priv)
>  {
>         struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu;
>         u8 s_en;
> -       u32 ss_en, ss_en_mask;
> +       u32 ss_en;
>         u8 eu_en;
> -       int s;
>
>         if (IS_ELKHARTLAKE(dev_priv))
>                 intel_sseu_set_info(sseu, 1, 4, 8);
> @@ -197,26 +253,9 @@ static void gen11_sseu_info_init(struct drm_i915_private *dev_priv)
>
>         s_en = I915_READ(GEN11_GT_SLICE_ENABLE) & GEN11_GT_S_ENA_MASK;
>         ss_en = ~I915_READ(GEN11_GT_SUBSLICE_DISABLE);
> -       ss_en_mask = BIT(sseu->max_subslices) - 1;
>         eu_en = ~(I915_READ(GEN11_EU_DISABLE) & GEN11_EU_DIS_MASK);
>
> -       for (s = 0; s < sseu->max_slices; s++) {
> -               if (s_en & BIT(s)) {
> -                       int ss_idx = sseu->max_subslices * s;
> -                       int ss;
> -
> -                       sseu->slice_mask |= BIT(s);
> -
> -                       intel_sseu_set_subslices(sseu, s, (ss_en >> ss_idx) &
> -                                                         ss_en_mask);
> -
> -                       for (ss = 0; ss < sseu->max_subslices; ss++)
> -                               if (intel_sseu_has_subslice(sseu, s, ss))
> -                                       sseu_set_eus(sseu, s, ss, eu_en);
> -               }
> -       }
> -       sseu->eu_per_subslice = hweight8(eu_en);
> -       sseu->eu_total = compute_eu_total(sseu);
> +       gen11_compute_sseu_info(sseu, s_en, ss_en, eu_en);
>
>         /* ICL has no power gating restrictions. */
>         sseu->has_slice_pg = 1;
> @@ -959,8 +998,10 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv)
>                 gen9_sseu_info_init(dev_priv);
>         else if (IS_GEN(dev_priv, 10))
>                 gen10_sseu_info_init(dev_priv);
> -       else if (INTEL_GEN(dev_priv) >= 11)
> +       else if (IS_GEN(dev_priv, 11))
>                 gen11_sseu_info_init(dev_priv);
> +       else if (INTEL_GEN(dev_priv) >= 12)
> +               gen12_sseu_info_init(dev_priv);
>
>         if (IS_GEN(dev_priv, 6) && intel_vtd_active()) {
>                 DRM_INFO("Disabling ppGTT for VT-d support\n");
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 469dc512cca3..30c542144016 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -2033,8 +2033,10 @@ struct drm_i915_query {
>   *           (data[X / 8] >> (X % 8)) & 1
>   *
>   * - the subslice mask for each slice with one bit per subslice telling
> - *   whether a subslice is available. The availability of subslice Y in slice
> - *   X can be queried with the following formula :
> + *   whether a subslice is available. Gen12 has dual-subslices, which are
> + *   similar to two gen11 subslices. For gen12, this array represents dual-
> + *   subslices. The availability of subslice Y in slice X can be queried
> + *   with the following formula :
>   *
>   *           (data[subslice_offset +
>   *                 X * subslice_stride +
> --
> 2.23.0
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
On 21/09/2019 03:39, Lucas De Marchi wrote:
> On Fri, Sep 13, 2019 at 12:51 AM Chris Wilson <chris@chris-wilson.co.uk> wrote:
>> From: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
>>
>> Gen12 has dual-subslices (DSS), which compared to gen11 subslices have
>> some duplicated resources/paths. Although DSS behave similarly to 2
>> subslices, instead of splitting this and presenting userspace with bits
>> not directly representative of hardware resources, present userspace
>> with a subslice_mask made up of DSS bits instead.
>>
>> v2: GEM_BUG_ON on mask size (Lionel)
>>
>> Bspec: 29547
>> Bspec: 12247
>> Cc: Kelvin Gardiner <kelvin.gardiner@intel.com>
>> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>> Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
>> CC: Radhakrishna Sripada <radhakrishna.sripada@intel.com>
>> Cc: Michel Thierry <michel.thierry@intel.com> #v1
>> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
>> Cc: José Roberto de Souza <jose.souza@intel.com>
>> Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
>> Signed-off-by: James Ausmus <james.ausmus@intel.com>
>> Signed-off-by: Oscar Mateo <oscar.mateo@intel.com>
>> Signed-off-by: Sudeep Dutt <sudeep.dutt@intel.com>
>> Signed-off-by: Stuart Summers <stuart.summers@intel.com>
>> Signed-off-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
>> Acked-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
>> ---
> After this I get the correct values for TGL:
> -  Available Subslice Total: 2
> -  Available Slice0 subslices: 2
> -  Available EU Total: 16
> -  Available EU Per Subslice: 8
> +  Available Subslice Total: 6
> +  Available Slice0 subslices: 6
> +  Available EU Total: 96
> +  Available EU Per Subslice: 16
>
> Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
>
> Lucas De Marchi


Btw, shouldn't we print "Dualsubslice" rather than "Subslice" for TGL?


-Lionel


>>   drivers/gpu/drm/i915/gt/intel_sseu.h     |  9 +--
>>   drivers/gpu/drm/i915/i915_debugfs.c      |  3 +-
>>   drivers/gpu/drm/i915/i915_reg.h          |  2 +
>>   drivers/gpu/drm/i915/intel_device_info.c | 83 ++++++++++++++++++------
>>   include/uapi/drm/i915_drm.h              |  6 +-
>>   5 files changed, 72 insertions(+), 31 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.h b/drivers/gpu/drm/i915/gt/intel_sseu.h
>> index 4070f6ff1db6..d1d225204f09 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_sseu.h
>> +++ b/drivers/gpu/drm/i915/gt/intel_sseu.h
>> @@ -18,12 +18,13 @@ struct drm_i915_private;
>>   #define GEN_MAX_SUBSLICES      (8) /* ICL upper bound */
>>   #define GEN_SSEU_STRIDE(max_entries) DIV_ROUND_UP(max_entries, BITS_PER_BYTE)
>>   #define GEN_MAX_SUBSLICE_STRIDE GEN_SSEU_STRIDE(GEN_MAX_SUBSLICES)
>> -#define GEN_MAX_EUS            (10) /* HSW upper bound */
>> +#define GEN_MAX_EUS            (16) /* TGL upper bound */
>>   #define GEN_MAX_EU_STRIDE GEN_SSEU_STRIDE(GEN_MAX_EUS)
>>
>>   struct sseu_dev_info {
>>          u8 slice_mask;
>>          u8 subslice_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICE_STRIDE];
>> +       u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES * GEN_MAX_EU_STRIDE];
>>          u16 eu_total;
>>          u8 eu_per_subslice;
>>          u8 min_eu_in_pool;
>> @@ -40,12 +41,6 @@ struct sseu_dev_info {
>>
>>          u8 ss_stride;
>>          u8 eu_stride;
>> -
>> -       /* We don't have more than 8 eus per subslice at the moment and as we
>> -        * store eus enabled using bits, no need to multiply by eus per
>> -        * subslice.
>> -        */
>> -       u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES];
>>   };
>>
>>   /*
>> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
>> index 43db50095257..b5b449a88cf1 100644
>> --- a/drivers/gpu/drm/i915/i915_debugfs.c
>> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
>> @@ -3823,7 +3823,8 @@ static void gen10_sseu_device_status(struct drm_i915_private *dev_priv,
>>                  for (ss = 0; ss < info->sseu.max_subslices; ss++) {
>>                          unsigned int eu_cnt;
>>
>> -                       if (!(s_reg[s] & (GEN9_PGCTL_SS_ACK(ss))))
>> +                       if (info->sseu.has_subslice_pg &&
>> +                           !(s_reg[s] & (GEN9_PGCTL_SS_ACK(ss))))
>>                                  /* skip disabled subslice */
>>                                  continue;
>>
>> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
>> index bf37ecebc82f..47847135a11f 100644
>> --- a/drivers/gpu/drm/i915/i915_reg.h
>> +++ b/drivers/gpu/drm/i915/i915_reg.h
>> @@ -2956,6 +2956,8 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
>>
>>   #define GEN11_GT_SUBSLICE_DISABLE _MMIO(0x913C)
>>
>> +#define GEN12_GT_DSS_ENABLE _MMIO(0x913C)
>> +
>>   #define GEN6_BSD_SLEEP_PSMI_CONTROL    _MMIO(0x12050)
>>   #define   GEN6_BSD_SLEEP_MSG_DISABLE   (1 << 0)
>>   #define   GEN6_BSD_SLEEP_FLUSH_DISABLE (1 << 2)
>> diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c
>> index 50b05a5de53b..b91a960b037f 100644
>> --- a/drivers/gpu/drm/i915/intel_device_info.c
>> +++ b/drivers/gpu/drm/i915/intel_device_info.c
>> @@ -182,13 +182,69 @@ static u16 compute_eu_total(const struct sseu_dev_info *sseu)
>>          return total;
>>   }
>>
>> +static void gen11_compute_sseu_info(struct sseu_dev_info *sseu,
>> +                                   u8 s_en, u32 ss_en, u16 eu_en)
>> +{
>> +       int s, ss;
>> +
>> +       /* ss_en represents entire subslice mask across all slices */
>> +       GEM_BUG_ON(sseu->max_slices * sseu->max_subslices >
>> +                  sizeof(ss_en) * BITS_PER_BYTE);
>> +
>> +       for (s = 0; s < sseu->max_slices; s++) {
>> +               if ((s_en & BIT(s)) == 0)
>> +                       continue;
>> +
>> +               sseu->slice_mask |= BIT(s);
>> +
>> +               intel_sseu_set_subslices(sseu, s, ss_en);
>> +
>> +               for (ss = 0; ss < sseu->max_subslices; ss++)
>> +                       if (intel_sseu_has_subslice(sseu, s, ss))
>> +                               sseu_set_eus(sseu, s, ss, eu_en);
>> +       }
>> +       sseu->eu_per_subslice = hweight16(eu_en);
>> +       sseu->eu_total = compute_eu_total(sseu);
>> +}
>> +
>> +static void gen12_sseu_info_init(struct drm_i915_private *dev_priv)
>> +{
>> +       struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu;
>> +       u8 s_en;
>> +       u32 dss_en;
>> +       u16 eu_en = 0;
>> +       u8 eu_en_fuse;
>> +       int eu;
>> +
>> +       /*
>> +        * Gen12 has Dual-Subslices, which behave similarly to 2 gen11 SS.
>> +        * Instead of splitting these, provide userspace with an array
>> +        * of DSS to more closely represent the hardware resource.
>> +        */
>> +       intel_sseu_set_info(sseu, 1, 6, 16);
>> +
>> +       s_en = I915_READ(GEN11_GT_SLICE_ENABLE) & GEN11_GT_S_ENA_MASK;
>> +
>> +       dss_en = I915_READ(GEN12_GT_DSS_ENABLE);
>> +
>> +       /* one bit per pair of EUs */
>> +       eu_en_fuse = ~(I915_READ(GEN11_EU_DISABLE) & GEN11_EU_DIS_MASK);
>> +       for (eu = 0; eu < sseu->max_eus_per_subslice / 2; eu++)
>> +               if (eu_en_fuse & BIT(eu))
>> +                       eu_en |= BIT(eu * 2) | BIT(eu * 2 + 1);
>> +
>> +       gen11_compute_sseu_info(sseu, s_en, dss_en, eu_en);
>> +
>> +       /* TGL only supports slice-level power gating */
>> +       sseu->has_slice_pg = 1;
>> +}
>> +
>>   static void gen11_sseu_info_init(struct drm_i915_private *dev_priv)
>>   {
>>          struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu;
>>          u8 s_en;
>> -       u32 ss_en, ss_en_mask;
>> +       u32 ss_en;
>>          u8 eu_en;
>> -       int s;
>>
>>          if (IS_ELKHARTLAKE(dev_priv))
>>                  intel_sseu_set_info(sseu, 1, 4, 8);
>> @@ -197,26 +253,9 @@ static void gen11_sseu_info_init(struct drm_i915_private *dev_priv)
>>
>>          s_en = I915_READ(GEN11_GT_SLICE_ENABLE) & GEN11_GT_S_ENA_MASK;
>>          ss_en = ~I915_READ(GEN11_GT_SUBSLICE_DISABLE);
>> -       ss_en_mask = BIT(sseu->max_subslices) - 1;
>>          eu_en = ~(I915_READ(GEN11_EU_DISABLE) & GEN11_EU_DIS_MASK);
>>
>> -       for (s = 0; s < sseu->max_slices; s++) {
>> -               if (s_en & BIT(s)) {
>> -                       int ss_idx = sseu->max_subslices * s;
>> -                       int ss;
>> -
>> -                       sseu->slice_mask |= BIT(s);
>> -
>> -                       intel_sseu_set_subslices(sseu, s, (ss_en >> ss_idx) &
>> -                                                         ss_en_mask);
>> -
>> -                       for (ss = 0; ss < sseu->max_subslices; ss++)
>> -                               if (intel_sseu_has_subslice(sseu, s, ss))
>> -                                       sseu_set_eus(sseu, s, ss, eu_en);
>> -               }
>> -       }
>> -       sseu->eu_per_subslice = hweight8(eu_en);
>> -       sseu->eu_total = compute_eu_total(sseu);
>> +       gen11_compute_sseu_info(sseu, s_en, ss_en, eu_en);
>>
>>          /* ICL has no power gating restrictions. */
>>          sseu->has_slice_pg = 1;
>> @@ -959,8 +998,10 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv)
>>                  gen9_sseu_info_init(dev_priv);
>>          else if (IS_GEN(dev_priv, 10))
>>                  gen10_sseu_info_init(dev_priv);
>> -       else if (INTEL_GEN(dev_priv) >= 11)
>> +       else if (IS_GEN(dev_priv, 11))
>>                  gen11_sseu_info_init(dev_priv);
>> +       else if (INTEL_GEN(dev_priv) >= 12)
>> +               gen12_sseu_info_init(dev_priv);
>>
>>          if (IS_GEN(dev_priv, 6) && intel_vtd_active()) {
>>                  DRM_INFO("Disabling ppGTT for VT-d support\n");
>> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
>> index 469dc512cca3..30c542144016 100644
>> --- a/include/uapi/drm/i915_drm.h
>> +++ b/include/uapi/drm/i915_drm.h
>> @@ -2033,8 +2033,10 @@ struct drm_i915_query {
>>    *           (data[X / 8] >> (X % 8)) & 1
>>    *
>>    * - the subslice mask for each slice with one bit per subslice telling
>> - *   whether a subslice is available. The availability of subslice Y in slice
>> - *   X can be queried with the following formula :
>> + *   whether a subslice is available. Gen12 has dual-subslices, which are
>> + *   similar to two gen11 subslices. For gen12, this array represents dual-
>> + *   subslices. The availability of subslice Y in slice X can be queried
>> + *   with the following formula :
>>    *
>>    *           (data[subslice_offset +
>>    *                 X * subslice_stride +
>> --
>> 2.23.0
>>
>> _______________________________________________
>> Intel-gfx mailing list
>> Intel-gfx@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
>
>