[2/3] drm/i915/uc: Don't always fail on unavailable GuC firmware

Submitted by Michal Wajdeczko on Aug. 18, 2019, 9:52 a.m.

Details

Message ID 20190818095204.31568-3-michal.wajdeczko@intel.com
State Accepted
Commit ee402140274e87030bbdcf92b145974f69ae01d3
Headers show
Series "Don't fail on GuC failure" ( rev: 1 ) in Intel GFX

Not browsing as part of any series.

Commit Message

Michal Wajdeczko Aug. 18, 2019, 9:52 a.m.
If we failed to fetch default GuC firmware and we didn't plan
to use it for the submission and we never have used GuC before
then we may continue normal driver load, no need to declare
GPU wedged (we can use execlist for submission) and it is safe
to run without the HuC (users will check HuC status anyway).

Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gt/uc/intel_uc.c | 27 +++++++++++++++++++++++++--
 1 file changed, 25 insertions(+), 2 deletions(-)

Patch hide | download patch | download mbox

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
index 6f0b9e05a5f6..10978e7ff06d 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
@@ -400,6 +400,15 @@  static int uc_init_wopcm(struct intel_uc *uc)
 	return err;
 }
 
+static bool uc_is_wopcm_locked(struct intel_uc *uc)
+{
+	struct intel_gt *gt = uc_to_gt(uc);
+	struct intel_uncore *uncore = gt->uncore;
+
+	return (intel_uncore_read(uncore, GUC_WOPCM_SIZE) & GUC_WOPCM_SIZE_LOCKED) ||
+	       (intel_uncore_read(uncore, DMA_GUC_WOPCM_OFFSET) & GUC_WOPCM_OFFSET_VALID);
+}
+
 int intel_uc_init_hw(struct intel_uc *uc)
 {
 	struct drm_i915_private *i915 = uc_to_gt(uc)->i915;
@@ -410,11 +419,19 @@  int intel_uc_init_hw(struct intel_uc *uc)
 	if (!intel_uc_supports_guc(uc))
 		return 0;
 
-	if (!intel_uc_uses_guc(uc))
+	/*
+	 * We can silently continue without GuC only if it was never enabled
+	 * before on this system after reboot, otherwise we risk GPU hangs.
+	 * To check if GuC was loaded before we look at WOPCM registers.
+	 */
+	if (!intel_uc_uses_guc(uc) && !uc_is_wopcm_locked(uc))
 		return 0;
 
 	if (!intel_uc_fw_is_available(&guc->fw)) {
-		ret = intel_uc_fw_status_to_error(guc->fw.status);
+		ret = uc_is_wopcm_locked(uc) ||
+		      intel_uc_fw_is_overridden(&guc->fw) ||
+		      intel_uc_supports_guc_submission(uc) ?
+		      intel_uc_fw_status_to_error(guc->fw.status) : 0;
 		goto err_out;
 	}
 
@@ -507,6 +524,12 @@  int intel_uc_init_hw(struct intel_uc *uc)
 err_out:
 	__uc_sanitize(uc);
 
+	if (!ret) {
+		dev_notice(i915->drm.dev, "GuC is uninitialized\n");
+		/* We want to run without GuC submission */
+		return 0;
+	}
+
 	i915_probe_error(i915, "GuC initialization failed %d\n", ret);
 
 	/* We want to keep KMS alive */

Comments

Quoting Michal Wajdeczko (2019-08-18 10:52:03)
> If we failed to fetch default GuC firmware and we didn't plan
> to use it for the submission and we never have used GuC before
> then we may continue normal driver load, no need to declare
> GPU wedged (we can use execlist for submission) and it is safe
> to run without the HuC (users will check HuC status anyway).
> 
> Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/gt/uc/intel_uc.c | 27 +++++++++++++++++++++++++--
>  1 file changed, 25 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
> index 6f0b9e05a5f6..10978e7ff06d 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
> @@ -400,6 +400,15 @@ static int uc_init_wopcm(struct intel_uc *uc)
>         return err;
>  }
>  
> +static bool uc_is_wopcm_locked(struct intel_uc *uc)
> +{
> +       struct intel_gt *gt = uc_to_gt(uc);
> +       struct intel_uncore *uncore = gt->uncore;
> +
> +       return (intel_uncore_read(uncore, GUC_WOPCM_SIZE) & GUC_WOPCM_SIZE_LOCKED) ||
> +              (intel_uncore_read(uncore, DMA_GUC_WOPCM_OFFSET) & GUC_WOPCM_OFFSET_VALID);
> +}
> +
>  int intel_uc_init_hw(struct intel_uc *uc)
>  {
>         struct drm_i915_private *i915 = uc_to_gt(uc)->i915;
> @@ -410,11 +419,19 @@ int intel_uc_init_hw(struct intel_uc *uc)
>         if (!intel_uc_supports_guc(uc))
>                 return 0;
>  
> -       if (!intel_uc_uses_guc(uc))
> +       /*
> +        * We can silently continue without GuC only if it was never enabled
> +        * before on this system after reboot, otherwise we risk GPU hangs.
> +        * To check if GuC was loaded before we look at WOPCM registers.
> +        */
> +       if (!intel_uc_uses_guc(uc) && !uc_is_wopcm_locked(uc))
>                 return 0;
>  
>         if (!intel_uc_fw_is_available(&guc->fw)) {
> -               ret = intel_uc_fw_status_to_error(guc->fw.status);
> +               ret = uc_is_wopcm_locked(uc) ||
> +                     intel_uc_fw_is_overridden(&guc->fw) ||
> +                     intel_uc_supports_guc_submission(uc) ?
> +                     intel_uc_fw_status_to_error(guc->fw.status) : 0;

I'm just going to leave it out here that this is a bit of a mouthful,
and would suggest a small function to clarify -- but naming is hard.

Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
-Chris