[v2,4/6] drm/msm/a6xx: Remove devm calls from gmu driver

Submitted by Sean Paul on May 23, 2019, 5:16 p.m.

Details

Message ID 20190523171653.138678-4-sean@poorly.run
State Accepted
Commit a62fb211ad0c9e13e79d350905a704f46447b8cf
Headers show
Series "Series without cover letter" ( rev: 1 ) in DRI devel

Not browsing as part of any series.

Commit Message

Sean Paul May 23, 2019, 5:16 p.m.
From: Sean Paul <seanpaul@chromium.org>

The gmu driver is initialized and cleaned up with calls from the gpu driver. As
such, the platform device stays valid after a6xx_gmu_remove is called and the
device managed resources are not freed. In the case of gpu probe failures or
unbind, these resources will remain managed.

If the gpu bind is run again (eg: if there's a probe defer somewhere in msm),
these resources will be initialized again for the same device, creating multiple
references. In the case of irqs, this causes failures since the irqs are
not shared (nor should they be).

This patch removes all devm_* calls and manually cleans things up in
gmu_remove.

Changes in v2:
- Add iounmap and free_irq to gmu_probe error paths

Cc: Jordan Crouse <jcrouse@codeaurora.org>
Signed-off-by: Sean Paul <seanpaul@chromium.org>
---
 drivers/gpu/drm/msm/adreno/a6xx_gmu.c | 33 ++++++++++++++++++---------
 1 file changed, 22 insertions(+), 11 deletions(-)

Patch hide | download patch | download mbox

diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
index 7465423e9b71..f7240c9e11fb 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
@@ -505,9 +505,9 @@  static void a6xx_gmu_rpmh_init(struct a6xx_gmu *gmu)
 
 err:
 	if (!IS_ERR_OR_NULL(pdcptr))
-		devm_iounmap(gmu->dev, pdcptr);
+		iounmap(pdcptr);
 	if (!IS_ERR_OR_NULL(seqptr))
-		devm_iounmap(gmu->dev, seqptr);
+		iounmap(seqptr);
 }
 
 /*
@@ -1197,7 +1197,7 @@  static void __iomem *a6xx_gmu_get_mmio(struct platform_device *pdev,
 		return ERR_PTR(-EINVAL);
 	}
 
-	ret = devm_ioremap(&pdev->dev, res->start, resource_size(res));
+	ret = ioremap(res->start, resource_size(res));
 	if (!ret) {
 		DRM_DEV_ERROR(&pdev->dev, "Unable to map the %s registers\n", name);
 		return ERR_PTR(-EINVAL);
@@ -1213,10 +1213,10 @@  static int a6xx_gmu_get_irq(struct a6xx_gmu *gmu, struct platform_device *pdev,
 
 	irq = platform_get_irq_byname(pdev, name);
 
-	ret = devm_request_irq(&pdev->dev, irq, handler, IRQF_TRIGGER_HIGH,
-		name, gmu);
+	ret = request_irq(irq, handler, IRQF_TRIGGER_HIGH, name, gmu);
 	if (ret) {
-		DRM_DEV_ERROR(&pdev->dev, "Unable to get interrupt %s\n", name);
+		DRM_DEV_ERROR(&pdev->dev, "Unable to get interrupt %s %d\n",
+			      name, ret);
 		return ret;
 	}
 
@@ -1241,12 +1241,18 @@  void a6xx_gmu_remove(struct a6xx_gpu *a6xx_gpu)
 		dev_pm_domain_detach(gmu->gxpd, false);
 	}
 
+	iounmap(gmu->mmio);
+	gmu->mmio = NULL;
+
 	a6xx_gmu_memory_free(gmu, gmu->hfi);
 
 	iommu_detach_device(gmu->domain, gmu->dev);
 
 	iommu_domain_free(gmu->domain);
 
+	free_irq(gmu->gmu_irq, gmu);
+	free_irq(gmu->hfi_irq, gmu);
+
 	gmu->initialized = false;
 }
 
@@ -1281,24 +1287,24 @@  int a6xx_gmu_probe(struct a6xx_gpu *a6xx_gpu, struct device_node *node)
 	/* Allocate memory for for the HFI queues */
 	gmu->hfi = a6xx_gmu_memory_alloc(gmu, SZ_16K);
 	if (IS_ERR(gmu->hfi))
-		goto err;
+		goto err_memory;
 
 	/* Allocate memory for the GMU debug region */
 	gmu->debug = a6xx_gmu_memory_alloc(gmu, SZ_16K);
 	if (IS_ERR(gmu->debug))
-		goto err;
+		goto err_memory;
 
 	/* Map the GMU registers */
 	gmu->mmio = a6xx_gmu_get_mmio(pdev, "gmu");
 	if (IS_ERR(gmu->mmio))
-		goto err;
+		goto err_memory;
 
 	/* Get the HFI and GMU interrupts */
 	gmu->hfi_irq = a6xx_gmu_get_irq(gmu, pdev, "hfi", a6xx_hfi_irq);
 	gmu->gmu_irq = a6xx_gmu_get_irq(gmu, pdev, "gmu", a6xx_gmu_irq);
 
 	if (gmu->hfi_irq < 0 || gmu->gmu_irq < 0)
-		goto err;
+		goto err_mmio;
 
 	/*
 	 * Get a link to the GX power domain to reset the GPU in case of GMU
@@ -1315,7 +1321,12 @@  int a6xx_gmu_probe(struct a6xx_gpu *a6xx_gpu, struct device_node *node)
 	gmu->initialized = true;
 
 	return 0;
-err:
+
+err_mmio:
+	iounmap(gmu->mmio);
+	free_irq(gmu->gmu_irq, gmu);
+	free_irq(gmu->hfi_irq, gmu);
+err_memory:
 	a6xx_gmu_memory_free(gmu, gmu->hfi);
 
 	if (gmu->domain) {

Comments

On Thu, May 23, 2019 at 01:16:43PM -0400, Sean Paul wrote:
> From: Sean Paul <seanpaul@chromium.org>
> 
> The gmu driver is initialized and cleaned up with calls from the gpu driver. As
> such, the platform device stays valid after a6xx_gmu_remove is called and the
> device managed resources are not freed. In the case of gpu probe failures or
> unbind, these resources will remain managed.
> 
> If the gpu bind is run again (eg: if there's a probe defer somewhere in msm),
> these resources will be initialized again for the same device, creating multiple
> references. In the case of irqs, this causes failures since the irqs are
> not shared (nor should they be).
> 
> This patch removes all devm_* calls and manually cleans things up in
> gmu_remove.
> 
> Changes in v2:
> - Add iounmap and free_irq to gmu_probe error paths
> 
> Cc: Jordan Crouse <jcrouse@codeaurora.org>
> Signed-off-by: Sean Paul <seanpaul@chromium.org>

As we discussed in IRC, I feel like the way we are probing and dealing with
the GMU device is messing up the reference counting. I had hoped that a
put_device() would do the trick but testing showed it didn't so there is clearly
remaining fail that we should eventually find and fix.

That said; there is really no reason to be using managed resources for this
device so this is an entirely appropriate patch.

Reviewed-by: Jordan Crouse <jcrouse@codeaurora.org>

> ---
>  drivers/gpu/drm/msm/adreno/a6xx_gmu.c | 33 ++++++++++++++++++---------
>  1 file changed, 22 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
> index 7465423e9b71..f7240c9e11fb 100644
> --- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
> +++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
> @@ -505,9 +505,9 @@ static void a6xx_gmu_rpmh_init(struct a6xx_gmu *gmu)
>  
>  err:
>  	if (!IS_ERR_OR_NULL(pdcptr))
> -		devm_iounmap(gmu->dev, pdcptr);
> +		iounmap(pdcptr);
>  	if (!IS_ERR_OR_NULL(seqptr))
> -		devm_iounmap(gmu->dev, seqptr);
> +		iounmap(seqptr);
>  }
>  
>  /*
> @@ -1197,7 +1197,7 @@ static void __iomem *a6xx_gmu_get_mmio(struct platform_device *pdev,
>  		return ERR_PTR(-EINVAL);
>  	}
>  
> -	ret = devm_ioremap(&pdev->dev, res->start, resource_size(res));
> +	ret = ioremap(res->start, resource_size(res));
>  	if (!ret) {
>  		DRM_DEV_ERROR(&pdev->dev, "Unable to map the %s registers\n", name);
>  		return ERR_PTR(-EINVAL);
> @@ -1213,10 +1213,10 @@ static int a6xx_gmu_get_irq(struct a6xx_gmu *gmu, struct platform_device *pdev,
>  
>  	irq = platform_get_irq_byname(pdev, name);
>  
> -	ret = devm_request_irq(&pdev->dev, irq, handler, IRQF_TRIGGER_HIGH,
> -		name, gmu);
> +	ret = request_irq(irq, handler, IRQF_TRIGGER_HIGH, name, gmu);
>  	if (ret) {
> -		DRM_DEV_ERROR(&pdev->dev, "Unable to get interrupt %s\n", name);
> +		DRM_DEV_ERROR(&pdev->dev, "Unable to get interrupt %s %d\n",
> +			      name, ret);
>  		return ret;
>  	}
>  
> @@ -1241,12 +1241,18 @@ void a6xx_gmu_remove(struct a6xx_gpu *a6xx_gpu)
>  		dev_pm_domain_detach(gmu->gxpd, false);
>  	}
>  
> +	iounmap(gmu->mmio);
> +	gmu->mmio = NULL;
> +
>  	a6xx_gmu_memory_free(gmu, gmu->hfi);
>  
>  	iommu_detach_device(gmu->domain, gmu->dev);
>  
>  	iommu_domain_free(gmu->domain);
>  
> +	free_irq(gmu->gmu_irq, gmu);
> +	free_irq(gmu->hfi_irq, gmu);
> +
>  	gmu->initialized = false;
>  }
>  
> @@ -1281,24 +1287,24 @@ int a6xx_gmu_probe(struct a6xx_gpu *a6xx_gpu, struct device_node *node)
>  	/* Allocate memory for for the HFI queues */
>  	gmu->hfi = a6xx_gmu_memory_alloc(gmu, SZ_16K);
>  	if (IS_ERR(gmu->hfi))
> -		goto err;
> +		goto err_memory;
>  
>  	/* Allocate memory for the GMU debug region */
>  	gmu->debug = a6xx_gmu_memory_alloc(gmu, SZ_16K);
>  	if (IS_ERR(gmu->debug))
> -		goto err;
> +		goto err_memory;
>  
>  	/* Map the GMU registers */
>  	gmu->mmio = a6xx_gmu_get_mmio(pdev, "gmu");
>  	if (IS_ERR(gmu->mmio))
> -		goto err;
> +		goto err_memory;
>  
>  	/* Get the HFI and GMU interrupts */
>  	gmu->hfi_irq = a6xx_gmu_get_irq(gmu, pdev, "hfi", a6xx_hfi_irq);
>  	gmu->gmu_irq = a6xx_gmu_get_irq(gmu, pdev, "gmu", a6xx_gmu_irq);
>  
>  	if (gmu->hfi_irq < 0 || gmu->gmu_irq < 0)
> -		goto err;
> +		goto err_mmio;
>  
>  	/*
>  	 * Get a link to the GX power domain to reset the GPU in case of GMU
> @@ -1315,7 +1321,12 @@ int a6xx_gmu_probe(struct a6xx_gpu *a6xx_gpu, struct device_node *node)
>  	gmu->initialized = true;
>  
>  	return 0;
> -err:
> +
> +err_mmio:
> +	iounmap(gmu->mmio);
> +	free_irq(gmu->gmu_irq, gmu);
> +	free_irq(gmu->hfi_irq, gmu);
> +err_memory:
>  	a6xx_gmu_memory_free(gmu, gmu->hfi);
>  
>  	if (gmu->domain) {
> -- 
> Sean Paul, Software Engineer, Google / Chromium OS
>