drm/amdgpu: fix dead lock if any ip block resume failed in s3

Submitted by Huang, Ray on April 13, 2017, 8:12 a.m.

Details

Message ID 1492071146-12042-1-git-send-email-ray.huang@amd.com
State New
Headers show
Series "drm/amdgpu: fix dead lock if any ip block resume failed in s3" ( rev: 1 ) in AMD X.Org drivers

Not browsing as part of any series.

Commit Message

Huang, Ray April 13, 2017, 8:12 a.m.
The driver must release the console lock whether or not the resume
succeeds.  Otherwise, fb_console will wait for the lock forever and
the system will hang.

[  244.405541] INFO: task kworker/0:0:4 blocked for more than 120 seconds.
[  244.405543]       Tainted: G           OE   4.9.0-custom #1
[  244.405544] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[  244.405541] INFO: task kworker/0:0:4 blocked for more than 120 seconds.
[  244.405543]       Tainted: G           OE   4.9.0-custom #1
[  244.405544] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[  244.405550] kworker/0:0     D    0     4      2 0x00080000
[  244.405559] Workqueue: events console_callback
[  244.405564]  ffff88045a2cfc00 0000000000000000 ffff880462b75940 ffffffff81c0e500
[  244.405568]  ffff880476419280 ffffc900018f7c90 ffffffff817dcf62 000000000000003c
[  244.405572]  0000000100000000 0000000000000002 ffff880462b75940 ffff880462b75940
[  244.405573] Call Trace:
[  244.405580]  [<ffffffff817dcf62>] ? __schedule+0x222/0x6a0
[  244.405584]  [<ffffffff817dd416>] schedule+0x36/0x80
[  244.405588]  [<ffffffff817e041c>] schedule_timeout+0x1fc/0x390
[  244.405592]  [<ffffffff817df1b4>] __down_common+0xa5/0xf8
[  244.405598]  [<ffffffff810b2ca8>] ? put_prev_entity+0x48/0x710
[  244.405601]  [<ffffffff817df224>] __down+0x1d/0x1f
[  244.405606]  [<ffffffff810c71a1>] down+0x41/0x50
[  244.405611]  [<ffffffff810d380a>] console_lock+0x1a/0x40
[  244.405614]  [<ffffffff814e3c03>] console_callback+0x13/0x160
[  244.405617]  [<ffffffff817dcf6a>] ? __schedule+0x22a/0x6a0
[  244.405623]  [<ffffffff810954e3>] process_one_work+0x153/0x3f0
[  244.405628]  [<ffffffff81095cab>] worker_thread+0x12b/0x4b0
[  244.405633]  [<ffffffff81095b80>] ? rescuer_thread+0x350/0x350
[  244.405637]  [<ffffffff8109b473>] kthread+0xd3/0xf0
[  244.405641]  [<ffffffff8109b3a0>] ? kthread_park+0x60/0x60
[  244.405645]  [<ffffffff8109b3a0>] ? kthread_park+0x60/0x60
[  244.405649]  [<ffffffff817e1ee5>] ret_from_fork+0x25/0x30

Signed-off-by: Huang Rui <ray.huang@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 28 ++++++++++++----------------
 1 file changed, 12 insertions(+), 16 deletions(-)

Patch hide | download patch | download mbox

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index bd3a0d5..abb4dcc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2280,7 +2280,7 @@  int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
 	struct drm_connector *connector;
 	struct amdgpu_device *adev = dev->dev_private;
 	struct drm_crtc *crtc;
-	int r;
+	int r = 0;
 
 	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
 		return 0;
@@ -2292,11 +2292,8 @@  int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
 		pci_set_power_state(dev->pdev, PCI_D0);
 		pci_restore_state(dev->pdev);
 		r = pci_enable_device(dev->pdev);
-		if (r) {
-			if (fbcon)
-				console_unlock();
-			return r;
-		}
+		if (r)
+			goto unlock;
 	}
 	if (adev->is_atom_fw)
 		amdgpu_atomfirmware_scratch_regs_restore(adev);
@@ -2313,7 +2310,7 @@  int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
 	r = amdgpu_resume(adev);
 	if (r) {
 		DRM_ERROR("amdgpu_resume failed (%d).\n", r);
-		return r;
+		goto unlock;
 	}
 	amdgpu_fence_driver_resume(adev);
 
@@ -2324,11 +2321,8 @@  int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
 	}
 
 	r = amdgpu_late_init(adev);
-	if (r) {
-		if (fbcon)
-			console_unlock();
-		return r;
-	}
+	if (r)
+		goto unlock;
 
 	/* pin cursors */
 	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
@@ -2349,7 +2343,7 @@  int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
 	}
 	r = amdgpu_amdkfd_resume(adev);
 	if (r)
-		return r;
+		goto unlock;
 
 	/* blat the mode back in */
 	if (fbcon) {
@@ -2396,12 +2390,14 @@  int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
 	dev->dev->power.disable_depth--;
 #endif
 
-	if (fbcon) {
+	if (fbcon)
 		amdgpu_fbdev_set_suspend(adev, 0);
+
+unlock:
+	if (fbcon)
 		console_unlock();
-	}
 
-	return 0;
+	return r;
 }
 
 static bool amdgpu_check_soft_reset(struct amdgpu_device *adev)

Comments

On 13/04/17 05:12 PM, Huang Rui wrote:
> Driver must free the console lock whether driver resuming successful
> or not.  Otherwise, fb_console will be always waiting for the lock and
> then cause system stuck.
> 
> [  244.405541] INFO: task kworker/0:0:4 blocked for more than 120 seconds.
> [  244.405543]       Tainted: G           OE   4.9.0-custom #1
> [  244.405544] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
> [  244.405541] INFO: task kworker/0:0:4 blocked for more than 120 seconds.
> [  244.405543]       Tainted: G           OE   4.9.0-custom #1
> [  244.405544] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
> [  244.405550] kworker/0:0     D    0     4      2 0x00080000
> [  244.405559] Workqueue: events console_callback
> [  244.405564]  ffff88045a2cfc00 0000000000000000 ffff880462b75940 ffffffff81c0e500
> [  244.405568]  ffff880476419280 ffffc900018f7c90 ffffffff817dcf62 000000000000003c
> [  244.405572]  0000000100000000 0000000000000002 ffff880462b75940 ffff880462b75940
> [  244.405573] Call Trace:
> [  244.405580]  [<ffffffff817dcf62>] ? __schedule+0x222/0x6a0
> [  244.405584]  [<ffffffff817dd416>] schedule+0x36/0x80
> [  244.405588]  [<ffffffff817e041c>] schedule_timeout+0x1fc/0x390
> [  244.405592]  [<ffffffff817df1b4>] __down_common+0xa5/0xf8
> [  244.405598]  [<ffffffff810b2ca8>] ? put_prev_entity+0x48/0x710
> [  244.405601]  [<ffffffff817df224>] __down+0x1d/0x1f
> [  244.405606]  [<ffffffff810c71a1>] down+0x41/0x50
> [  244.405611]  [<ffffffff810d380a>] console_lock+0x1a/0x40
> [  244.405614]  [<ffffffff814e3c03>] console_callback+0x13/0x160
> [  244.405617]  [<ffffffff817dcf6a>] ? __schedule+0x22a/0x6a0
> [  244.405623]  [<ffffffff810954e3>] process_one_work+0x153/0x3f0
> [  244.405628]  [<ffffffff81095cab>] worker_thread+0x12b/0x4b0
> [  244.405633]  [<ffffffff81095b80>] ? rescuer_thread+0x350/0x350
> [  244.405637]  [<ffffffff8109b473>] kthread+0xd3/0xf0
> [  244.405641]  [<ffffffff8109b3a0>] ? kthread_park+0x60/0x60
> [  244.405645]  [<ffffffff8109b3a0>] ? kthread_park+0x60/0x60
> [  244.405649]  [<ffffffff817e1ee5>] ret_from_fork+0x25/0x30
> 
> Signed-off-by: Huang Rui <ray.huang@amd.com>

Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
> -----Original Message-----
> From: Huang Rui [mailto:ray.huang@amd.com]
> Sent: Thursday, April 13, 2017 4:12 AM
> To: amd-gfx@lists.freedesktop.org; Deucher, Alexander
> Cc: Koenig, Christian; Wang, Ken; Huang, Ray
> Subject: [PATCH] drm/amdgpu: fix dead lock if any ip block resume failed in
> s3
> 
> Driver must free the console lock whether driver resuming successful
> or not.  Otherwise, fb_console will be always waiting for the lock and
> then cause system stuck.
> 
> [  244.405541] INFO: task kworker/0:0:4 blocked for more than 120 seconds.
> [  244.405543]       Tainted: G           OE   4.9.0-custom #1
> [  244.405544] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables
> this message.
> [  244.405541] INFO: task kworker/0:0:4 blocked for more than 120 seconds.
> [  244.405543]       Tainted: G           OE   4.9.0-custom #1
> [  244.405544] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables
> this message.
> [  244.405550] kworker/0:0     D    0     4      2 0x00080000
> [  244.405559] Workqueue: events console_callback
> [  244.405564]  ffff88045a2cfc00 0000000000000000 ffff880462b75940
> ffffffff81c0e500
> [  244.405568]  ffff880476419280 ffffc900018f7c90 ffffffff817dcf62
> 000000000000003c
> [  244.405572]  0000000100000000 0000000000000002 ffff880462b75940
> ffff880462b75940
> [  244.405573] Call Trace:
> [  244.405580]  [<ffffffff817dcf62>] ? __schedule+0x222/0x6a0
> [  244.405584]  [<ffffffff817dd416>] schedule+0x36/0x80
> [  244.405588]  [<ffffffff817e041c>] schedule_timeout+0x1fc/0x390
> [  244.405592]  [<ffffffff817df1b4>] __down_common+0xa5/0xf8
> [  244.405598]  [<ffffffff810b2ca8>] ? put_prev_entity+0x48/0x710
> [  244.405601]  [<ffffffff817df224>] __down+0x1d/0x1f
> [  244.405606]  [<ffffffff810c71a1>] down+0x41/0x50
> [  244.405611]  [<ffffffff810d380a>] console_lock+0x1a/0x40
> [  244.405614]  [<ffffffff814e3c03>] console_callback+0x13/0x160
> [  244.405617]  [<ffffffff817dcf6a>] ? __schedule+0x22a/0x6a0
> [  244.405623]  [<ffffffff810954e3>] process_one_work+0x153/0x3f0
> [  244.405628]  [<ffffffff81095cab>] worker_thread+0x12b/0x4b0
> [  244.405633]  [<ffffffff81095b80>] ? rescuer_thread+0x350/0x350
> [  244.405637]  [<ffffffff8109b473>] kthread+0xd3/0xf0
> [  244.405641]  [<ffffffff8109b3a0>] ? kthread_park+0x60/0x60
> [  244.405645]  [<ffffffff8109b3a0>] ? kthread_park+0x60/0x60
> [  244.405649]  [<ffffffff817e1ee5>] ret_from_fork+0x25/0x30
> 
> Signed-off-by: Huang Rui <ray.huang@amd.com>

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 28 ++++++++++++-------
> ---------
>  1 file changed, 12 insertions(+), 16 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index bd3a0d5..abb4dcc 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -2280,7 +2280,7 @@ int amdgpu_device_resume(struct drm_device
> *dev, bool resume, bool fbcon)
>  	struct drm_connector *connector;
>  	struct amdgpu_device *adev = dev->dev_private;
>  	struct drm_crtc *crtc;
> -	int r;
> +	int r = 0;
> 
>  	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
>  		return 0;
> @@ -2292,11 +2292,8 @@ int amdgpu_device_resume(struct drm_device
> *dev, bool resume, bool fbcon)
>  		pci_set_power_state(dev->pdev, PCI_D0);
>  		pci_restore_state(dev->pdev);
>  		r = pci_enable_device(dev->pdev);
> -		if (r) {
> -			if (fbcon)
> -				console_unlock();
> -			return r;
> -		}
> +		if (r)
> +			goto unlock;
>  	}
>  	if (adev->is_atom_fw)
>  		amdgpu_atomfirmware_scratch_regs_restore(adev);
> @@ -2313,7 +2310,7 @@ int amdgpu_device_resume(struct drm_device
> *dev, bool resume, bool fbcon)
>  	r = amdgpu_resume(adev);
>  	if (r) {
>  		DRM_ERROR("amdgpu_resume failed (%d).\n", r);
> -		return r;
> +		goto unlock;
>  	}
>  	amdgpu_fence_driver_resume(adev);
> 
> @@ -2324,11 +2321,8 @@ int amdgpu_device_resume(struct drm_device
> *dev, bool resume, bool fbcon)
>  	}
> 
>  	r = amdgpu_late_init(adev);
> -	if (r) {
> -		if (fbcon)
> -			console_unlock();
> -		return r;
> -	}
> +	if (r)
> +		goto unlock;
> 
>  	/* pin cursors */
>  	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
> @@ -2349,7 +2343,7 @@ int amdgpu_device_resume(struct drm_device
> *dev, bool resume, bool fbcon)
>  	}
>  	r = amdgpu_amdkfd_resume(adev);
>  	if (r)
> -		return r;
> +		goto unlock;
> 
>  	/* blat the mode back in */
>  	if (fbcon) {
> @@ -2396,12 +2390,14 @@ int amdgpu_device_resume(struct drm_device
> *dev, bool resume, bool fbcon)
>  	dev->dev->power.disable_depth--;
>  #endif
> 
> -	if (fbcon) {
> +	if (fbcon)
>  		amdgpu_fbdev_set_suspend(adev, 0);
> +
> +unlock:
> +	if (fbcon)
>  		console_unlock();
> -	}
> 
> -	return 0;
> +	return r;
>  }
> 
>  static bool amdgpu_check_soft_reset(struct amdgpu_device *adev)
> --
> 2.7.4