drm/amd/display: Add fast path for cursor plane updates

Submitted by Kazlauskas, Nicholas on Dec. 5, 2018, 7:59 p.m.

Details

Message ID 20181205195907.9501-1-nicholas.kazlauskas@amd.com
State New
Headers show
Series "drm/amd/display: Add fast path for cursor plane updates" ( rev: 1 ) in AMD X.Org drivers

Not browsing as part of any series.

Commit Message

Kazlauskas, Nicholas Dec. 5, 2018, 7:59 p.m.
[Why]
Legacy cursor plane updates from drm helpers go through the full
atomic codepath. A high volume of cursor updates through this slow
code path can cause subsequent page-flips to skip vblank intervals
since each individual update is slow.

This problem is particularly noticeable for the compton compositor.

[How]
A fast path for cursor plane updates is added by using DRM asynchronous
commit support provided by async_check and async_update. These don't do
a full state/flip_done dependency stall and they don't block other
commit work.

However, DC still expects itself to be single-threaded for anything
that can issue register writes. Screen corruption or hangs can occur
if write sequences overlap. Every call that potentially perform
register writes needs to be guarded for asynchronous updates to work.
The dc_lock mutex was added for this.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=106175

Cc: Leo Li <sunpeng.li@amd.com>
Cc: Harry Wentland <harry.wentland@amd.com>
Signed-off-by: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
---
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 67 ++++++++++++++++++-
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h |  8 +++
 2 files changed, 73 insertions(+), 2 deletions(-)

Patch hide | download patch | download mbox

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 23d61570df17..4df14a50a8ac 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -57,6 +57,7 @@ 
 
 #include <drm/drmP.h>
 #include <drm/drm_atomic.h>
+#include <drm/drm_atomic_uapi.h>
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_dp_mst_helper.h>
 #include <drm/drm_fb_helper.h>
@@ -133,6 +134,8 @@  static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state);
 static int amdgpu_dm_atomic_check(struct drm_device *dev,
 				  struct drm_atomic_state *state);
 
+static void handle_cursor_update(struct drm_plane *plane,
+				 struct drm_plane_state *old_plane_state);
 
 
 
@@ -402,6 +405,8 @@  static int amdgpu_dm_init(struct amdgpu_device *adev)
 	/* Zero all the fields */
 	memset(&init_data, 0, sizeof(init_data));
 
+	mutex_init(&adev->dm.dc_lock);
+
 	if(amdgpu_dm_irq_init(adev)) {
 		DRM_ERROR("amdgpu: failed to initialize DM IRQ support.\n");
 		goto error;
@@ -516,6 +521,9 @@  static void amdgpu_dm_fini(struct amdgpu_device *adev)
 	/* DC Destroy TODO: Replace destroy DAL */
 	if (adev->dm.dc)
 		dc_destroy(&adev->dm.dc);
+
+	mutex_destroy(&adev->dm.dc_lock);
+
 	return;
 }
 
@@ -3615,10 +3623,43 @@  static int dm_plane_atomic_check(struct drm_plane *plane,
 	return -EINVAL;
 }
 
+static int dm_plane_atomic_async_check(struct drm_plane *plane,
+				       struct drm_plane_state *new_plane_state)
+{
+	/* Only support async updates on cursor planes. */
+	if (plane->type != DRM_PLANE_TYPE_CURSOR)
+		return -EINVAL;
+
+	return 0;
+}
+
+static void dm_plane_atomic_async_update(struct drm_plane *plane,
+					 struct drm_plane_state *new_state)
+{
+	struct drm_plane_state *old_state =
+		drm_atomic_get_old_plane_state(new_state->state, plane);
+
+	if (plane->state->fb != new_state->fb)
+		drm_atomic_set_fb_for_plane(plane->state, new_state->fb);
+
+	plane->state->src_x = new_state->src_x;
+	plane->state->src_y = new_state->src_y;
+	plane->state->src_w = new_state->src_w;
+	plane->state->src_h = new_state->src_h;
+	plane->state->crtc_x = new_state->crtc_x;
+	plane->state->crtc_y = new_state->crtc_y;
+	plane->state->crtc_w = new_state->crtc_w;
+	plane->state->crtc_h = new_state->crtc_h;
+
+	handle_cursor_update(plane, old_state);
+}
+
 static const struct drm_plane_helper_funcs dm_plane_helper_funcs = {
 	.prepare_fb = dm_plane_helper_prepare_fb,
 	.cleanup_fb = dm_plane_helper_cleanup_fb,
 	.atomic_check = dm_plane_atomic_check,
+	.atomic_async_check = dm_plane_atomic_async_check,
+	.atomic_async_update = dm_plane_atomic_async_update
 };
 
 /*
@@ -4307,6 +4348,7 @@  static int get_cursor_position(struct drm_plane *plane, struct drm_crtc *crtc,
 static void handle_cursor_update(struct drm_plane *plane,
 				 struct drm_plane_state *old_plane_state)
 {
+	struct amdgpu_device *adev = plane->dev->dev_private;
 	struct amdgpu_framebuffer *afb = to_amdgpu_framebuffer(plane->state->fb);
 	struct drm_crtc *crtc = afb ? plane->state->crtc : old_plane_state->crtc;
 	struct dm_crtc_state *crtc_state = crtc ? to_dm_crtc_state(crtc->state) : NULL;
@@ -4331,9 +4373,12 @@  static void handle_cursor_update(struct drm_plane *plane,
 
 	if (!position.enable) {
 		/* turn off cursor */
-		if (crtc_state && crtc_state->stream)
+		if (crtc_state && crtc_state->stream) {
+			mutex_lock(&adev->dm.dc_lock);
 			dc_stream_set_cursor_position(crtc_state->stream,
 						      &position);
+			mutex_unlock(&adev->dm.dc_lock);
+		}
 		return;
 	}
 
@@ -4351,6 +4396,7 @@  static void handle_cursor_update(struct drm_plane *plane,
 	attributes.pitch = attributes.width;
 
 	if (crtc_state->stream) {
+		mutex_lock(&adev->dm.dc_lock);
 		if (!dc_stream_set_cursor_attributes(crtc_state->stream,
 							 &attributes))
 			DRM_ERROR("DC failed to set cursor attributes\n");
@@ -4358,6 +4404,7 @@  static void handle_cursor_update(struct drm_plane *plane,
 		if (!dc_stream_set_cursor_position(crtc_state->stream,
 						   &position))
 			DRM_ERROR("DC failed to set cursor position\n");
+		mutex_unlock(&adev->dm.dc_lock);
 	}
 }
 
@@ -4573,6 +4620,7 @@  static void amdgpu_dm_do_flip(struct drm_crtc *crtc,
 				&acrtc_state->stream->vrr_infopacket;
 	}
 
+	mutex_lock(&adev->dm.dc_lock);
 	dc_commit_updates_for_stream(adev->dm.dc,
 					     surface_updates,
 					     1,
@@ -4580,6 +4628,7 @@  static void amdgpu_dm_do_flip(struct drm_crtc *crtc,
 					     &stream_update,
 					     &surface_updates->surface,
 					     state);
+	mutex_unlock(&adev->dm.dc_lock);
 
 	DRM_DEBUG_DRIVER("%s Flipping to hi: 0x%x, low: 0x%x \n",
 			 __func__,
@@ -4594,6 +4643,7 @@  static void amdgpu_dm_do_flip(struct drm_crtc *crtc,
  * with a dc_plane_state and follow the atomic model a bit more closely here.
  */
 static bool commit_planes_to_stream(
+		struct amdgpu_display_manager *dm,
 		struct dc *dc,
 		struct dc_plane_state **plane_states,
 		uint8_t new_plane_count,
@@ -4670,11 +4720,13 @@  static bool commit_planes_to_stream(
 		updates[i].scaling_info = &scaling_info[i];
 	}
 
+	mutex_lock(&dm->dc_lock);
 	dc_commit_updates_for_stream(
 			dc,
 			updates,
 			new_plane_count,
 			dc_stream, stream_update, plane_states, state);
+	mutex_unlock(&dm->dc_lock);
 
 	kfree(flip_addr);
 	kfree(plane_info);
@@ -4780,7 +4832,8 @@  static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
 
 		dc_stream_attach->abm_level = acrtc_state->abm_level;
 
-		if (false == commit_planes_to_stream(dm->dc,
+		if (false == commit_planes_to_stream(dm,
+							dm->dc,
 							plane_states_constructed,
 							planes_count,
 							acrtc_state,
@@ -4950,7 +5003,9 @@  static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
 
 	if (dc_state) {
 		dm_enable_per_frame_crtc_master_sync(dc_state);
+		mutex_lock(&dm->dc_lock);
 		WARN_ON(!dc_commit_state(dm->dc, dc_state));
+		mutex_unlock(&dm->dc_lock);
 	}
 
 	for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) {
@@ -5012,6 +5067,7 @@  static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
 
 		/*TODO How it works with MPO ?*/
 		if (!commit_planes_to_stream(
+				dm,
 				dm->dc,
 				status->plane_states,
 				status->plane_count,
@@ -5904,6 +5960,13 @@  static int amdgpu_dm_atomic_check(struct drm_device *dev,
 			ret = -EINVAL;
 			goto fail;
 		}
+	} else if (state->legacy_cursor_update) {
+		/*
+		 * This is a fast cursor update coming from the plane update
+		 * helper, check if it can be done asynchronously for better
+		 * performance.
+		 */
+		state->async_update = !drm_atomic_helper_async_check(dev, state);
 	}
 
 	/* Must be success */
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
index 4326dc256491..25bb91ee80ba 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
@@ -134,6 +134,14 @@  struct amdgpu_display_manager {
 
 	struct drm_modeset_lock atomic_obj_lock;
 
+	/**
+	 * @dc_lock:
+	 *
+	 * Guards access to DC functions that can issue register write
+	 * sequences.
+	 */
+	struct mutex dc_lock;
+
 	/**
 	 * @irq_handler_list_low_tab:
 	 *

Comments

On 12/05/2018 02:59 PM, Nicholas Kazlauskas wrote:
> [Why]

> Legacy cursor plane updates from drm helpers go through the full

> atomic codepath. A high volume of cursor updates through this slow

> code path can cause subsequent page-flips to skip vblank intervals

> since each individual update is slow.

>

> This problem is particularly noticeable for the compton compositor.

>

> [How]

> A fast path for cursor plane updates is added by using DRM asynchronous

> commit support provided by async_check and async_update. These don't do

> a full state/flip_done dependency stall and they don't block other

> commit work.

>

> However, DC still expects itself to be single-threaded for anything

> that can issue register writes. Screen corruption or hangs can occur

> if write sequences overlap. Every call that potentially perform

> register writes needs to be guarded for asynchronous updates to work.

> The dc_lock mutex was added for this.


As far as page flip related register writes concerned this I think this 
was never an issue - we always
supported fully concurrent page flips on different CRTCs meaning writing 
surface address concurrently
on different pipes. So Are you sure you  can't do cursor update 
concurrently against at least page flips ?
I am not sure about full updates.

Andrey

>

> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=106175

>

> Cc: Leo Li <sunpeng.li@amd.com>

> Cc: Harry Wentland <harry.wentland@amd.com>

> Signed-off-by: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>

> ---

>   .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 67 ++++++++++++++++++-

>   .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h |  8 +++

>   2 files changed, 73 insertions(+), 2 deletions(-)

>

> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c

> index 23d61570df17..4df14a50a8ac 100644

> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c

> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c

> @@ -57,6 +57,7 @@

>   

>   #include <drm/drmP.h>

>   #include <drm/drm_atomic.h>

> +#include <drm/drm_atomic_uapi.h>

>   #include <drm/drm_atomic_helper.h>

>   #include <drm/drm_dp_mst_helper.h>

>   #include <drm/drm_fb_helper.h>

> @@ -133,6 +134,8 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state);

>   static int amdgpu_dm_atomic_check(struct drm_device *dev,

>   				  struct drm_atomic_state *state);

>   

> +static void handle_cursor_update(struct drm_plane *plane,

> +				 struct drm_plane_state *old_plane_state);

>   

>   

>   

> @@ -402,6 +405,8 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)

>   	/* Zero all the fields */

>   	memset(&init_data, 0, sizeof(init_data));

>   

> +	mutex_init(&adev->dm.dc_lock);

> +

>   	if(amdgpu_dm_irq_init(adev)) {

>   		DRM_ERROR("amdgpu: failed to initialize DM IRQ support.\n");

>   		goto error;

> @@ -516,6 +521,9 @@ static void amdgpu_dm_fini(struct amdgpu_device *adev)

>   	/* DC Destroy TODO: Replace destroy DAL */

>   	if (adev->dm.dc)

>   		dc_destroy(&adev->dm.dc);

> +

> +	mutex_destroy(&adev->dm.dc_lock);

> +

>   	return;

>   }

>   

> @@ -3615,10 +3623,43 @@ static int dm_plane_atomic_check(struct drm_plane *plane,

>   	return -EINVAL;

>   }

>   

> +static int dm_plane_atomic_async_check(struct drm_plane *plane,

> +				       struct drm_plane_state *new_plane_state)

> +{

> +	/* Only support async updates on cursor planes. */

> +	if (plane->type != DRM_PLANE_TYPE_CURSOR)

> +		return -EINVAL;

> +

> +	return 0;

> +}

> +

> +static void dm_plane_atomic_async_update(struct drm_plane *plane,

> +					 struct drm_plane_state *new_state)

> +{

> +	struct drm_plane_state *old_state =

> +		drm_atomic_get_old_plane_state(new_state->state, plane);

> +

> +	if (plane->state->fb != new_state->fb)

> +		drm_atomic_set_fb_for_plane(plane->state, new_state->fb);

> +

> +	plane->state->src_x = new_state->src_x;

> +	plane->state->src_y = new_state->src_y;

> +	plane->state->src_w = new_state->src_w;

> +	plane->state->src_h = new_state->src_h;

> +	plane->state->crtc_x = new_state->crtc_x;

> +	plane->state->crtc_y = new_state->crtc_y;

> +	plane->state->crtc_w = new_state->crtc_w;

> +	plane->state->crtc_h = new_state->crtc_h;

> +

> +	handle_cursor_update(plane, old_state);

> +}

> +

>   static const struct drm_plane_helper_funcs dm_plane_helper_funcs = {

>   	.prepare_fb = dm_plane_helper_prepare_fb,

>   	.cleanup_fb = dm_plane_helper_cleanup_fb,

>   	.atomic_check = dm_plane_atomic_check,

> +	.atomic_async_check = dm_plane_atomic_async_check,

> +	.atomic_async_update = dm_plane_atomic_async_update

>   };

>   

>   /*

> @@ -4307,6 +4348,7 @@ static int get_cursor_position(struct drm_plane *plane, struct drm_crtc *crtc,

>   static void handle_cursor_update(struct drm_plane *plane,

>   				 struct drm_plane_state *old_plane_state)

>   {

> +	struct amdgpu_device *adev = plane->dev->dev_private;

>   	struct amdgpu_framebuffer *afb = to_amdgpu_framebuffer(plane->state->fb);

>   	struct drm_crtc *crtc = afb ? plane->state->crtc : old_plane_state->crtc;

>   	struct dm_crtc_state *crtc_state = crtc ? to_dm_crtc_state(crtc->state) : NULL;

> @@ -4331,9 +4373,12 @@ static void handle_cursor_update(struct drm_plane *plane,

>   

>   	if (!position.enable) {

>   		/* turn off cursor */

> -		if (crtc_state && crtc_state->stream)

> +		if (crtc_state && crtc_state->stream) {

> +			mutex_lock(&adev->dm.dc_lock);

>   			dc_stream_set_cursor_position(crtc_state->stream,

>   						      &position);

> +			mutex_unlock(&adev->dm.dc_lock);

> +		}

>   		return;

>   	}

>   

> @@ -4351,6 +4396,7 @@ static void handle_cursor_update(struct drm_plane *plane,

>   	attributes.pitch = attributes.width;

>   

>   	if (crtc_state->stream) {

> +		mutex_lock(&adev->dm.dc_lock);

>   		if (!dc_stream_set_cursor_attributes(crtc_state->stream,

>   							 &attributes))

>   			DRM_ERROR("DC failed to set cursor attributes\n");

> @@ -4358,6 +4404,7 @@ static void handle_cursor_update(struct drm_plane *plane,

>   		if (!dc_stream_set_cursor_position(crtc_state->stream,

>   						   &position))

>   			DRM_ERROR("DC failed to set cursor position\n");

> +		mutex_unlock(&adev->dm.dc_lock);

>   	}

>   }

>   

> @@ -4573,6 +4620,7 @@ static void amdgpu_dm_do_flip(struct drm_crtc *crtc,

>   				&acrtc_state->stream->vrr_infopacket;

>   	}

>   

> +	mutex_lock(&adev->dm.dc_lock);

>   	dc_commit_updates_for_stream(adev->dm.dc,

>   					     surface_updates,

>   					     1,

> @@ -4580,6 +4628,7 @@ static void amdgpu_dm_do_flip(struct drm_crtc *crtc,

>   					     &stream_update,

>   					     &surface_updates->surface,

>   					     state);

> +	mutex_unlock(&adev->dm.dc_lock);

>   

>   	DRM_DEBUG_DRIVER("%s Flipping to hi: 0x%x, low: 0x%x \n",

>   			 __func__,

> @@ -4594,6 +4643,7 @@ static void amdgpu_dm_do_flip(struct drm_crtc *crtc,

>    * with a dc_plane_state and follow the atomic model a bit more closely here.

>    */

>   static bool commit_planes_to_stream(

> +		struct amdgpu_display_manager *dm,

>   		struct dc *dc,

>   		struct dc_plane_state **plane_states,

>   		uint8_t new_plane_count,

> @@ -4670,11 +4720,13 @@ static bool commit_planes_to_stream(

>   		updates[i].scaling_info = &scaling_info[i];

>   	}

>   

> +	mutex_lock(&dm->dc_lock);

>   	dc_commit_updates_for_stream(

>   			dc,

>   			updates,

>   			new_plane_count,

>   			dc_stream, stream_update, plane_states, state);

> +	mutex_unlock(&dm->dc_lock);

>   

>   	kfree(flip_addr);

>   	kfree(plane_info);

> @@ -4780,7 +4832,8 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,

>   

>   		dc_stream_attach->abm_level = acrtc_state->abm_level;

>   

> -		if (false == commit_planes_to_stream(dm->dc,

> +		if (false == commit_planes_to_stream(dm,

> +							dm->dc,

>   							plane_states_constructed,

>   							planes_count,

>   							acrtc_state,

> @@ -4950,7 +5003,9 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)

>   

>   	if (dc_state) {

>   		dm_enable_per_frame_crtc_master_sync(dc_state);

> +		mutex_lock(&dm->dc_lock);

>   		WARN_ON(!dc_commit_state(dm->dc, dc_state));

> +		mutex_unlock(&dm->dc_lock);

>   	}

>   

>   	for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) {

> @@ -5012,6 +5067,7 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)

>   

>   		/*TODO How it works with MPO ?*/

>   		if (!commit_planes_to_stream(

> +				dm,

>   				dm->dc,

>   				status->plane_states,

>   				status->plane_count,

> @@ -5904,6 +5960,13 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,

>   			ret = -EINVAL;

>   			goto fail;

>   		}

> +	} else if (state->legacy_cursor_update) {

> +		/*

> +		 * This is a fast cursor update coming from the plane update

> +		 * helper, check if it can be done asynchronously for better

> +		 * performance.

> +		 */

> +		state->async_update = !drm_atomic_helper_async_check(dev, state);

>   	}

>   

>   	/* Must be success */

> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h

> index 4326dc256491..25bb91ee80ba 100644

> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h

> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h

> @@ -134,6 +134,14 @@ struct amdgpu_display_manager {

>   

>   	struct drm_modeset_lock atomic_obj_lock;

>   

> +	/**

> +	 * @dc_lock:

> +	 *

> +	 * Guards access to DC functions that can issue register write

> +	 * sequences.

> +	 */

> +	struct mutex dc_lock;

> +

>   	/**

>   	 * @irq_handler_list_low_tab:

>   	 *
On 2018-12-05 3:26 p.m., Grodzovsky, Andrey wrote:
> 

> 

> On 12/05/2018 02:59 PM, Nicholas Kazlauskas wrote:

>> [Why]

>> Legacy cursor plane updates from drm helpers go through the full

>> atomic codepath. A high volume of cursor updates through this slow

>> code path can cause subsequent page-flips to skip vblank intervals

>> since each individual update is slow.

>>

>> This problem is particularly noticeable for the compton compositor.

>>

>> [How]

>> A fast path for cursor plane updates is added by using DRM asynchronous

>> commit support provided by async_check and async_update. These don't do

>> a full state/flip_done dependency stall and they don't block other

>> commit work.

>>

>> However, DC still expects itself to be single-threaded for anything

>> that can issue register writes. Screen corruption or hangs can occur

>> if write sequences overlap. Every call that potentially perform

>> register writes needs to be guarded for asynchronous updates to work.

>> The dc_lock mutex was added for this.

> 

> As far as page flip related register writes concerned this I think this

> was never an issue - we always

> supported fully concurrent page flips on different CRTCs meaning writing

> surface address concurrently

> on different pipes. So Are you sure you  can't do cursor update

> concurrently against at least page flips ?

> I am not sure about full updates.

> 

> Andrey


I think this was true before we started locking the pipes around cursor 
updates (and probably only for dce). The problem that occur here is 
writing to the lock register from multiple threads at the same time.

In general I think the "contract" between dm/dc now is that anything 
from dc that does register write sequences can't be called from multiple 
threads.

Nicholas Kazlauskas

> 

>>

>> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=106175

>>

>> Cc: Leo Li <sunpeng.li@amd.com>

>> Cc: Harry Wentland <harry.wentland@amd.com>

>> Signed-off-by: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>

>> ---

>>    .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 67 ++++++++++++++++++-

>>    .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h |  8 +++

>>    2 files changed, 73 insertions(+), 2 deletions(-)

>>

>> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c

>> index 23d61570df17..4df14a50a8ac 100644

>> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c

>> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c

>> @@ -57,6 +57,7 @@

>>    

>>    #include <drm/drmP.h>

>>    #include <drm/drm_atomic.h>

>> +#include <drm/drm_atomic_uapi.h>

>>    #include <drm/drm_atomic_helper.h>

>>    #include <drm/drm_dp_mst_helper.h>

>>    #include <drm/drm_fb_helper.h>

>> @@ -133,6 +134,8 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state);

>>    static int amdgpu_dm_atomic_check(struct drm_device *dev,

>>    				  struct drm_atomic_state *state);

>>    

>> +static void handle_cursor_update(struct drm_plane *plane,

>> +				 struct drm_plane_state *old_plane_state);

>>    

>>    

>>    

>> @@ -402,6 +405,8 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)

>>    	/* Zero all the fields */

>>    	memset(&init_data, 0, sizeof(init_data));

>>    

>> +	mutex_init(&adev->dm.dc_lock);

>> +

>>    	if(amdgpu_dm_irq_init(adev)) {

>>    		DRM_ERROR("amdgpu: failed to initialize DM IRQ support.\n");

>>    		goto error;

>> @@ -516,6 +521,9 @@ static void amdgpu_dm_fini(struct amdgpu_device *adev)

>>    	/* DC Destroy TODO: Replace destroy DAL */

>>    	if (adev->dm.dc)

>>    		dc_destroy(&adev->dm.dc);

>> +

>> +	mutex_destroy(&adev->dm.dc_lock);

>> +

>>    	return;

>>    }

>>    

>> @@ -3615,10 +3623,43 @@ static int dm_plane_atomic_check(struct drm_plane *plane,

>>    	return -EINVAL;

>>    }

>>    

>> +static int dm_plane_atomic_async_check(struct drm_plane *plane,

>> +				       struct drm_plane_state *new_plane_state)

>> +{

>> +	/* Only support async updates on cursor planes. */

>> +	if (plane->type != DRM_PLANE_TYPE_CURSOR)

>> +		return -EINVAL;

>> +

>> +	return 0;

>> +}

>> +

>> +static void dm_plane_atomic_async_update(struct drm_plane *plane,

>> +					 struct drm_plane_state *new_state)

>> +{

>> +	struct drm_plane_state *old_state =

>> +		drm_atomic_get_old_plane_state(new_state->state, plane);

>> +

>> +	if (plane->state->fb != new_state->fb)

>> +		drm_atomic_set_fb_for_plane(plane->state, new_state->fb);

>> +

>> +	plane->state->src_x = new_state->src_x;

>> +	plane->state->src_y = new_state->src_y;

>> +	plane->state->src_w = new_state->src_w;

>> +	plane->state->src_h = new_state->src_h;

>> +	plane->state->crtc_x = new_state->crtc_x;

>> +	plane->state->crtc_y = new_state->crtc_y;

>> +	plane->state->crtc_w = new_state->crtc_w;

>> +	plane->state->crtc_h = new_state->crtc_h;

>> +

>> +	handle_cursor_update(plane, old_state);

>> +}

>> +

>>    static const struct drm_plane_helper_funcs dm_plane_helper_funcs = {

>>    	.prepare_fb = dm_plane_helper_prepare_fb,

>>    	.cleanup_fb = dm_plane_helper_cleanup_fb,

>>    	.atomic_check = dm_plane_atomic_check,

>> +	.atomic_async_check = dm_plane_atomic_async_check,

>> +	.atomic_async_update = dm_plane_atomic_async_update

>>    };

>>    

>>    /*

>> @@ -4307,6 +4348,7 @@ static int get_cursor_position(struct drm_plane *plane, struct drm_crtc *crtc,

>>    static void handle_cursor_update(struct drm_plane *plane,

>>    				 struct drm_plane_state *old_plane_state)

>>    {

>> +	struct amdgpu_device *adev = plane->dev->dev_private;

>>    	struct amdgpu_framebuffer *afb = to_amdgpu_framebuffer(plane->state->fb);

>>    	struct drm_crtc *crtc = afb ? plane->state->crtc : old_plane_state->crtc;

>>    	struct dm_crtc_state *crtc_state = crtc ? to_dm_crtc_state(crtc->state) : NULL;

>> @@ -4331,9 +4373,12 @@ static void handle_cursor_update(struct drm_plane *plane,

>>    

>>    	if (!position.enable) {

>>    		/* turn off cursor */

>> -		if (crtc_state && crtc_state->stream)

>> +		if (crtc_state && crtc_state->stream) {

>> +			mutex_lock(&adev->dm.dc_lock);

>>    			dc_stream_set_cursor_position(crtc_state->stream,

>>    						      &position);

>> +			mutex_unlock(&adev->dm.dc_lock);

>> +		}

>>    		return;

>>    	}

>>    

>> @@ -4351,6 +4396,7 @@ static void handle_cursor_update(struct drm_plane *plane,

>>    	attributes.pitch = attributes.width;

>>    

>>    	if (crtc_state->stream) {

>> +		mutex_lock(&adev->dm.dc_lock);

>>    		if (!dc_stream_set_cursor_attributes(crtc_state->stream,

>>    							 &attributes))

>>    			DRM_ERROR("DC failed to set cursor attributes\n");

>> @@ -4358,6 +4404,7 @@ static void handle_cursor_update(struct drm_plane *plane,

>>    		if (!dc_stream_set_cursor_position(crtc_state->stream,

>>    						   &position))

>>    			DRM_ERROR("DC failed to set cursor position\n");

>> +		mutex_unlock(&adev->dm.dc_lock);

>>    	}

>>    }

>>    

>> @@ -4573,6 +4620,7 @@ static void amdgpu_dm_do_flip(struct drm_crtc *crtc,

>>    				&acrtc_state->stream->vrr_infopacket;

>>    	}

>>    

>> +	mutex_lock(&adev->dm.dc_lock);

>>    	dc_commit_updates_for_stream(adev->dm.dc,

>>    					     surface_updates,

>>    					     1,

>> @@ -4580,6 +4628,7 @@ static void amdgpu_dm_do_flip(struct drm_crtc *crtc,

>>    					     &stream_update,

>>    					     &surface_updates->surface,

>>    					     state);

>> +	mutex_unlock(&adev->dm.dc_lock);

>>    

>>    	DRM_DEBUG_DRIVER("%s Flipping to hi: 0x%x, low: 0x%x \n",

>>    			 __func__,

>> @@ -4594,6 +4643,7 @@ static void amdgpu_dm_do_flip(struct drm_crtc *crtc,

>>     * with a dc_plane_state and follow the atomic model a bit more closely here.

>>     */

>>    static bool commit_planes_to_stream(

>> +		struct amdgpu_display_manager *dm,

>>    		struct dc *dc,

>>    		struct dc_plane_state **plane_states,

>>    		uint8_t new_plane_count,

>> @@ -4670,11 +4720,13 @@ static bool commit_planes_to_stream(

>>    		updates[i].scaling_info = &scaling_info[i];

>>    	}

>>    

>> +	mutex_lock(&dm->dc_lock);

>>    	dc_commit_updates_for_stream(

>>    			dc,

>>    			updates,

>>    			new_plane_count,

>>    			dc_stream, stream_update, plane_states, state);

>> +	mutex_unlock(&dm->dc_lock);

>>    

>>    	kfree(flip_addr);

>>    	kfree(plane_info);

>> @@ -4780,7 +4832,8 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,

>>    

>>    		dc_stream_attach->abm_level = acrtc_state->abm_level;

>>    

>> -		if (false == commit_planes_to_stream(dm->dc,

>> +		if (false == commit_planes_to_stream(dm,

>> +							dm->dc,

>>    							plane_states_constructed,

>>    							planes_count,

>>    							acrtc_state,

>> @@ -4950,7 +5003,9 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)

>>    

>>    	if (dc_state) {

>>    		dm_enable_per_frame_crtc_master_sync(dc_state);

>> +		mutex_lock(&dm->dc_lock);

>>    		WARN_ON(!dc_commit_state(dm->dc, dc_state));

>> +		mutex_unlock(&dm->dc_lock);

>>    	}

>>    

>>    	for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) {

>> @@ -5012,6 +5067,7 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)

>>    

>>    		/*TODO How it works with MPO ?*/

>>    		if (!commit_planes_to_stream(

>> +				dm,

>>    				dm->dc,

>>    				status->plane_states,

>>    				status->plane_count,

>> @@ -5904,6 +5960,13 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,

>>    			ret = -EINVAL;

>>    			goto fail;

>>    		}

>> +	} else if (state->legacy_cursor_update) {

>> +		/*

>> +		 * This is a fast cursor update coming from the plane update

>> +		 * helper, check if it can be done asynchronously for better

>> +		 * performance.

>> +		 */

>> +		state->async_update = !drm_atomic_helper_async_check(dev, state);

>>    	}

>>    

>>    	/* Must be success */

>> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h

>> index 4326dc256491..25bb91ee80ba 100644

>> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h

>> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h

>> @@ -134,6 +134,14 @@ struct amdgpu_display_manager {

>>    

>>    	struct drm_modeset_lock atomic_obj_lock;

>>    

>> +	/**

>> +	 * @dc_lock:

>> +	 *

>> +	 * Guards access to DC functions that can issue register write

>> +	 * sequences.

>> +	 */

>> +	struct mutex dc_lock;

>> +

>>    	/**

>>    	 * @irq_handler_list_low_tab:

>>    	 *

>
On 12/05/2018 03:42 PM, Kazlauskas, Nicholas wrote:
> On 2018-12-05 3:26 p.m., Grodzovsky, Andrey wrote:

>>

>> On 12/05/2018 02:59 PM, Nicholas Kazlauskas wrote:

>>> [Why]

>>> Legacy cursor plane updates from drm helpers go through the full

>>> atomic codepath. A high volume of cursor updates through this slow

>>> code path can cause subsequent page-flips to skip vblank intervals

>>> since each individual update is slow.

>>>

>>> This problem is particularly noticeable for the compton compositor.

>>>

>>> [How]

>>> A fast path for cursor plane updates is added by using DRM asynchronous

>>> commit support provided by async_check and async_update. These don't do

>>> a full state/flip_done dependency stall and they don't block other

>>> commit work.

>>>

>>> However, DC still expects itself to be single-threaded for anything

>>> that can issue register writes. Screen corruption or hangs can occur

>>> if write sequences overlap. Every call that potentially perform

>>> register writes needs to be guarded for asynchronous updates to work.

>>> The dc_lock mutex was added for this.

>> As far as page flip related register writes concerned this I think this

>> was never an issue - we always

>> supported fully concurrent page flips on different CRTCs meaning writing

>> surface address concurrently

>> on different pipes. So Are you sure you  can't do cursor update

>> concurrently against at least page flips ?

>> I am not sure about full updates.

>>

>> Andrey

> I think this was true before we started locking the pipes around cursor

> updates (and probably only for dce). The problem that occur here is

> writing to the lock register from multiple threads at the same time.

>

> In general I think the "contract" between dm/dc now is that anything

> from dc that does register write sequences can't be called from multiple

> threads.

>

> Nicholas Kazlauskas


Ok, do note that you also serialize all the page flips now which looks 
unneeded - maybe consider r/w lock to at least avoid that.

Andrey

>

>>> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=106175

>>>

>>> Cc: Leo Li <sunpeng.li@amd.com>

>>> Cc: Harry Wentland <harry.wentland@amd.com>

>>> Signed-off-by: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>

>>> ---

>>>     .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 67 ++++++++++++++++++-

>>>     .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h |  8 +++

>>>     2 files changed, 73 insertions(+), 2 deletions(-)

>>>

>>> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c

>>> index 23d61570df17..4df14a50a8ac 100644

>>> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c

>>> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c

>>> @@ -57,6 +57,7 @@

>>>     

>>>     #include <drm/drmP.h>

>>>     #include <drm/drm_atomic.h>

>>> +#include <drm/drm_atomic_uapi.h>

>>>     #include <drm/drm_atomic_helper.h>

>>>     #include <drm/drm_dp_mst_helper.h>

>>>     #include <drm/drm_fb_helper.h>

>>> @@ -133,6 +134,8 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state);

>>>     static int amdgpu_dm_atomic_check(struct drm_device *dev,

>>>     				  struct drm_atomic_state *state);

>>>     

>>> +static void handle_cursor_update(struct drm_plane *plane,

>>> +				 struct drm_plane_state *old_plane_state);

>>>     

>>>     

>>>     

>>> @@ -402,6 +405,8 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)

>>>     	/* Zero all the fields */

>>>     	memset(&init_data, 0, sizeof(init_data));

>>>     

>>> +	mutex_init(&adev->dm.dc_lock);

>>> +

>>>     	if(amdgpu_dm_irq_init(adev)) {

>>>     		DRM_ERROR("amdgpu: failed to initialize DM IRQ support.\n");

>>>     		goto error;

>>> @@ -516,6 +521,9 @@ static void amdgpu_dm_fini(struct amdgpu_device *adev)

>>>     	/* DC Destroy TODO: Replace destroy DAL */

>>>     	if (adev->dm.dc)

>>>     		dc_destroy(&adev->dm.dc);

>>> +

>>> +	mutex_destroy(&adev->dm.dc_lock);

>>> +

>>>     	return;

>>>     }

>>>     

>>> @@ -3615,10 +3623,43 @@ static int dm_plane_atomic_check(struct drm_plane *plane,

>>>     	return -EINVAL;

>>>     }

>>>     

>>> +static int dm_plane_atomic_async_check(struct drm_plane *plane,

>>> +				       struct drm_plane_state *new_plane_state)

>>> +{

>>> +	/* Only support async updates on cursor planes. */

>>> +	if (plane->type != DRM_PLANE_TYPE_CURSOR)

>>> +		return -EINVAL;

>>> +

>>> +	return 0;

>>> +}

>>> +

>>> +static void dm_plane_atomic_async_update(struct drm_plane *plane,

>>> +					 struct drm_plane_state *new_state)

>>> +{

>>> +	struct drm_plane_state *old_state =

>>> +		drm_atomic_get_old_plane_state(new_state->state, plane);

>>> +

>>> +	if (plane->state->fb != new_state->fb)

>>> +		drm_atomic_set_fb_for_plane(plane->state, new_state->fb);

>>> +

>>> +	plane->state->src_x = new_state->src_x;

>>> +	plane->state->src_y = new_state->src_y;

>>> +	plane->state->src_w = new_state->src_w;

>>> +	plane->state->src_h = new_state->src_h;

>>> +	plane->state->crtc_x = new_state->crtc_x;

>>> +	plane->state->crtc_y = new_state->crtc_y;

>>> +	plane->state->crtc_w = new_state->crtc_w;

>>> +	plane->state->crtc_h = new_state->crtc_h;

>>> +

>>> +	handle_cursor_update(plane, old_state);

>>> +}

>>> +

>>>     static const struct drm_plane_helper_funcs dm_plane_helper_funcs = {

>>>     	.prepare_fb = dm_plane_helper_prepare_fb,

>>>     	.cleanup_fb = dm_plane_helper_cleanup_fb,

>>>     	.atomic_check = dm_plane_atomic_check,

>>> +	.atomic_async_check = dm_plane_atomic_async_check,

>>> +	.atomic_async_update = dm_plane_atomic_async_update

>>>     };

>>>     

>>>     /*

>>> @@ -4307,6 +4348,7 @@ static int get_cursor_position(struct drm_plane *plane, struct drm_crtc *crtc,

>>>     static void handle_cursor_update(struct drm_plane *plane,

>>>     				 struct drm_plane_state *old_plane_state)

>>>     {

>>> +	struct amdgpu_device *adev = plane->dev->dev_private;

>>>     	struct amdgpu_framebuffer *afb = to_amdgpu_framebuffer(plane->state->fb);

>>>     	struct drm_crtc *crtc = afb ? plane->state->crtc : old_plane_state->crtc;

>>>     	struct dm_crtc_state *crtc_state = crtc ? to_dm_crtc_state(crtc->state) : NULL;

>>> @@ -4331,9 +4373,12 @@ static void handle_cursor_update(struct drm_plane *plane,

>>>     

>>>     	if (!position.enable) {

>>>     		/* turn off cursor */

>>> -		if (crtc_state && crtc_state->stream)

>>> +		if (crtc_state && crtc_state->stream) {

>>> +			mutex_lock(&adev->dm.dc_lock);

>>>     			dc_stream_set_cursor_position(crtc_state->stream,

>>>     						      &position);

>>> +			mutex_unlock(&adev->dm.dc_lock);

>>> +		}

>>>     		return;

>>>     	}

>>>     

>>> @@ -4351,6 +4396,7 @@ static void handle_cursor_update(struct drm_plane *plane,

>>>     	attributes.pitch = attributes.width;

>>>     

>>>     	if (crtc_state->stream) {

>>> +		mutex_lock(&adev->dm.dc_lock);

>>>     		if (!dc_stream_set_cursor_attributes(crtc_state->stream,

>>>     							 &attributes))

>>>     			DRM_ERROR("DC failed to set cursor attributes\n");

>>> @@ -4358,6 +4404,7 @@ static void handle_cursor_update(struct drm_plane *plane,

>>>     		if (!dc_stream_set_cursor_position(crtc_state->stream,

>>>     						   &position))

>>>     			DRM_ERROR("DC failed to set cursor position\n");

>>> +		mutex_unlock(&adev->dm.dc_lock);

>>>     	}

>>>     }

>>>     

>>> @@ -4573,6 +4620,7 @@ static void amdgpu_dm_do_flip(struct drm_crtc *crtc,

>>>     				&acrtc_state->stream->vrr_infopacket;

>>>     	}

>>>     

>>> +	mutex_lock(&adev->dm.dc_lock);

>>>     	dc_commit_updates_for_stream(adev->dm.dc,

>>>     					     surface_updates,

>>>     					     1,

>>> @@ -4580,6 +4628,7 @@ static void amdgpu_dm_do_flip(struct drm_crtc *crtc,

>>>     					     &stream_update,

>>>     					     &surface_updates->surface,

>>>     					     state);

>>> +	mutex_unlock(&adev->dm.dc_lock);

>>>     

>>>     	DRM_DEBUG_DRIVER("%s Flipping to hi: 0x%x, low: 0x%x \n",

>>>     			 __func__,

>>> @@ -4594,6 +4643,7 @@ static void amdgpu_dm_do_flip(struct drm_crtc *crtc,

>>>      * with a dc_plane_state and follow the atomic model a bit more closely here.

>>>      */

>>>     static bool commit_planes_to_stream(

>>> +		struct amdgpu_display_manager *dm,

>>>     		struct dc *dc,

>>>     		struct dc_plane_state **plane_states,

>>>     		uint8_t new_plane_count,

>>> @@ -4670,11 +4720,13 @@ static bool commit_planes_to_stream(

>>>     		updates[i].scaling_info = &scaling_info[i];

>>>     	}

>>>     

>>> +	mutex_lock(&dm->dc_lock);

>>>     	dc_commit_updates_for_stream(

>>>     			dc,

>>>     			updates,

>>>     			new_plane_count,

>>>     			dc_stream, stream_update, plane_states, state);

>>> +	mutex_unlock(&dm->dc_lock);

>>>     

>>>     	kfree(flip_addr);

>>>     	kfree(plane_info);

>>> @@ -4780,7 +4832,8 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,

>>>     

>>>     		dc_stream_attach->abm_level = acrtc_state->abm_level;

>>>     

>>> -		if (false == commit_planes_to_stream(dm->dc,

>>> +		if (false == commit_planes_to_stream(dm,

>>> +							dm->dc,

>>>     							plane_states_constructed,

>>>     							planes_count,

>>>     							acrtc_state,

>>> @@ -4950,7 +5003,9 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)

>>>     

>>>     	if (dc_state) {

>>>     		dm_enable_per_frame_crtc_master_sync(dc_state);

>>> +		mutex_lock(&dm->dc_lock);

>>>     		WARN_ON(!dc_commit_state(dm->dc, dc_state));

>>> +		mutex_unlock(&dm->dc_lock);

>>>     	}

>>>     

>>>     	for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) {

>>> @@ -5012,6 +5067,7 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)

>>>     

>>>     		/*TODO How it works with MPO ?*/

>>>     		if (!commit_planes_to_stream(

>>> +				dm,

>>>     				dm->dc,

>>>     				status->plane_states,

>>>     				status->plane_count,

>>> @@ -5904,6 +5960,13 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,

>>>     			ret = -EINVAL;

>>>     			goto fail;

>>>     		}

>>> +	} else if (state->legacy_cursor_update) {

>>> +		/*

>>> +		 * This is a fast cursor update coming from the plane update

>>> +		 * helper, check if it can be done asynchronously for better

>>> +		 * performance.

>>> +		 */

>>> +		state->async_update = !drm_atomic_helper_async_check(dev, state);

>>>     	}

>>>     

>>>     	/* Must be success */

>>> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h

>>> index 4326dc256491..25bb91ee80ba 100644

>>> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h

>>> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h

>>> @@ -134,6 +134,14 @@ struct amdgpu_display_manager {

>>>     

>>>     	struct drm_modeset_lock atomic_obj_lock;

>>>     

>>> +	/**

>>> +	 * @dc_lock:

>>> +	 *

>>> +	 * Guards access to DC functions that can issue register write

>>> +	 * sequences.

>>> +	 */

>>> +	struct mutex dc_lock;

>>> +

>>>     	/**

>>>     	 * @irq_handler_list_low_tab:

>>>     	 *
On 2018-12-05 3:44 p.m., Grodzovsky, Andrey wrote:
> 

> 

> On 12/05/2018 03:42 PM, Kazlauskas, Nicholas wrote:

>> On 2018-12-05 3:26 p.m., Grodzovsky, Andrey wrote:

>>>

>>> On 12/05/2018 02:59 PM, Nicholas Kazlauskas wrote:

>>>> [Why]

>>>> Legacy cursor plane updates from drm helpers go through the full

>>>> atomic codepath. A high volume of cursor updates through this slow

>>>> code path can cause subsequent page-flips to skip vblank intervals

>>>> since each individual update is slow.

>>>>

>>>> This problem is particularly noticeable for the compton compositor.

>>>>

>>>> [How]

>>>> A fast path for cursor plane updates is added by using DRM asynchronous

>>>> commit support provided by async_check and async_update. These don't do

>>>> a full state/flip_done dependency stall and they don't block other

>>>> commit work.

>>>>

>>>> However, DC still expects itself to be single-threaded for anything

>>>> that can issue register writes. Screen corruption or hangs can occur

>>>> if write sequences overlap. Every call that potentially perform

>>>> register writes needs to be guarded for asynchronous updates to work.

>>>> The dc_lock mutex was added for this.

>>> As far as page flip related register writes concerned this I think this

>>> was never an issue - we always

>>> supported fully concurrent page flips on different CRTCs meaning writing

>>> surface address concurrently

>>> on different pipes. So Are you sure you  can't do cursor update

>>> concurrently against at least page flips ?

>>> I am not sure about full updates.

>>>

>>> Andrey

>> I think this was true before we started locking the pipes around cursor

>> updates (and probably only for dce). The problem that occur here is

>> writing to the lock register from multiple threads at the same time.

>>

>> In general I think the "contract" between dm/dc now is that anything

>> from dc that does register write sequences can't be called from multiple

>> threads.

>>

>> Nicholas Kazlauskas

> 

> Ok, do note that you also serialize all the page flips now which looks

> unneeded - maybe consider r/w lock to at least avoid that.

> 

> Andrey


Yeah, they definitely shouldn't be happening at the same time as the 
cursor calls.

I'd need to look into whether it's okay now to do concurrent writes from 
the stream update functions though, but I'd imagine it's not something 
we'd want to be doing. A r/w lock would work if we could though.

Nicholas Kazlauskas

> 

>>

>>>> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=106175

>>>>

>>>> Cc: Leo Li <sunpeng.li@amd.com>

>>>> Cc: Harry Wentland <harry.wentland@amd.com>

>>>> Signed-off-by: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>

>>>> ---

>>>>      .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 67 ++++++++++++++++++-

>>>>      .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h |  8 +++

>>>>      2 files changed, 73 insertions(+), 2 deletions(-)

>>>>

>>>> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c

>>>> index 23d61570df17..4df14a50a8ac 100644

>>>> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c

>>>> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c

>>>> @@ -57,6 +57,7 @@

>>>>      

>>>>      #include <drm/drmP.h>

>>>>      #include <drm/drm_atomic.h>

>>>> +#include <drm/drm_atomic_uapi.h>

>>>>      #include <drm/drm_atomic_helper.h>

>>>>      #include <drm/drm_dp_mst_helper.h>

>>>>      #include <drm/drm_fb_helper.h>

>>>> @@ -133,6 +134,8 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state);

>>>>      static int amdgpu_dm_atomic_check(struct drm_device *dev,

>>>>      				  struct drm_atomic_state *state);

>>>>      

>>>> +static void handle_cursor_update(struct drm_plane *plane,

>>>> +				 struct drm_plane_state *old_plane_state);

>>>>      

>>>>      

>>>>      

>>>> @@ -402,6 +405,8 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)

>>>>      	/* Zero all the fields */

>>>>      	memset(&init_data, 0, sizeof(init_data));

>>>>      

>>>> +	mutex_init(&adev->dm.dc_lock);

>>>> +

>>>>      	if(amdgpu_dm_irq_init(adev)) {

>>>>      		DRM_ERROR("amdgpu: failed to initialize DM IRQ support.\n");

>>>>      		goto error;

>>>> @@ -516,6 +521,9 @@ static void amdgpu_dm_fini(struct amdgpu_device *adev)

>>>>      	/* DC Destroy TODO: Replace destroy DAL */

>>>>      	if (adev->dm.dc)

>>>>      		dc_destroy(&adev->dm.dc);

>>>> +

>>>> +	mutex_destroy(&adev->dm.dc_lock);

>>>> +

>>>>      	return;

>>>>      }

>>>>      

>>>> @@ -3615,10 +3623,43 @@ static int dm_plane_atomic_check(struct drm_plane *plane,

>>>>      	return -EINVAL;

>>>>      }

>>>>      

>>>> +static int dm_plane_atomic_async_check(struct drm_plane *plane,

>>>> +				       struct drm_plane_state *new_plane_state)

>>>> +{

>>>> +	/* Only support async updates on cursor planes. */

>>>> +	if (plane->type != DRM_PLANE_TYPE_CURSOR)

>>>> +		return -EINVAL;

>>>> +

>>>> +	return 0;

>>>> +}

>>>> +

>>>> +static void dm_plane_atomic_async_update(struct drm_plane *plane,

>>>> +					 struct drm_plane_state *new_state)

>>>> +{

>>>> +	struct drm_plane_state *old_state =

>>>> +		drm_atomic_get_old_plane_state(new_state->state, plane);

>>>> +

>>>> +	if (plane->state->fb != new_state->fb)

>>>> +		drm_atomic_set_fb_for_plane(plane->state, new_state->fb);

>>>> +

>>>> +	plane->state->src_x = new_state->src_x;

>>>> +	plane->state->src_y = new_state->src_y;

>>>> +	plane->state->src_w = new_state->src_w;

>>>> +	plane->state->src_h = new_state->src_h;

>>>> +	plane->state->crtc_x = new_state->crtc_x;

>>>> +	plane->state->crtc_y = new_state->crtc_y;

>>>> +	plane->state->crtc_w = new_state->crtc_w;

>>>> +	plane->state->crtc_h = new_state->crtc_h;

>>>> +

>>>> +	handle_cursor_update(plane, old_state);

>>>> +}

>>>> +

>>>>      static const struct drm_plane_helper_funcs dm_plane_helper_funcs = {

>>>>      	.prepare_fb = dm_plane_helper_prepare_fb,

>>>>      	.cleanup_fb = dm_plane_helper_cleanup_fb,

>>>>      	.atomic_check = dm_plane_atomic_check,

>>>> +	.atomic_async_check = dm_plane_atomic_async_check,

>>>> +	.atomic_async_update = dm_plane_atomic_async_update

>>>>      };

>>>>      

>>>>      /*

>>>> @@ -4307,6 +4348,7 @@ static int get_cursor_position(struct drm_plane *plane, struct drm_crtc *crtc,

>>>>      static void handle_cursor_update(struct drm_plane *plane,

>>>>      				 struct drm_plane_state *old_plane_state)

>>>>      {

>>>> +	struct amdgpu_device *adev = plane->dev->dev_private;

>>>>      	struct amdgpu_framebuffer *afb = to_amdgpu_framebuffer(plane->state->fb);

>>>>      	struct drm_crtc *crtc = afb ? plane->state->crtc : old_plane_state->crtc;

>>>>      	struct dm_crtc_state *crtc_state = crtc ? to_dm_crtc_state(crtc->state) : NULL;

>>>> @@ -4331,9 +4373,12 @@ static void handle_cursor_update(struct drm_plane *plane,

>>>>      

>>>>      	if (!position.enable) {

>>>>      		/* turn off cursor */

>>>> -		if (crtc_state && crtc_state->stream)

>>>> +		if (crtc_state && crtc_state->stream) {

>>>> +			mutex_lock(&adev->dm.dc_lock);

>>>>      			dc_stream_set_cursor_position(crtc_state->stream,

>>>>      						      &position);

>>>> +			mutex_unlock(&adev->dm.dc_lock);

>>>> +		}

>>>>      		return;

>>>>      	}

>>>>      

>>>> @@ -4351,6 +4396,7 @@ static void handle_cursor_update(struct drm_plane *plane,

>>>>      	attributes.pitch = attributes.width;

>>>>      

>>>>      	if (crtc_state->stream) {

>>>> +		mutex_lock(&adev->dm.dc_lock);

>>>>      		if (!dc_stream_set_cursor_attributes(crtc_state->stream,

>>>>      							 &attributes))

>>>>      			DRM_ERROR("DC failed to set cursor attributes\n");

>>>> @@ -4358,6 +4404,7 @@ static void handle_cursor_update(struct drm_plane *plane,

>>>>      		if (!dc_stream_set_cursor_position(crtc_state->stream,

>>>>      						   &position))

>>>>      			DRM_ERROR("DC failed to set cursor position\n");

>>>> +		mutex_unlock(&adev->dm.dc_lock);

>>>>      	}

>>>>      }

>>>>      

>>>> @@ -4573,6 +4620,7 @@ static void amdgpu_dm_do_flip(struct drm_crtc *crtc,

>>>>      				&acrtc_state->stream->vrr_infopacket;

>>>>      	}

>>>>      

>>>> +	mutex_lock(&adev->dm.dc_lock);

>>>>      	dc_commit_updates_for_stream(adev->dm.dc,

>>>>      					     surface_updates,

>>>>      					     1,

>>>> @@ -4580,6 +4628,7 @@ static void amdgpu_dm_do_flip(struct drm_crtc *crtc,

>>>>      					     &stream_update,

>>>>      					     &surface_updates->surface,

>>>>      					     state);

>>>> +	mutex_unlock(&adev->dm.dc_lock);

>>>>      

>>>>      	DRM_DEBUG_DRIVER("%s Flipping to hi: 0x%x, low: 0x%x \n",

>>>>      			 __func__,

>>>> @@ -4594,6 +4643,7 @@ static void amdgpu_dm_do_flip(struct drm_crtc *crtc,

>>>>       * with a dc_plane_state and follow the atomic model a bit more closely here.

>>>>       */

>>>>      static bool commit_planes_to_stream(

>>>> +		struct amdgpu_display_manager *dm,

>>>>      		struct dc *dc,

>>>>      		struct dc_plane_state **plane_states,

>>>>      		uint8_t new_plane_count,

>>>> @@ -4670,11 +4720,13 @@ static bool commit_planes_to_stream(

>>>>      		updates[i].scaling_info = &scaling_info[i];

>>>>      	}

>>>>      

>>>> +	mutex_lock(&dm->dc_lock);

>>>>      	dc_commit_updates_for_stream(

>>>>      			dc,

>>>>      			updates,

>>>>      			new_plane_count,

>>>>      			dc_stream, stream_update, plane_states, state);

>>>> +	mutex_unlock(&dm->dc_lock);

>>>>      

>>>>      	kfree(flip_addr);

>>>>      	kfree(plane_info);

>>>> @@ -4780,7 +4832,8 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,

>>>>      

>>>>      		dc_stream_attach->abm_level = acrtc_state->abm_level;

>>>>      

>>>> -		if (false == commit_planes_to_stream(dm->dc,

>>>> +		if (false == commit_planes_to_stream(dm,

>>>> +							dm->dc,

>>>>      							plane_states_constructed,

>>>>      							planes_count,

>>>>      							acrtc_state,

>>>> @@ -4950,7 +5003,9 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)

>>>>      

>>>>      	if (dc_state) {

>>>>      		dm_enable_per_frame_crtc_master_sync(dc_state);

>>>> +		mutex_lock(&dm->dc_lock);

>>>>      		WARN_ON(!dc_commit_state(dm->dc, dc_state));

>>>> +		mutex_unlock(&dm->dc_lock);

>>>>      	}

>>>>      

>>>>      	for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) {

>>>> @@ -5012,6 +5067,7 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)

>>>>      

>>>>      		/*TODO How it works with MPO ?*/

>>>>      		if (!commit_planes_to_stream(

>>>> +				dm,

>>>>      				dm->dc,

>>>>      				status->plane_states,

>>>>      				status->plane_count,

>>>> @@ -5904,6 +5960,13 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,

>>>>      			ret = -EINVAL;

>>>>      			goto fail;

>>>>      		}

>>>> +	} else if (state->legacy_cursor_update) {

>>>> +		/*

>>>> +		 * This is a fast cursor update coming from the plane update

>>>> +		 * helper, check if it can be done asynchronously for better

>>>> +		 * performance.

>>>> +		 */

>>>> +		state->async_update = !drm_atomic_helper_async_check(dev, state);

>>>>      	}

>>>>      

>>>>      	/* Must be success */

>>>> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h

>>>> index 4326dc256491..25bb91ee80ba 100644

>>>> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h

>>>> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h

>>>> @@ -134,6 +134,14 @@ struct amdgpu_display_manager {

>>>>      

>>>>      	struct drm_modeset_lock atomic_obj_lock;

>>>>      

>>>> +	/**

>>>> +	 * @dc_lock:

>>>> +	 *

>>>> +	 * Guards access to DC functions that can issue register write

>>>> +	 * sequences.

>>>> +	 */

>>>> +	struct mutex dc_lock;

>>>> +

>>>>      	/**

>>>>      	 * @irq_handler_list_low_tab:

>>>>      	 *

>
On 12/5/18 4:01 PM, Kazlauskas, Nicholas wrote:
> On 2018-12-05 3:44 p.m., Grodzovsky, Andrey wrote:

>>

>>

>> On 12/05/2018 03:42 PM, Kazlauskas, Nicholas wrote:

>>> On 2018-12-05 3:26 p.m., Grodzovsky, Andrey wrote:

>>>>

>>>> On 12/05/2018 02:59 PM, Nicholas Kazlauskas wrote:

>>>>> [Why]

>>>>> Legacy cursor plane updates from drm helpers go through the full

>>>>> atomic codepath. A high volume of cursor updates through this slow

>>>>> code path can cause subsequent page-flips to skip vblank intervals

>>>>> since each individual update is slow.

>>>>>

>>>>> This problem is particularly noticeable for the compton compositor.

>>>>>

>>>>> [How]

>>>>> A fast path for cursor plane updates is added by using DRM asynchronous

>>>>> commit support provided by async_check and async_update. These don't do

>>>>> a full state/flip_done dependency stall and they don't block other

>>>>> commit work.

>>>>>

>>>>> However, DC still expects itself to be single-threaded for anything

>>>>> that can issue register writes. Screen corruption or hangs can occur

>>>>> if write sequences overlap. Every call that potentially perform

>>>>> register writes needs to be guarded for asynchronous updates to work.

>>>>> The dc_lock mutex was added for this.

>>>> As far as page flip related register writes concerned this I think this

>>>> was never an issue - we always

>>>> supported fully concurrent page flips on different CRTCs meaning writing

>>>> surface address concurrently

>>>> on different pipes. So Are you sure you  can't do cursor update

>>>> concurrently against at least page flips ?

>>>> I am not sure about full updates.

>>>>

>>>> Andrey

>>> I think this was true before we started locking the pipes around cursor

>>> updates (and probably only for dce). The problem that occur here is

>>> writing to the lock register from multiple threads at the same time.

>>>

>>> In general I think the "contract" between dm/dc now is that anything

>>> from dc that does register write sequences can't be called from multiple

>>> threads.

>>>

>>> Nicholas Kazlauskas

>>

>> Ok, do note that you also serialize all the page flips now which looks

>> unneeded - maybe consider r/w lock to at least avoid that.

>>

>> Andrey

> 

> Yeah, they definitely shouldn't be happening at the same time as the

> cursor calls.

> 

> I'd need to look into whether it's okay now to do concurrent writes from

> the stream update functions though, but I'd imagine it's not something

> we'd want to be doing. A r/w lock would work if we could though.

> 

> Nicholas Kazlauskas


A reader/writer lock wouldn't help here, actually.

Fast updates (and page flips in particular) need to lock the pipe as 
well, see: commit_planes_for_stream.

Nicholas Kazlauskas

> 

>>

>>>

>>>>> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=106175

>>>>>

>>>>> Cc: Leo Li <sunpeng.li@amd.com>

>>>>> Cc: Harry Wentland <harry.wentland@amd.com>

>>>>> Signed-off-by: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>

>>>>> ---

>>>>>       .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 67 ++++++++++++++++++-

>>>>>       .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h |  8 +++

>>>>>       2 files changed, 73 insertions(+), 2 deletions(-)

>>>>>

>>>>> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c

>>>>> index 23d61570df17..4df14a50a8ac 100644

>>>>> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c

>>>>> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c

>>>>> @@ -57,6 +57,7 @@

>>>>>       

>>>>>       #include <drm/drmP.h>

>>>>>       #include <drm/drm_atomic.h>

>>>>> +#include <drm/drm_atomic_uapi.h>

>>>>>       #include <drm/drm_atomic_helper.h>

>>>>>       #include <drm/drm_dp_mst_helper.h>

>>>>>       #include <drm/drm_fb_helper.h>

>>>>> @@ -133,6 +134,8 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state);

>>>>>       static int amdgpu_dm_atomic_check(struct drm_device *dev,

>>>>>       				  struct drm_atomic_state *state);

>>>>>       

>>>>> +static void handle_cursor_update(struct drm_plane *plane,

>>>>> +				 struct drm_plane_state *old_plane_state);

>>>>>       

>>>>>       

>>>>>       

>>>>> @@ -402,6 +405,8 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)

>>>>>       	/* Zero all the fields */

>>>>>       	memset(&init_data, 0, sizeof(init_data));

>>>>>       

>>>>> +	mutex_init(&adev->dm.dc_lock);

>>>>> +

>>>>>       	if(amdgpu_dm_irq_init(adev)) {

>>>>>       		DRM_ERROR("amdgpu: failed to initialize DM IRQ support.\n");

>>>>>       		goto error;

>>>>> @@ -516,6 +521,9 @@ static void amdgpu_dm_fini(struct amdgpu_device *adev)

>>>>>       	/* DC Destroy TODO: Replace destroy DAL */

>>>>>       	if (adev->dm.dc)

>>>>>       		dc_destroy(&adev->dm.dc);

>>>>> +

>>>>> +	mutex_destroy(&adev->dm.dc_lock);

>>>>> +

>>>>>       	return;

>>>>>       }

>>>>>       

>>>>> @@ -3615,10 +3623,43 @@ static int dm_plane_atomic_check(struct drm_plane *plane,

>>>>>       	return -EINVAL;

>>>>>       }

>>>>>       

>>>>> +static int dm_plane_atomic_async_check(struct drm_plane *plane,

>>>>> +				       struct drm_plane_state *new_plane_state)

>>>>> +{

>>>>> +	/* Only support async updates on cursor planes. */

>>>>> +	if (plane->type != DRM_PLANE_TYPE_CURSOR)

>>>>> +		return -EINVAL;

>>>>> +

>>>>> +	return 0;

>>>>> +}

>>>>> +

>>>>> +static void dm_plane_atomic_async_update(struct drm_plane *plane,

>>>>> +					 struct drm_plane_state *new_state)

>>>>> +{

>>>>> +	struct drm_plane_state *old_state =

>>>>> +		drm_atomic_get_old_plane_state(new_state->state, plane);

>>>>> +

>>>>> +	if (plane->state->fb != new_state->fb)

>>>>> +		drm_atomic_set_fb_for_plane(plane->state, new_state->fb);

>>>>> +

>>>>> +	plane->state->src_x = new_state->src_x;

>>>>> +	plane->state->src_y = new_state->src_y;

>>>>> +	plane->state->src_w = new_state->src_w;

>>>>> +	plane->state->src_h = new_state->src_h;

>>>>> +	plane->state->crtc_x = new_state->crtc_x;

>>>>> +	plane->state->crtc_y = new_state->crtc_y;

>>>>> +	plane->state->crtc_w = new_state->crtc_w;

>>>>> +	plane->state->crtc_h = new_state->crtc_h;

>>>>> +

>>>>> +	handle_cursor_update(plane, old_state);

>>>>> +}

>>>>> +

>>>>>       static const struct drm_plane_helper_funcs dm_plane_helper_funcs = {

>>>>>       	.prepare_fb = dm_plane_helper_prepare_fb,

>>>>>       	.cleanup_fb = dm_plane_helper_cleanup_fb,

>>>>>       	.atomic_check = dm_plane_atomic_check,

>>>>> +	.atomic_async_check = dm_plane_atomic_async_check,

>>>>> +	.atomic_async_update = dm_plane_atomic_async_update

>>>>>       };

>>>>>       

>>>>>       /*

>>>>> @@ -4307,6 +4348,7 @@ static int get_cursor_position(struct drm_plane *plane, struct drm_crtc *crtc,

>>>>>       static void handle_cursor_update(struct drm_plane *plane,

>>>>>       				 struct drm_plane_state *old_plane_state)

>>>>>       {

>>>>> +	struct amdgpu_device *adev = plane->dev->dev_private;

>>>>>       	struct amdgpu_framebuffer *afb = to_amdgpu_framebuffer(plane->state->fb);

>>>>>       	struct drm_crtc *crtc = afb ? plane->state->crtc : old_plane_state->crtc;

>>>>>       	struct dm_crtc_state *crtc_state = crtc ? to_dm_crtc_state(crtc->state) : NULL;

>>>>> @@ -4331,9 +4373,12 @@ static void handle_cursor_update(struct drm_plane *plane,

>>>>>       

>>>>>       	if (!position.enable) {

>>>>>       		/* turn off cursor */

>>>>> -		if (crtc_state && crtc_state->stream)

>>>>> +		if (crtc_state && crtc_state->stream) {

>>>>> +			mutex_lock(&adev->dm.dc_lock);

>>>>>       			dc_stream_set_cursor_position(crtc_state->stream,

>>>>>       						      &position);

>>>>> +			mutex_unlock(&adev->dm.dc_lock);

>>>>> +		}

>>>>>       		return;

>>>>>       	}

>>>>>       

>>>>> @@ -4351,6 +4396,7 @@ static void handle_cursor_update(struct drm_plane *plane,

>>>>>       	attributes.pitch = attributes.width;

>>>>>       

>>>>>       	if (crtc_state->stream) {

>>>>> +		mutex_lock(&adev->dm.dc_lock);

>>>>>       		if (!dc_stream_set_cursor_attributes(crtc_state->stream,

>>>>>       							 &attributes))

>>>>>       			DRM_ERROR("DC failed to set cursor attributes\n");

>>>>> @@ -4358,6 +4404,7 @@ static void handle_cursor_update(struct drm_plane *plane,

>>>>>       		if (!dc_stream_set_cursor_position(crtc_state->stream,

>>>>>       						   &position))

>>>>>       			DRM_ERROR("DC failed to set cursor position\n");

>>>>> +		mutex_unlock(&adev->dm.dc_lock);

>>>>>       	}

>>>>>       }

>>>>>       

>>>>> @@ -4573,6 +4620,7 @@ static void amdgpu_dm_do_flip(struct drm_crtc *crtc,

>>>>>       				&acrtc_state->stream->vrr_infopacket;

>>>>>       	}

>>>>>       

>>>>> +	mutex_lock(&adev->dm.dc_lock);

>>>>>       	dc_commit_updates_for_stream(adev->dm.dc,

>>>>>       					     surface_updates,

>>>>>       					     1,

>>>>> @@ -4580,6 +4628,7 @@ static void amdgpu_dm_do_flip(struct drm_crtc *crtc,

>>>>>       					     &stream_update,

>>>>>       					     &surface_updates->surface,

>>>>>       					     state);

>>>>> +	mutex_unlock(&adev->dm.dc_lock);

>>>>>       

>>>>>       	DRM_DEBUG_DRIVER("%s Flipping to hi: 0x%x, low: 0x%x \n",

>>>>>       			 __func__,

>>>>> @@ -4594,6 +4643,7 @@ static void amdgpu_dm_do_flip(struct drm_crtc *crtc,

>>>>>        * with a dc_plane_state and follow the atomic model a bit more closely here.

>>>>>        */

>>>>>       static bool commit_planes_to_stream(

>>>>> +		struct amdgpu_display_manager *dm,

>>>>>       		struct dc *dc,

>>>>>       		struct dc_plane_state **plane_states,

>>>>>       		uint8_t new_plane_count,

>>>>> @@ -4670,11 +4720,13 @@ static bool commit_planes_to_stream(

>>>>>       		updates[i].scaling_info = &scaling_info[i];

>>>>>       	}

>>>>>       

>>>>> +	mutex_lock(&dm->dc_lock);

>>>>>       	dc_commit_updates_for_stream(

>>>>>       			dc,

>>>>>       			updates,

>>>>>       			new_plane_count,

>>>>>       			dc_stream, stream_update, plane_states, state);

>>>>> +	mutex_unlock(&dm->dc_lock);

>>>>>       

>>>>>       	kfree(flip_addr);

>>>>>       	kfree(plane_info);

>>>>> @@ -4780,7 +4832,8 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,

>>>>>       

>>>>>       		dc_stream_attach->abm_level = acrtc_state->abm_level;

>>>>>       

>>>>> -		if (false == commit_planes_to_stream(dm->dc,

>>>>> +		if (false == commit_planes_to_stream(dm,

>>>>> +							dm->dc,

>>>>>       							plane_states_constructed,

>>>>>       							planes_count,

>>>>>       							acrtc_state,

>>>>> @@ -4950,7 +5003,9 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)

>>>>>       

>>>>>       	if (dc_state) {

>>>>>       		dm_enable_per_frame_crtc_master_sync(dc_state);

>>>>> +		mutex_lock(&dm->dc_lock);

>>>>>       		WARN_ON(!dc_commit_state(dm->dc, dc_state));

>>>>> +		mutex_unlock(&dm->dc_lock);

>>>>>       	}

>>>>>       

>>>>>       	for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) {

>>>>> @@ -5012,6 +5067,7 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)

>>>>>       

>>>>>       		/*TODO How it works with MPO ?*/

>>>>>       		if (!commit_planes_to_stream(

>>>>> +				dm,

>>>>>       				dm->dc,

>>>>>       				status->plane_states,

>>>>>       				status->plane_count,

>>>>> @@ -5904,6 +5960,13 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,

>>>>>       			ret = -EINVAL;

>>>>>       			goto fail;

>>>>>       		}

>>>>> +	} else if (state->legacy_cursor_update) {

>>>>> +		/*

>>>>> +		 * This is a fast cursor update coming from the plane update

>>>>> +		 * helper, check if it can be done asynchronously for better

>>>>> +		 * performance.

>>>>> +		 */

>>>>> +		state->async_update = !drm_atomic_helper_async_check(dev, state);

>>>>>       	}

>>>>>       

>>>>>       	/* Must be success */

>>>>> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h

>>>>> index 4326dc256491..25bb91ee80ba 100644

>>>>> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h

>>>>> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h

>>>>> @@ -134,6 +134,14 @@ struct amdgpu_display_manager {

>>>>>       

>>>>>       	struct drm_modeset_lock atomic_obj_lock;

>>>>>       

>>>>> +	/**

>>>>> +	 * @dc_lock:

>>>>> +	 *

>>>>> +	 * Guards access to DC functions that can issue register write

>>>>> +	 * sequences.

>>>>> +	 */

>>>>> +	struct mutex dc_lock;

>>>>> +

>>>>>       	/**

>>>>>       	 * @irq_handler_list_low_tab:

>>>>>       	 *

>>

>
Not an expert on Freesync so maybe stupid question but from he comment 
looks like this pipe locking is only for the sake of Freesync mode there 
- why is it then called unconditionally w/o checking if you even run in 
Freesync mode ?

Andrey


On 12/06/2018 08:42 AM, Kazlauskas, Nicholas wrote:
> A reader/writer lock wouldn't help here, actually.

>

> Fast updates (and page flips in particular) need to lock the pipe as

> well, see: commit_planes_for_stream.

>

> Nicholas Kazlauskas
On 2018-12-06 10:36 a.m., Grodzovsky, Andrey wrote:
> Not an expert on Freesync so maybe stupid question but from he comment

> looks like this pipe locking is only for the sake of Freesync mode there

> - why is it then called unconditionally w/o checking if you even run in

> Freesync mode ?

> 

> Andrey

I don't think there's any sort of state tracking done in DC for FreeSync 
state changes on a stream so it was probably easier to do this. Given 
the age of the pipe lock addition I would be worried about other 
behavior relying on it now if it was just conditionally done on that.

In practice I think the worst case effect of guarding these calls with a 
mutex is <100us of delay. That's for two separate threads issuing two 
commits at the same time with the planes changed.

Nicholas Kazlauskas

> 

> 

> On 12/06/2018 08:42 AM, Kazlauskas, Nicholas wrote:

>> A reader/writer lock wouldn't help here, actually.

>>

>> Fast updates (and page flips in particular) need to lock the pipe as

>> well, see: commit_planes_for_stream.

>>

>> Nicholas Kazlauskas

>
Ok - the change is Acked-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>

Andrey


On 12/06/2018 10:59 AM, Nicholas Kazlauskas wrote:
> On 2018-12-06 10:36 a.m., Grodzovsky, Andrey wrote:

>> Not an expert on Freesync so maybe stupid question but from he comment

>> looks like this pipe locking is only for the sake of Freesync mode there

>> - why is it then called unconditionally w/o checking if you even run in

>> Freesync mode ?

>>

>> Andrey

> I don't think there's any sort of state tracking done in DC for FreeSync

> state changes on a stream so it was probably easier to do this. Given

> the age of the pipe lock addition I would be worried about other

> behavior relying on it now if it was just conditionally done on that.

>

> In practice I think the worst case effect of guarding these calls with a

> mutex is <100us of delay. That's for two separate threads issuing two

> commits at the same time with the planes changed.

>

> Nicholas Kazlauskas

>

>>

>> On 12/06/2018 08:42 AM, Kazlauskas, Nicholas wrote:

>>> A reader/writer lock wouldn't help here, actually.

>>>

>>> Fast updates (and page flips in particular) need to lock the pipe as

>>> well, see: commit_planes_for_stream.

>>>

>>> Nicholas Kazlauskas

> _______________________________________________

> amd-gfx mailing list

> amd-gfx@lists.freedesktop.org

> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
On 2018-12-05 2:59 p.m., Nicholas Kazlauskas wrote:
> [Why]

> Legacy cursor plane updates from drm helpers go through the full

> atomic codepath. A high volume of cursor updates through this slow

> code path can cause subsequent page-flips to skip vblank intervals

> since each individual update is slow.

> 

> This problem is particularly noticeable for the compton compositor.

> 

> [How]

> A fast path for cursor plane updates is added by using DRM asynchronous

> commit support provided by async_check and async_update. These don't do

> a full state/flip_done dependency stall and they don't block other

> commit work.

> 

> However, DC still expects itself to be single-threaded for anything

> that can issue register writes. Screen corruption or hangs can occur

> if write sequences overlap. Every call that potentially perform

> register writes needs to be guarded for asynchronous updates to work.

> The dc_lock mutex was added for this.

> 

> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=106175

> 

> Cc: Leo Li <sunpeng.li@amd.com>

> Cc: Harry Wentland <harry.wentland@amd.com>

> Signed-off-by: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>


I just skimmed through the calls to DC, but I trust you've locked on
everything that can potentially modify registers.

Might be a good future task to document DC interfaces that are not
thread safe, since it's currently not clear. Although this change does
shine light on that :)

Thanks,

Reviewed-by Leo Li <sunpeng.li@amd.com>

> ---

>   .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 67 ++++++++++++++++++-

>   .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h |  8 +++

>   2 files changed, 73 insertions(+), 2 deletions(-)

> 

> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c

> index 23d61570df17..4df14a50a8ac 100644

> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c

> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c

> @@ -57,6 +57,7 @@

>   

>   #include <drm/drmP.h>

>   #include <drm/drm_atomic.h>

> +#include <drm/drm_atomic_uapi.h>

>   #include <drm/drm_atomic_helper.h>

>   #include <drm/drm_dp_mst_helper.h>

>   #include <drm/drm_fb_helper.h>

> @@ -133,6 +134,8 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state);

>   static int amdgpu_dm_atomic_check(struct drm_device *dev,

>   				  struct drm_atomic_state *state);

>   

> +static void handle_cursor_update(struct drm_plane *plane,

> +				 struct drm_plane_state *old_plane_state);

>   

>   

>   

> @@ -402,6 +405,8 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)

>   	/* Zero all the fields */

>   	memset(&init_data, 0, sizeof(init_data));

>   

> +	mutex_init(&adev->dm.dc_lock);

> +

>   	if(amdgpu_dm_irq_init(adev)) {

>   		DRM_ERROR("amdgpu: failed to initialize DM IRQ support.\n");

>   		goto error;

> @@ -516,6 +521,9 @@ static void amdgpu_dm_fini(struct amdgpu_device *adev)

>   	/* DC Destroy TODO: Replace destroy DAL */

>   	if (adev->dm.dc)

>   		dc_destroy(&adev->dm.dc);

> +

> +	mutex_destroy(&adev->dm.dc_lock);

> +

>   	return;

>   }

>   

> @@ -3615,10 +3623,43 @@ static int dm_plane_atomic_check(struct drm_plane *plane,

>   	return -EINVAL;

>   }

>   

> +static int dm_plane_atomic_async_check(struct drm_plane *plane,

> +				       struct drm_plane_state *new_plane_state)

> +{

> +	/* Only support async updates on cursor planes. */

> +	if (plane->type != DRM_PLANE_TYPE_CURSOR)

> +		return -EINVAL;

> +

> +	return 0;

> +}

> +

> +static void dm_plane_atomic_async_update(struct drm_plane *plane,

> +					 struct drm_plane_state *new_state)

> +{

> +	struct drm_plane_state *old_state =

> +		drm_atomic_get_old_plane_state(new_state->state, plane);

> +

> +	if (plane->state->fb != new_state->fb)

> +		drm_atomic_set_fb_for_plane(plane->state, new_state->fb);

> +

> +	plane->state->src_x = new_state->src_x;

> +	plane->state->src_y = new_state->src_y;

> +	plane->state->src_w = new_state->src_w;

> +	plane->state->src_h = new_state->src_h;

> +	plane->state->crtc_x = new_state->crtc_x;

> +	plane->state->crtc_y = new_state->crtc_y;

> +	plane->state->crtc_w = new_state->crtc_w;

> +	plane->state->crtc_h = new_state->crtc_h;

> +

> +	handle_cursor_update(plane, old_state);

> +}

> +

>   static const struct drm_plane_helper_funcs dm_plane_helper_funcs = {

>   	.prepare_fb = dm_plane_helper_prepare_fb,

>   	.cleanup_fb = dm_plane_helper_cleanup_fb,

>   	.atomic_check = dm_plane_atomic_check,

> +	.atomic_async_check = dm_plane_atomic_async_check,

> +	.atomic_async_update = dm_plane_atomic_async_update

>   };

>   

>   /*

> @@ -4307,6 +4348,7 @@ static int get_cursor_position(struct drm_plane *plane, struct drm_crtc *crtc,

>   static void handle_cursor_update(struct drm_plane *plane,

>   				 struct drm_plane_state *old_plane_state)

>   {

> +	struct amdgpu_device *adev = plane->dev->dev_private;

>   	struct amdgpu_framebuffer *afb = to_amdgpu_framebuffer(plane->state->fb);

>   	struct drm_crtc *crtc = afb ? plane->state->crtc : old_plane_state->crtc;

>   	struct dm_crtc_state *crtc_state = crtc ? to_dm_crtc_state(crtc->state) : NULL;

> @@ -4331,9 +4373,12 @@ static void handle_cursor_update(struct drm_plane *plane,

>   

>   	if (!position.enable) {

>   		/* turn off cursor */

> -		if (crtc_state && crtc_state->stream)

> +		if (crtc_state && crtc_state->stream) {

> +			mutex_lock(&adev->dm.dc_lock);

>   			dc_stream_set_cursor_position(crtc_state->stream,

>   						      &position);

> +			mutex_unlock(&adev->dm.dc_lock);

> +		}

>   		return;

>   	}

>   

> @@ -4351,6 +4396,7 @@ static void handle_cursor_update(struct drm_plane *plane,

>   	attributes.pitch = attributes.width;

>   

>   	if (crtc_state->stream) {

> +		mutex_lock(&adev->dm.dc_lock);

>   		if (!dc_stream_set_cursor_attributes(crtc_state->stream,

>   							 &attributes))

>   			DRM_ERROR("DC failed to set cursor attributes\n");

> @@ -4358,6 +4404,7 @@ static void handle_cursor_update(struct drm_plane *plane,

>   		if (!dc_stream_set_cursor_position(crtc_state->stream,

>   						   &position))

>   			DRM_ERROR("DC failed to set cursor position\n");

> +		mutex_unlock(&adev->dm.dc_lock);

>   	}

>   }

>   

> @@ -4573,6 +4620,7 @@ static void amdgpu_dm_do_flip(struct drm_crtc *crtc,

>   				&acrtc_state->stream->vrr_infopacket;

>   	}

>   

> +	mutex_lock(&adev->dm.dc_lock);

>   	dc_commit_updates_for_stream(adev->dm.dc,

>   					     surface_updates,

>   					     1,

> @@ -4580,6 +4628,7 @@ static void amdgpu_dm_do_flip(struct drm_crtc *crtc,

>   					     &stream_update,

>   					     &surface_updates->surface,

>   					     state);

> +	mutex_unlock(&adev->dm.dc_lock);

>   

>   	DRM_DEBUG_DRIVER("%s Flipping to hi: 0x%x, low: 0x%x \n",

>   			 __func__,

> @@ -4594,6 +4643,7 @@ static void amdgpu_dm_do_flip(struct drm_crtc *crtc,

>    * with a dc_plane_state and follow the atomic model a bit more closely here.

>    */

>   static bool commit_planes_to_stream(

> +		struct amdgpu_display_manager *dm,

>   		struct dc *dc,

>   		struct dc_plane_state **plane_states,

>   		uint8_t new_plane_count,

> @@ -4670,11 +4720,13 @@ static bool commit_planes_to_stream(

>   		updates[i].scaling_info = &scaling_info[i];

>   	}

>   

> +	mutex_lock(&dm->dc_lock);

>   	dc_commit_updates_for_stream(

>   			dc,

>   			updates,

>   			new_plane_count,

>   			dc_stream, stream_update, plane_states, state);

> +	mutex_unlock(&dm->dc_lock);

>   

>   	kfree(flip_addr);

>   	kfree(plane_info);

> @@ -4780,7 +4832,8 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,

>   

>   		dc_stream_attach->abm_level = acrtc_state->abm_level;

>   

> -		if (false == commit_planes_to_stream(dm->dc,

> +		if (false == commit_planes_to_stream(dm,

> +							dm->dc,

>   							plane_states_constructed,

>   							planes_count,

>   							acrtc_state,

> @@ -4950,7 +5003,9 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)

>   

>   	if (dc_state) {

>   		dm_enable_per_frame_crtc_master_sync(dc_state);

> +		mutex_lock(&dm->dc_lock);

>   		WARN_ON(!dc_commit_state(dm->dc, dc_state));

> +		mutex_unlock(&dm->dc_lock);

>   	}

>   

>   	for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) {

> @@ -5012,6 +5067,7 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)

>   

>   		/*TODO How it works with MPO ?*/

>   		if (!commit_planes_to_stream(

> +				dm,

>   				dm->dc,

>   				status->plane_states,

>   				status->plane_count,

> @@ -5904,6 +5960,13 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,

>   			ret = -EINVAL;

>   			goto fail;

>   		}

> +	} else if (state->legacy_cursor_update) {

> +		/*

> +		 * This is a fast cursor update coming from the plane update

> +		 * helper, check if it can be done asynchronously for better

> +		 * performance.

> +		 */

> +		state->async_update = !drm_atomic_helper_async_check(dev, state);

>   	}

>   

>   	/* Must be success */

> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h

> index 4326dc256491..25bb91ee80ba 100644

> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h

> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h

> @@ -134,6 +134,14 @@ struct amdgpu_display_manager {

>   

>   	struct drm_modeset_lock atomic_obj_lock;

>   

> +	/**

> +	 * @dc_lock:

> +	 *

> +	 * Guards access to DC functions that can issue register write

> +	 * sequences.

> +	 */

> +	struct mutex dc_lock;

> +

>   	/**

>   	 * @irq_handler_list_low_tab:

>   	 *

>
On 2018-12-05 8:59 p.m., Nicholas Kazlauskas wrote:
> [Why]
> Legacy cursor plane updates from drm helpers go through the full
> atomic codepath. A high volume of cursor updates through this slow
> code path can cause subsequent page-flips to skip vblank intervals
> since each individual update is slow.
> 
> This problem is particularly noticeable for the compton compositor.
> 
> [How]
> A fast path for cursor plane updates is added by using DRM asynchronous
> commit support provided by async_check and async_update. These don't do
> a full state/flip_done dependency stall and they don't block other
> commit work.
> 
> However, DC still expects itself to be single-threaded for anything
> that can issue register writes. Screen corruption or hangs can occur
> if write sequences overlap. Every call that potentially perform
> register writes needs to be guarded for asynchronous updates to work.
> The dc_lock mutex was added for this.
> 
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=106175
> 
> Cc: Leo Li <sunpeng.li@amd.com>
> Cc: Harry Wentland <harry.wentland@amd.com>
> Signed-off-by: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>

Looks like this change introduced (or at least exposed) a reference
counting bug resulting in use-after-free when Xorg shuts down[0]. See
the attached dmesg excerpt (note that I wrapped the !bo->pin_count check
in amdgpu_bo_unpin in WARN_ON_ONCE).


[0] Only with
https://gitlab.freedesktop.org/xorg/driver/xf86-video-amdgpu/commit/0d60233d26ec70d4e1faa343b438e33829c6d5e4
, i.e. alternating between two BOs for the HW cursor, instead of always
using the same one.
On 12/13/18 10:48 AM, Michel Dänzer wrote:
> On 2018-12-05 8:59 p.m., Nicholas Kazlauskas wrote:

>> [Why]

>> Legacy cursor plane updates from drm helpers go through the full

>> atomic codepath. A high volume of cursor updates through this slow

>> code path can cause subsequent page-flips to skip vblank intervals

>> since each individual update is slow.

>>

>> This problem is particularly noticeable for the compton compositor.

>>

>> [How]

>> A fast path for cursor plane updates is added by using DRM asynchronous

>> commit support provided by async_check and async_update. These don't do

>> a full state/flip_done dependency stall and they don't block other

>> commit work.

>>

>> However, DC still expects itself to be single-threaded for anything

>> that can issue register writes. Screen corruption or hangs can occur

>> if write sequences overlap. Every call that potentially perform

>> register writes needs to be guarded for asynchronous updates to work.

>> The dc_lock mutex was added for this.

>>

>> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=106175

>>

>> Cc: Leo Li <sunpeng.li@amd.com>

>> Cc: Harry Wentland <harry.wentland@amd.com>

>> Signed-off-by: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>

> 

> Looks like this change introduced (or at least exposed) a reference

> counting bug resulting in use-after-free when Xorg shuts down[0]. See

> the attached dmesg excerpt (note that I wrapped the !bo->pin_count check

> in amdgpu_bo_unpin in WARN_ON_ONCE).

> 

> 

> [0] Only with

> https://gitlab.freedesktop.org/xorg/driver/xf86-video-amdgpu/commit/0d60233d26ec70d4e1faa343b438e33829c6d5e4

> , i.e. alternating between two BOs for the HW cursor, instead of always

> using the same one.

> 

> 


Thanks for the heads up, I don't think I had that patch in my 
xf86-video-amdgpu when testing the desktop stack.

The async atomic helper does the:

drm_atomic_helper_prepare_planes
drm_atomic_helper_async_commit
drm_atomic_helper_cleanup_planes

...sequence correctly from what I can tell, so maybe it's something with
dm_plane_helper_prepare_fb or dm_plane_helper_cleanup_fb itself.

One case where unref could be called (not following a ref) is during 
drm_atomic_helper_prepare_planes - if prepare_fb fails then cleanup_fb 
gets called regardless, and we only ref the fb if prepare_fb is in the 
success path.

Nicholas Kazlauskas
On 12/13/18 11:01 AM, Kazlauskas, Nicholas wrote:
> On 12/13/18 10:48 AM, Michel Dänzer wrote:

>> On 2018-12-05 8:59 p.m., Nicholas Kazlauskas wrote:

>>> [Why]

>>> Legacy cursor plane updates from drm helpers go through the full

>>> atomic codepath. A high volume of cursor updates through this slow

>>> code path can cause subsequent page-flips to skip vblank intervals

>>> since each individual update is slow.

>>>

>>> This problem is particularly noticeable for the compton compositor.

>>>

>>> [How]

>>> A fast path for cursor plane updates is added by using DRM asynchronous

>>> commit support provided by async_check and async_update. These don't do

>>> a full state/flip_done dependency stall and they don't block other

>>> commit work.

>>>

>>> However, DC still expects itself to be single-threaded for anything

>>> that can issue register writes. Screen corruption or hangs can occur

>>> if write sequences overlap. Every call that potentially perform

>>> register writes needs to be guarded for asynchronous updates to work.

>>> The dc_lock mutex was added for this.

>>>

>>> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=106175

>>>

>>> Cc: Leo Li <sunpeng.li@amd.com>

>>> Cc: Harry Wentland <harry.wentland@amd.com>

>>> Signed-off-by: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>

>>

>> Looks like this change introduced (or at least exposed) a reference

>> counting bug resulting in use-after-free when Xorg shuts down[0]. See

>> the attached dmesg excerpt (note that I wrapped the !bo->pin_count check

>> in amdgpu_bo_unpin in WARN_ON_ONCE).

>>

>>

>> [0] Only with

>> https://gitlab.freedesktop.org/xorg/driver/xf86-video-amdgpu/commit/0d60233d26ec70d4e1faa343b438e33829c6d5e4

>> , i.e. alternating between two BOs for the HW cursor, instead of always

>> using the same one.

>>

>>

> 

> Thanks for the heads up, I don't think I had that patch in my

> xf86-video-amdgpu when testing the desktop stack.

> 

> The async atomic helper does the:

> 

> drm_atomic_helper_prepare_planes

> drm_atomic_helper_async_commit

> drm_atomic_helper_cleanup_planes

> 

> ...sequence correctly from what I can tell, so maybe it's something with

> dm_plane_helper_prepare_fb or dm_plane_helper_cleanup_fb itself.

> 

> One case where unref could be called (not following a ref) is during

> drm_atomic_helper_prepare_planes - if prepare_fb fails then cleanup_fb

> gets called regardless, and we only ref the fb if prepare_fb is in the

> success path.

> 

> Nicholas Kazlauskas

> _______________________________________________

> amd-gfx mailing list

> amd-gfx@lists.freedesktop.org

> https://lists.freedesktop.org/mailman/listinfo/amd-gfx

> 


The prepare_fb/cleanup_fb calls are actually fine since cleanup_fb only 
gets called on planes that had prepare_fb succeed in all cases as far as 
I can tell.

I think the bug here might be forgetting to set the plane->state to the 
new_state. The cleanup fb callback decides whether to call it on the old 
plane state or new plane state depending on if the commit was aborted or 
not. I think every fast plane update might be treated as aborted in this 
case.

Nicholas Kazlauskas
On 12/13/18 2:21 PM, Kazlauskas, Nicholas wrote:
> On 12/13/18 11:01 AM, Kazlauskas, Nicholas wrote:

>> On 12/13/18 10:48 AM, Michel Dänzer wrote:

>>> On 2018-12-05 8:59 p.m., Nicholas Kazlauskas wrote:

>>>> [Why]

>>>> Legacy cursor plane updates from drm helpers go through the full

>>>> atomic codepath. A high volume of cursor updates through this slow

>>>> code path can cause subsequent page-flips to skip vblank intervals

>>>> since each individual update is slow.

>>>>

>>>> This problem is particularly noticeable for the compton compositor.

>>>>

>>>> [How]

>>>> A fast path for cursor plane updates is added by using DRM asynchronous

>>>> commit support provided by async_check and async_update. These don't do

>>>> a full state/flip_done dependency stall and they don't block other

>>>> commit work.

>>>>

>>>> However, DC still expects itself to be single-threaded for anything

>>>> that can issue register writes. Screen corruption or hangs can occur

>>>> if write sequences overlap. Every call that potentially perform

>>>> register writes needs to be guarded for asynchronous updates to work.

>>>> The dc_lock mutex was added for this.

>>>>

>>>> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=106175

>>>>

>>>> Cc: Leo Li <sunpeng.li@amd.com>

>>>> Cc: Harry Wentland <harry.wentland@amd.com>

>>>> Signed-off-by: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>

>>>

>>> Looks like this change introduced (or at least exposed) a reference

>>> counting bug resulting in use-after-free when Xorg shuts down[0]. See

>>> the attached dmesg excerpt (note that I wrapped the !bo->pin_count check

>>> in amdgpu_bo_unpin in WARN_ON_ONCE).

>>>

>>>

>>> [0] Only with

>>> https://gitlab.freedesktop.org/xorg/driver/xf86-video-amdgpu/commit/0d60233d26ec70d4e1faa343b438e33829c6d5e4

>>> , i.e. alternating between two BOs for the HW cursor, instead of always

>>> using the same one.

>>>

>>>

>>

>> Thanks for the heads up, I don't think I had that patch in my

>> xf86-video-amdgpu when testing the desktop stack.

>>

>> The async atomic helper does the:

>>

>> drm_atomic_helper_prepare_planes

>> drm_atomic_helper_async_commit

>> drm_atomic_helper_cleanup_planes

>>

>> ...sequence correctly from what I can tell, so maybe it's something with

>> dm_plane_helper_prepare_fb or dm_plane_helper_cleanup_fb itself.

>>

>> One case where unref could be called (not following a ref) is during

>> drm_atomic_helper_prepare_planes - if prepare_fb fails then cleanup_fb

>> gets called regardless, and we only ref the fb if prepare_fb is in the

>> success path.

>>

>> Nicholas Kazlauskas

>> _______________________________________________

>> amd-gfx mailing list

>> amd-gfx@lists.freedesktop.org

>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx

>>

> 

> The prepare_fb/cleanup_fb calls are actually fine since cleanup_fb only

> gets called on planes that had prepare_fb succeed in all cases as far as

> I can tell.

> 

> I think the bug here might be forgetting to set the plane->state to the

> new_state. The cleanup fb callback decides whether to call it on the old

> plane state or new plane state depending on if the commit was aborted or

> not. I think every fast plane update might be treated as aborted in this

> case.

> 

> Nicholas Kazlauskas

> _______________________________________________

> amd-gfx mailing list

> amd-gfx@lists.freedesktop.org

> https://lists.freedesktop.org/mailman/listinfo/amd-gfx

> 


This is a bug with DRM, actually.

Typically for a regular atomic commit the prepare_fb callback is called 
for the new_plane_state and cleanup_fb is called for the old_plane_state 
at the end of commit tail.

However, for asynchronous commits this isn't the same - prepare_fb is 
called for new_plane_state and cleanup_fb is then immediately called 
after also for the new_plane_state.

Looking at your stack trace I can see that this is exactly what causes 
the use after free,

The CRTC has changed so it's stuck in the slow path (commit_tail is in 
the trace). However, the plane->state->fb has already been unpinned and 
unref. But the plane->state->fb is *not* NULL from the previous fast 
update, so when it gets to cleanup planes it tries to free the 
old_plane_state it unpins and unrefs the bo a second time.

Then a new fast cursor update comes along (and the fb hasn't changed) so 
it tries to prepare_fb on the same freed bo.

Nicholas Kazlauskas
On 2018-12-13 9:59 p.m., Kazlauskas, Nicholas wrote:
> On 12/13/18 2:21 PM, Kazlauskas, Nicholas wrote:
>> On 12/13/18 11:01 AM, Kazlauskas, Nicholas wrote:
>>> On 12/13/18 10:48 AM, Michel Dänzer wrote:
>>>> On 2018-12-05 8:59 p.m., Nicholas Kazlauskas wrote:
>>>>> [Why]
>>>>> Legacy cursor plane updates from drm helpers go through the full
>>>>> atomic codepath. A high volume of cursor updates through this slow
>>>>> code path can cause subsequent page-flips to skip vblank intervals
>>>>> since each individual update is slow.
>>>>>
>>>>> This problem is particularly noticeable for the compton compositor.
>>>>>
>>>>> [How]
>>>>> A fast path for cursor plane updates is added by using DRM asynchronous
>>>>> commit support provided by async_check and async_update. These don't do
>>>>> a full state/flip_done dependency stall and they don't block other
>>>>> commit work.
>>>>>
>>>>> However, DC still expects itself to be single-threaded for anything
>>>>> that can issue register writes. Screen corruption or hangs can occur
>>>>> if write sequences overlap. Every call that potentially perform
>>>>> register writes needs to be guarded for asynchronous updates to work.
>>>>> The dc_lock mutex was added for this.
>>>>>
>>>>> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=106175
>>>>>
>>>>> Cc: Leo Li <sunpeng.li@amd.com>
>>>>> Cc: Harry Wentland <harry.wentland@amd.com>
>>>>> Signed-off-by: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
>>>>
>>>> Looks like this change introduced (or at least exposed) a reference
>>>> counting bug resulting in use-after-free when Xorg shuts down[0]. See
>>>> the attached dmesg excerpt (note that I wrapped the !bo->pin_count check
>>>> in amdgpu_bo_unpin in WARN_ON_ONCE).
>>>>
>>>>
>>>> [0] Only with
>>>> https://gitlab.freedesktop.org/xorg/driver/xf86-video-amdgpu/commit/0d60233d26ec70d4e1faa343b438e33829c6d5e4
>>>> , i.e. alternating between two BOs for the HW cursor, instead of always
>>>> using the same one.
>>>>
>>>>
>>>
>>> Thanks for the heads up, I don't think I had that patch in my
>>> xf86-video-amdgpu when testing the desktop stack.
>>>
>>> The async atomic helper does the:
>>>
>>> drm_atomic_helper_prepare_planes
>>> drm_atomic_helper_async_commit
>>> drm_atomic_helper_cleanup_planes
>>>
>>> ...sequence correctly from what I can tell, so maybe it's something with
>>> dm_plane_helper_prepare_fb or dm_plane_helper_cleanup_fb itself.
>>>
>>> One case where unref could be called (not following a ref) is during
>>> drm_atomic_helper_prepare_planes - if prepare_fb fails then cleanup_fb
>>> gets called regardless, and we only ref the fb if prepare_fb is in the
>>> success path.
>>
>> The prepare_fb/cleanup_fb calls are actually fine since cleanup_fb only
>> gets called on planes that had prepare_fb succeed in all cases as far as
>> I can tell.
>>
>> I think the bug here might be forgetting to set the plane->state to the
>> new_state. The cleanup fb callback decides whether to call it on the old
>> plane state or new plane state depending on if the commit was aborted or
>> not. I think every fast plane update might be treated as aborted in this
>> case.
> 
> This is a bug with DRM, actually.
> 
> Typically for a regular atomic commit the prepare_fb callback is called 
> for the new_plane_state and cleanup_fb is called for the old_plane_state 
> at the end of commit tail.
> 
> However, for asynchronous commits this isn't the same - prepare_fb is 
> called for new_plane_state and cleanup_fb is then immediately called 
> after also for the new_plane_state.
> 
> Looking at your stack trace I can see that this is exactly what causes 
> the use after free,
> 
> The CRTC has changed so it's stuck in the slow path (commit_tail is in 
> the trace). However, the plane->state->fb has already been unpinned and 
> unref. But the plane->state->fb is *not* NULL from the previous fast 
> update, so when it gets to cleanup planes it tries to free the 
> old_plane_state it unpins and unrefs the bo a second time.
> 
> Then a new fast cursor update comes along (and the fb hasn't changed) so 
> it tries to prepare_fb on the same freed bo.

Do you have an idea for a fix? If not, I'm afraid we need to revert this
change again for now, as the consequences can be severe (in one case,
ext4 code started complaining, I couldn't reboot cleanly and had to fsck
afterwards).
On 12/14/18 3:46 AM, Michel Dänzer wrote:
> On 2018-12-13 9:59 p.m., Kazlauskas, Nicholas wrote:

>> On 12/13/18 2:21 PM, Kazlauskas, Nicholas wrote:

>>> On 12/13/18 11:01 AM, Kazlauskas, Nicholas wrote:

>>>> On 12/13/18 10:48 AM, Michel Dänzer wrote:

>>>>> On 2018-12-05 8:59 p.m., Nicholas Kazlauskas wrote:

>>>>>> [Why]

>>>>>> Legacy cursor plane updates from drm helpers go through the full

>>>>>> atomic codepath. A high volume of cursor updates through this slow

>>>>>> code path can cause subsequent page-flips to skip vblank intervals

>>>>>> since each individual update is slow.

>>>>>>

>>>>>> This problem is particularly noticeable for the compton compositor.

>>>>>>

>>>>>> [How]

>>>>>> A fast path for cursor plane updates is added by using DRM asynchronous

>>>>>> commit support provided by async_check and async_update. These don't do

>>>>>> a full state/flip_done dependency stall and they don't block other

>>>>>> commit work.

>>>>>>

>>>>>> However, DC still expects itself to be single-threaded for anything

>>>>>> that can issue register writes. Screen corruption or hangs can occur

>>>>>> if write sequences overlap. Every call that potentially perform

>>>>>> register writes needs to be guarded for asynchronous updates to work.

>>>>>> The dc_lock mutex was added for this.

>>>>>>

>>>>>> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=106175

>>>>>>

>>>>>> Cc: Leo Li <sunpeng.li@amd.com>

>>>>>> Cc: Harry Wentland <harry.wentland@amd.com>

>>>>>> Signed-off-by: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>

>>>>>

>>>>> Looks like this change introduced (or at least exposed) a reference

>>>>> counting bug resulting in use-after-free when Xorg shuts down[0]. See

>>>>> the attached dmesg excerpt (note that I wrapped the !bo->pin_count check

>>>>> in amdgpu_bo_unpin in WARN_ON_ONCE).

>>>>>

>>>>>

>>>>> [0] Only with

>>>>> https://gitlab.freedesktop.org/xorg/driver/xf86-video-amdgpu/commit/0d60233d26ec70d4e1faa343b438e33829c6d5e4

>>>>> , i.e. alternating between two BOs for the HW cursor, instead of always

>>>>> using the same one.

>>>>>

>>>>>

>>>>

>>>> Thanks for the heads up, I don't think I had that patch in my

>>>> xf86-video-amdgpu when testing the desktop stack.

>>>>

>>>> The async atomic helper does the:

>>>>

>>>> drm_atomic_helper_prepare_planes

>>>> drm_atomic_helper_async_commit

>>>> drm_atomic_helper_cleanup_planes

>>>>

>>>> ...sequence correctly from what I can tell, so maybe it's something with

>>>> dm_plane_helper_prepare_fb or dm_plane_helper_cleanup_fb itself.

>>>>

>>>> One case where unref could be called (not following a ref) is during

>>>> drm_atomic_helper_prepare_planes - if prepare_fb fails then cleanup_fb

>>>> gets called regardless, and we only ref the fb if prepare_fb is in the

>>>> success path.

>>>

>>> The prepare_fb/cleanup_fb calls are actually fine since cleanup_fb only

>>> gets called on planes that had prepare_fb succeed in all cases as far as

>>> I can tell.

>>>

>>> I think the bug here might be forgetting to set the plane->state to the

>>> new_state. The cleanup fb callback decides whether to call it on the old

>>> plane state or new plane state depending on if the commit was aborted or

>>> not. I think every fast plane update might be treated as aborted in this

>>> case.

>>

>> This is a bug with DRM, actually.

>>

>> Typically for a regular atomic commit the prepare_fb callback is called

>> for the new_plane_state and cleanup_fb is called for the old_plane_state

>> at the end of commit tail.

>>

>> However, for asynchronous commits this isn't the same - prepare_fb is

>> called for new_plane_state and cleanup_fb is then immediately called

>> after also for the new_plane_state.

>>

>> Looking at your stack trace I can see that this is exactly what causes

>> the use after free,

>>

>> The CRTC has changed so it's stuck in the slow path (commit_tail is in

>> the trace). However, the plane->state->fb has already been unpinned and

>> unref. But the plane->state->fb is *not* NULL from the previous fast

>> update, so when it gets to cleanup planes it tries to free the

>> old_plane_state it unpins and unrefs the bo a second time.

>>

>> Then a new fast cursor update comes along (and the fb hasn't changed) so

>> it tries to prepare_fb on the same freed bo.

> 

> Do you have an idea for a fix? If not, I'm afraid we need to revert this

> change again for now, as the consequences can be severe (in one case,

> ext4 code started complaining, I couldn't reboot cleanly and had to fsck

> afterwards).

> 

> 


Yeah, I have a workaround that I will post for this.

I'm not sure if this is intended behavior or not for DRM - it doesn't 
feel correct to me, but I can understand why it'd make sense in some 
cases. It really depends on what the cleanup_fb is intending to do. It 
certainly doesn't work with the pin/unpin model though.

Nicholas Kazlauskas