[Freedreno,v2,5/5] drm/msm: subclass work object for vblank events

Submitted by Sean Paul on Nov. 29, 2018, 10:15 p.m.

Details

Message ID 20181129221511.GI154160@art_vandelay
State New
Headers show
Series "Series without cover letter" ( rev: 2 ) in DRI devel

Not browsing as part of any series.

Commit Message

Sean Paul Nov. 29, 2018, 10:15 p.m.
On Tue, Nov 20, 2018 at 02:04:14PM -0800, Jeykumar Sankaran wrote:
> On 2018-11-07 07:55, Sean Paul wrote:
> > On Tue, Nov 06, 2018 at 02:36:30PM -0800, Jeykumar Sankaran wrote:
> > > msm maintains a separate structure to define vblank
> > > work definitions and a list to track events submitted
> > > to the workqueue. We can avoid this redundant list
> > > and its protection mechanism, if we subclass the
> > > work object to encapsulate vblank event parameters.
> > > 
> > > changes in v2:
> > > 	- subclass optimization on system wq (Sean Paul)
> > 
> > I wouldn't do it like this, tbh. One problem is that you've lost your
> > flush() on
> > unbind, so there's no way to know if you have workers in the wild
> > waiting
> > to
> > enable/disable vblank.
> > 
> > Another issue is that, AFAICT, we don't need a queue of
> > enables/disables,
> > but
> > rather just the last requested state (ie: should we be on or off). So
> > things
> > don't need to be this complicated (and we're possibly thrashing vblank
> > on/off
> > for no reason).
> > 
> > I'm still of the mind that you should just make this synchronous and be
> > done
> > with the threads (especially since we're still uncovering/introducing
> > races!).
> > 
> While scoping out the effort to make vblank events synchronous, I found
> that the spinlock locking order of vblank request sequence and vblank
> callback
> sequences are the opposite.
> 
> In DPU, drm_vblank_enable acquires vblank_time_lock before registering
> the crtc to encoder which happens after acquiring encoder_spinlock. But
> the vblank_callback acquires encoder_spinlock before accessing the
> registered
> crtc and calling into drm_vblank_handler which tries to acquire
> vblank_time_lock.
> Acquiring both vblank_time_lock and encoder_spinlock in the same thread
> is leading to deadlock.

Hmm, I'm not sure I follow. Are you seeing issues where irq overlaps with
enable/disable? I hacked in sync vblank enable/disable quickly to see if I could
reproduce what you're seeing, but things seemed well behaved.

I do see that there is a chance to call drm_handle_vblank() while holding
enc_spinlock, but couldn't find any obvious lock recursion there.

Maybe a callstack or lockdep splat would help?

Sean


Here's my hack to bypass the display thread:



> 
> In MDP5, I see the same pattern between vblank_time_lock and list_lock which
> is used to track the irq handlers.
> 
> I believe that explains why msm_drv is queuing the vblank enable/disable
> works to WQ after acquiring vblank_time_lock.
> 
> Thanks,
> Jeykumar S.
> 
> > Sean
> > 
> > > 
> > > Signed-off-by: Jeykumar Sankaran <jsanka@codeaurora.org>
> > > ---
> > >  drivers/gpu/drm/msm/msm_drv.c | 67
> > +++++++++++++------------------------------
> > >  drivers/gpu/drm/msm/msm_drv.h |  7 -----
> > >  2 files changed, 20 insertions(+), 54 deletions(-)
> > > 
> > > diff --git a/drivers/gpu/drm/msm/msm_drv.c
> > b/drivers/gpu/drm/msm/msm_drv.c
> > > index 6d6c73b..8da5be2 100644
> > > --- a/drivers/gpu/drm/msm/msm_drv.c
> > > +++ b/drivers/gpu/drm/msm/msm_drv.c
> > > @@ -203,61 +203,44 @@ u32 msm_readl(const void __iomem *addr)
> > >  	return val;
> > >  }
> > > 
> > > -struct vblank_event {
> > > -	struct list_head node;
> > > +struct msm_vblank_work {
> > > +	struct work_struct work;
> > >  	int crtc_id;
> > >  	bool enable;
> > > +	struct msm_drm_private *priv;
> > >  };
> > > 
> > >  static void vblank_ctrl_worker(struct work_struct *work)
> > >  {
> > > -	struct msm_vblank_ctrl *vbl_ctrl = container_of(work,
> > > -						struct msm_vblank_ctrl,
> > work);
> > > -	struct msm_drm_private *priv = container_of(vbl_ctrl,
> > > -					struct msm_drm_private,
> > vblank_ctrl);
> > > +	struct msm_vblank_work *vbl_work = container_of(work,
> > > +						struct msm_vblank_work,
> > work);
> > > +	struct msm_drm_private *priv = vbl_work->priv;
> > >  	struct msm_kms *kms = priv->kms;
> > > -	struct vblank_event *vbl_ev, *tmp;
> > > -	unsigned long flags;
> > > -
> > > -	spin_lock_irqsave(&vbl_ctrl->lock, flags);
> > > -	list_for_each_entry_safe(vbl_ev, tmp, &vbl_ctrl->event_list, node)
> > {
> > > -		list_del(&vbl_ev->node);
> > > -		spin_unlock_irqrestore(&vbl_ctrl->lock, flags);
> > > -
> > > -		if (vbl_ev->enable)
> > > -			kms->funcs->enable_vblank(kms,
> > > -
> > priv->crtcs[vbl_ev->crtc_id]);
> > > -		else
> > > -			kms->funcs->disable_vblank(kms,
> > > -
> > priv->crtcs[vbl_ev->crtc_id]);
> > > 
> > > -		kfree(vbl_ev);
> > > -
> > > -		spin_lock_irqsave(&vbl_ctrl->lock, flags);
> > > -	}
> > > +	if (vbl_work->enable)
> > > +		kms->funcs->enable_vblank(kms,
> > priv->crtcs[vbl_work->crtc_id]);
> > > +	else
> > > +		kms->funcs->disable_vblank(kms,
> > priv->crtcs[vbl_work->crtc_id]);
> > > 
> > > -	spin_unlock_irqrestore(&vbl_ctrl->lock, flags);
> > > +	kfree(vbl_work);
> > >  }
> > > 
> > >  static int vblank_ctrl_queue_work(struct msm_drm_private *priv,
> > >  					int crtc_id, bool enable)
> > >  {
> > > -	struct msm_vblank_ctrl *vbl_ctrl = &priv->vblank_ctrl;
> > > -	struct vblank_event *vbl_ev;
> > > -	unsigned long flags;
> > > +	struct msm_vblank_work *vbl_work;
> > > 
> > > -	vbl_ev = kzalloc(sizeof(*vbl_ev), GFP_ATOMIC);
> > > -	if (!vbl_ev)
> > > +	vbl_work = kzalloc(sizeof(*vbl_work), GFP_ATOMIC);
> > > +	if (!vbl_work)
> > >  		return -ENOMEM;
> > > 
> > > -	vbl_ev->crtc_id = crtc_id;
> > > -	vbl_ev->enable = enable;
> > > +	INIT_WORK(&vbl_work->work, vblank_ctrl_worker);
> > > 
> > > -	spin_lock_irqsave(&vbl_ctrl->lock, flags);
> > > -	list_add_tail(&vbl_ev->node, &vbl_ctrl->event_list);
> > > -	spin_unlock_irqrestore(&vbl_ctrl->lock, flags);
> > > +	vbl_work->crtc_id = crtc_id;
> > > +	vbl_work->enable = enable;
> > > +	vbl_work->priv = priv;
> > > 
> > > -	schedule_work(&vbl_ctrl->work);
> > > +	schedule_work(&vbl_work->work);
> > > 
> > >  	return 0;
> > >  }
> > > @@ -269,14 +252,13 @@ static int msm_drm_uninit(struct device *dev)
> > >  	struct msm_drm_private *priv = ddev->dev_private;
> > >  	struct msm_kms *kms = priv->kms;
> > >  	struct msm_mdss *mdss = priv->mdss;
> > > -	struct msm_vblank_ctrl *vbl_ctrl = &priv->vblank_ctrl;
> > > -	struct vblank_event *vbl_ev, *tmp;
> > >  	int i;
> > > 
> > >  	/* We must cancel and cleanup any pending vblank enable/disable
> > >  	 * work before drm_irq_uninstall() to avoid work re-enabling an
> > >  	 * irq after uninstall has disabled it.
> > >  	 */
> > > +
> > >  	msm_gem_shrinker_cleanup(ddev);
> > > 
> > >  	drm_kms_helper_poll_fini(ddev);
> > > @@ -292,12 +274,6 @@ static int msm_drm_uninit(struct device *dev)
> > >  #endif
> > >  	drm_mode_config_cleanup(ddev);
> > > 
> > > -	flush_work(&vbl_ctrl->work);
> > > -	list_for_each_entry_safe(vbl_ev, tmp, &vbl_ctrl->event_list, node)
> > {
> > > -		list_del(&vbl_ev->node);
> > > -		kfree(vbl_ev);
> > > -	}
> > > -
> > >  	/* clean up event worker threads */
> > >  	for (i = 0; i < priv->num_crtcs; i++) {
> > >  		if (priv->event_thread[i].thread) {
> > > @@ -469,9 +445,6 @@ static int msm_drm_init(struct device *dev, struct
> > drm_driver *drv)
> > >  	priv->wq = alloc_ordered_workqueue("msm", 0);
> > > 
> > >  	INIT_LIST_HEAD(&priv->inactive_list);
> > > -	INIT_LIST_HEAD(&priv->vblank_ctrl.event_list);
> > > -	INIT_WORK(&priv->vblank_ctrl.work, vblank_ctrl_worker);
> > > -	spin_lock_init(&priv->vblank_ctrl.lock);
> > > 
> > >  	drm_mode_config_init(ddev);
> > > 
> > > diff --git a/drivers/gpu/drm/msm/msm_drv.h
> > b/drivers/gpu/drm/msm/msm_drv.h
> > > index 05d33a7..d4cbde2 100644
> > > --- a/drivers/gpu/drm/msm/msm_drv.h
> > > +++ b/drivers/gpu/drm/msm/msm_drv.h
> > > @@ -77,12 +77,6 @@ enum msm_mdp_plane_property {
> > >  	PLANE_PROP_MAX_NUM
> > >  };
> > > 
> > > -struct msm_vblank_ctrl {
> > > -	struct work_struct work;
> > > -	struct list_head event_list;
> > > -	spinlock_t lock;
> > > -};
> > > -
> > >  #define MSM_GPU_MAX_RINGS 4
> > >  #define MAX_H_TILES_PER_DISPLAY 2
> > > 
> > > @@ -225,7 +219,6 @@ struct msm_drm_private {
> > >  	struct notifier_block vmap_notifier;
> > >  	struct shrinker shrinker;
> > > 
> > > -	struct msm_vblank_ctrl vblank_ctrl;
> > >  	struct drm_atomic_state *pm_state;
> > >  };
> > > 
> > > --
> > > The Qualcomm Innovation Center, Inc. is a member of the Code Aurora
> > Forum,
> > > a Linux Foundation Collaborative Project
> > > 
> > > _______________________________________________
> > > Freedreno mailing list
> > > Freedreno@lists.freedesktop.org
> > > https://lists.freedesktop.org/mailman/listinfo/freedreno
> 
> -- 
> Jeykumar S

Patch hide | download patch | download mbox

diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
index 9c9f7ff6960b38..5a3cac5825319e 100644
--- a/drivers/gpu/drm/msm/msm_drv.c
+++ b/drivers/gpu/drm/msm/msm_drv.c
@@ -242,24 +242,19 @@  static void vblank_ctrl_worker(struct kthread_work *work)
 static int vblank_ctrl_queue_work(struct msm_drm_private *priv,
 					int crtc_id, bool enable)
 {
+	struct msm_kms *kms = priv->kms;
 	struct msm_vblank_ctrl *vbl_ctrl = &priv->vblank_ctrl;
-	struct vblank_event *vbl_ev;
 	unsigned long flags;
 
-	vbl_ev = kzalloc(sizeof(*vbl_ev), GFP_ATOMIC);
-	if (!vbl_ev)
-		return -ENOMEM;
+	spin_lock_irqsave(&vbl_ctrl->lock, flags);
 
-	vbl_ev->crtc_id = crtc_id;
-	vbl_ev->enable = enable;
+	if (enable)
+		kms->funcs->enable_vblank(kms, priv->crtcs[crtc_id]);
+	else
+		kms->funcs->disable_vblank(kms, priv->crtcs[crtc_id]);
 
-	spin_lock_irqsave(&vbl_ctrl->lock, flags);
-	list_add_tail(&vbl_ev->node, &vbl_ctrl->event_list);
 	spin_unlock_irqrestore(&vbl_ctrl->lock, flags);
 
-	kthread_queue_work(&priv->disp_thread[crtc_id].worker,
-			&vbl_ctrl->work);
-
 	return 0;
 }


Comments

On 2018-11-29 14:15, Sean Paul wrote:
> On Tue, Nov 20, 2018 at 02:04:14PM -0800, Jeykumar Sankaran wrote:
>> On 2018-11-07 07:55, Sean Paul wrote:
>> > On Tue, Nov 06, 2018 at 02:36:30PM -0800, Jeykumar Sankaran wrote:
>> > > msm maintains a separate structure to define vblank
>> > > work definitions and a list to track events submitted
>> > > to the workqueue. We can avoid this redundant list
>> > > and its protection mechanism, if we subclass the
>> > > work object to encapsulate vblank event parameters.
>> > >
>> > > changes in v2:
>> > > 	- subclass optimization on system wq (Sean Paul)
>> >
>> > I wouldn't do it like this, tbh. One problem is that you've lost your
>> > flush() on
>> > unbind, so there's no way to know if you have workers in the wild
>> > waiting
>> > to
>> > enable/disable vblank.
>> >
>> > Another issue is that, AFAICT, we don't need a queue of
>> > enables/disables,
>> > but
>> > rather just the last requested state (ie: should we be on or off). So
>> > things
>> > don't need to be this complicated (and we're possibly thrashing vblank
>> > on/off
>> > for no reason).
>> >
>> > I'm still of the mind that you should just make this synchronous and
> be
>> > done
>> > with the threads (especially since we're still uncovering/introducing
>> > races!).
>> >
>> While scoping out the effort to make vblank events synchronous, I 
>> found
>> that the spinlock locking order of vblank request sequence and vblank
>> callback
>> sequences are the opposite.
>> 
>> In DPU, drm_vblank_enable acquires vblank_time_lock before registering
>> the crtc to encoder which happens after acquiring encoder_spinlock. 
>> But
>> the vblank_callback acquires encoder_spinlock before accessing the
>> registered
>> crtc and calling into drm_vblank_handler which tries to acquire
>> vblank_time_lock.
>> Acquiring both vblank_time_lock and encoder_spinlock in the same 
>> thread
>> is leading to deadlock.
> 
> Hmm, I'm not sure I follow. Are you seeing issues where irq overlaps 
> with
> enable/disable? I hacked in sync vblank enable/disable quickly to see 
> if I
> could
> reproduce what you're seeing, but things seemed well behaved.
> 

The race is between the drm_vblank_get/put and vblank handler contexts.

When made synchronous:

While calling drm_vblank_get, the callstack looks like this:
drm_vblank_get -> drm_vblank_enable (acquires vblank_time_lock) ->
__enable_vblank -> dpu_crtc_vblank -> dpu_encoder_toggle_vblank_for_crtc
(tries to acquire enc_spinlock)

In the vblank handler, the callstack will be:
dpu_encoder_phys_vid_vblank_irq -> dpu_encoder_vblank_callback (acquires
enc_spinlock) -> dpu_crtc_vblank_callback -> drm_handle_vblank (tries to
acquire vblank_time_lock)


> I do see that there is a chance to call drm_handle_vblank() while 
> holding
> enc_spinlock, but couldn't find any obvious lock recursion there.
> 
> Maybe a callstack or lockdep splat would help?
> 
> Sean
> 
> 
> Here's my hack to bypass the display thread:
> 
> diff --git a/drivers/gpu/drm/msm/msm_drv.c 
> b/drivers/gpu/drm/msm/msm_drv.c
> index 9c9f7ff6960b38..5a3cac5825319e 100644
> --- a/drivers/gpu/drm/msm/msm_drv.c
> +++ b/drivers/gpu/drm/msm/msm_drv.c
> @@ -242,24 +242,19 @@ static void vblank_ctrl_worker(struct 
> kthread_work
> *work)
>  static int vblank_ctrl_queue_work(struct msm_drm_private *priv,
>  					int crtc_id, bool enable)
>  {
> +	struct msm_kms *kms = priv->kms;
>  	struct msm_vblank_ctrl *vbl_ctrl = &priv->vblank_ctrl;
> -	struct vblank_event *vbl_ev;
>  	unsigned long flags;
> 
> -	vbl_ev = kzalloc(sizeof(*vbl_ev), GFP_ATOMIC);
> -	if (!vbl_ev)
> -		return -ENOMEM;
> +	spin_lock_irqsave(&vbl_ctrl->lock, flags);
> 
> -	vbl_ev->crtc_id = crtc_id;
> -	vbl_ev->enable = enable;
> +	if (enable)
> +		kms->funcs->enable_vblank(kms, priv->crtcs[crtc_id]);
> +	else
> +		kms->funcs->disable_vblank(kms, priv->crtcs[crtc_id]);
> 
> -	spin_lock_irqsave(&vbl_ctrl->lock, flags);
> -	list_add_tail(&vbl_ev->node, &vbl_ctrl->event_list);
>  	spin_unlock_irqrestore(&vbl_ctrl->lock, flags);
> 
> -	kthread_queue_work(&priv->disp_thread[crtc_id].worker,
> -			&vbl_ctrl->work);
> -
>  	return 0;
>  }
> 
Even with your patch above, I see the frame getting stuck, but it recovers
after a while.
The patch I tried was assigning crtc->funcs->enable_vblank/disable_vblank so
that __enable_vblank can call the crtc directly. But the above callstack is
still valid for your patch.

Thanks,
Jeykumar S.
> 
> 
>> 
>> In MDP5, I see the same pattern between vblank_time_lock and list_lock
> which
>> is used to track the irq handlers.
>> 
>> I believe that explains why msm_drv is queuing the vblank 
>> enable/disable
>> works to WQ after acquiring vblank_time_lock.
>> 
>> Thanks,
>> Jeykumar S.
>> 
>> > Sean
>> >
>> > >
>> > > Signed-off-by: Jeykumar Sankaran <jsanka@codeaurora.org>
>> > > ---
>> > >  drivers/gpu/drm/msm/msm_drv.c | 67
>> > +++++++++++++------------------------------
>> > >  drivers/gpu/drm/msm/msm_drv.h |  7 -----
>> > >  2 files changed, 20 insertions(+), 54 deletions(-)
>> > >
>> > > diff --git a/drivers/gpu/drm/msm/msm_drv.c
>> > b/drivers/gpu/drm/msm/msm_drv.c
>> > > index 6d6c73b..8da5be2 100644
>> > > --- a/drivers/gpu/drm/msm/msm_drv.c
>> > > +++ b/drivers/gpu/drm/msm/msm_drv.c
>> > > @@ -203,61 +203,44 @@ u32 msm_readl(const void __iomem *addr)
>> > >  	return val;
>> > >  }
>> > >
>> > > -struct vblank_event {
>> > > -	struct list_head node;
>> > > +struct msm_vblank_work {
>> > > +	struct work_struct work;
>> > >  	int crtc_id;
>> > >  	bool enable;
>> > > +	struct msm_drm_private *priv;
>> > >  };
>> > >
>> > >  static void vblank_ctrl_worker(struct work_struct *work)
>> > >  {
>> > > -	struct msm_vblank_ctrl *vbl_ctrl = container_of(work,
>> > > -						struct msm_vblank_ctrl,
>> > work);
>> > > -	struct msm_drm_private *priv = container_of(vbl_ctrl,
>> > > -					struct msm_drm_private,
>> > vblank_ctrl);
>> > > +	struct msm_vblank_work *vbl_work = container_of(work,
>> > > +						struct msm_vblank_work,
>> > work);
>> > > +	struct msm_drm_private *priv = vbl_work->priv;
>> > >  	struct msm_kms *kms = priv->kms;
>> > > -	struct vblank_event *vbl_ev, *tmp;
>> > > -	unsigned long flags;
>> > > -
>> > > -	spin_lock_irqsave(&vbl_ctrl->lock, flags);
>> > > -	list_for_each_entry_safe(vbl_ev, tmp, &vbl_ctrl->event_list, node)
>> > {
>> > > -		list_del(&vbl_ev->node);
>> > > -		spin_unlock_irqrestore(&vbl_ctrl->lock, flags);
>> > > -
>> > > -		if (vbl_ev->enable)
>> > > -			kms->funcs->enable_vblank(kms,
>> > > -
>> > priv->crtcs[vbl_ev->crtc_id]);
>> > > -		else
>> > > -			kms->funcs->disable_vblank(kms,
>> > > -
>> > priv->crtcs[vbl_ev->crtc_id]);
>> > >
>> > > -		kfree(vbl_ev);
>> > > -
>> > > -		spin_lock_irqsave(&vbl_ctrl->lock, flags);
>> > > -	}
>> > > +	if (vbl_work->enable)
>> > > +		kms->funcs->enable_vblank(kms,
>> > priv->crtcs[vbl_work->crtc_id]);
>> > > +	else
>> > > +		kms->funcs->disable_vblank(kms,
>> > priv->crtcs[vbl_work->crtc_id]);
>> > >
>> > > -	spin_unlock_irqrestore(&vbl_ctrl->lock, flags);
>> > > +	kfree(vbl_work);
>> > >  }
>> > >
>> > >  static int vblank_ctrl_queue_work(struct msm_drm_private *priv,
>> > >  					int crtc_id, bool enable)
>> > >  {
>> > > -	struct msm_vblank_ctrl *vbl_ctrl = &priv->vblank_ctrl;
>> > > -	struct vblank_event *vbl_ev;
>> > > -	unsigned long flags;
>> > > +	struct msm_vblank_work *vbl_work;
>> > >
>> > > -	vbl_ev = kzalloc(sizeof(*vbl_ev), GFP_ATOMIC);
>> > > -	if (!vbl_ev)
>> > > +	vbl_work = kzalloc(sizeof(*vbl_work), GFP_ATOMIC);
>> > > +	if (!vbl_work)
>> > >  		return -ENOMEM;
>> > >
>> > > -	vbl_ev->crtc_id = crtc_id;
>> > > -	vbl_ev->enable = enable;
>> > > +	INIT_WORK(&vbl_work->work, vblank_ctrl_worker);
>> > >
>> > > -	spin_lock_irqsave(&vbl_ctrl->lock, flags);
>> > > -	list_add_tail(&vbl_ev->node, &vbl_ctrl->event_list);
>> > > -	spin_unlock_irqrestore(&vbl_ctrl->lock, flags);
>> > > +	vbl_work->crtc_id = crtc_id;
>> > > +	vbl_work->enable = enable;
>> > > +	vbl_work->priv = priv;
>> > >
>> > > -	schedule_work(&vbl_ctrl->work);
>> > > +	schedule_work(&vbl_work->work);
>> > >
>> > >  	return 0;
>> > >  }
>> > > @@ -269,14 +252,13 @@ static int msm_drm_uninit(struct device *dev)
>> > >  	struct msm_drm_private *priv = ddev->dev_private;
>> > >  	struct msm_kms *kms = priv->kms;
>> > >  	struct msm_mdss *mdss = priv->mdss;
>> > > -	struct msm_vblank_ctrl *vbl_ctrl = &priv->vblank_ctrl;
>> > > -	struct vblank_event *vbl_ev, *tmp;
>> > >  	int i;
>> > >
>> > >  	/* We must cancel and cleanup any pending vblank enable/disable
>> > >  	 * work before drm_irq_uninstall() to avoid work re-enabling an
>> > >  	 * irq after uninstall has disabled it.
>> > >  	 */
>> > > +
>> > >  	msm_gem_shrinker_cleanup(ddev);
>> > >
>> > >  	drm_kms_helper_poll_fini(ddev);
>> > > @@ -292,12 +274,6 @@ static int msm_drm_uninit(struct device *dev)
>> > >  #endif
>> > >  	drm_mode_config_cleanup(ddev);
>> > >
>> > > -	flush_work(&vbl_ctrl->work);
>> > > -	list_for_each_entry_safe(vbl_ev, tmp, &vbl_ctrl->event_list, node)
>> > {
>> > > -		list_del(&vbl_ev->node);
>> > > -		kfree(vbl_ev);
>> > > -	}
>> > > -
>> > >  	/* clean up event worker threads */
>> > >  	for (i = 0; i < priv->num_crtcs; i++) {
>> > >  		if (priv->event_thread[i].thread) {
>> > > @@ -469,9 +445,6 @@ static int msm_drm_init(struct device *dev,
> struct
>> > drm_driver *drv)
>> > >  	priv->wq = alloc_ordered_workqueue("msm", 0);
>> > >
>> > >  	INIT_LIST_HEAD(&priv->inactive_list);
>> > > -	INIT_LIST_HEAD(&priv->vblank_ctrl.event_list);
>> > > -	INIT_WORK(&priv->vblank_ctrl.work, vblank_ctrl_worker);
>> > > -	spin_lock_init(&priv->vblank_ctrl.lock);
>> > >
>> > >  	drm_mode_config_init(ddev);
>> > >
>> > > diff --git a/drivers/gpu/drm/msm/msm_drv.h
>> > b/drivers/gpu/drm/msm/msm_drv.h
>> > > index 05d33a7..d4cbde2 100644
>> > > --- a/drivers/gpu/drm/msm/msm_drv.h
>> > > +++ b/drivers/gpu/drm/msm/msm_drv.h
>> > > @@ -77,12 +77,6 @@ enum msm_mdp_plane_property {
>> > >  	PLANE_PROP_MAX_NUM
>> > >  };
>> > >
>> > > -struct msm_vblank_ctrl {
>> > > -	struct work_struct work;
>> > > -	struct list_head event_list;
>> > > -	spinlock_t lock;
>> > > -};
>> > > -
>> > >  #define MSM_GPU_MAX_RINGS 4
>> > >  #define MAX_H_TILES_PER_DISPLAY 2
>> > >
>> > > @@ -225,7 +219,6 @@ struct msm_drm_private {
>> > >  	struct notifier_block vmap_notifier;
>> > >  	struct shrinker shrinker;
>> > >
>> > > -	struct msm_vblank_ctrl vblank_ctrl;
>> > >  	struct drm_atomic_state *pm_state;
>> > >  };
>> > >
>> > > --
>> > > The Qualcomm Innovation Center, Inc. is a member of the Code Aurora
>> > Forum,
>> > > a Linux Foundation Collaborative Project
>> > >
>> > > _______________________________________________
>> > > Freedreno mailing list
>> > > Freedreno@lists.freedesktop.org
>> > > https://lists.freedesktop.org/mailman/listinfo/freedreno
>> 
>> --
>> Jeykumar S
On Fri, Nov 30, 2018 at 11:45:55AM -0800, Jeykumar Sankaran wrote:
> On 2018-11-29 14:15, Sean Paul wrote:
> > On Tue, Nov 20, 2018 at 02:04:14PM -0800, Jeykumar Sankaran wrote:
> > > On 2018-11-07 07:55, Sean Paul wrote:
> > > > On Tue, Nov 06, 2018 at 02:36:30PM -0800, Jeykumar Sankaran wrote:
> > > > > msm maintains a separate structure to define vblank
> > > > > work definitions and a list to track events submitted
> > > > > to the workqueue. We can avoid this redundant list
> > > > > and its protection mechanism, if we subclass the
> > > > > work object to encapsulate vblank event parameters.
> > > > >
> > > > > changes in v2:
> > > > > 	- subclass optimization on system wq (Sean Paul)
> > > >
> > > > I wouldn't do it like this, tbh. One problem is that you've lost your
> > > > flush() on
> > > > unbind, so there's no way to know if you have workers in the wild
> > > > waiting
> > > > to
> > > > enable/disable vblank.
> > > >
> > > > Another issue is that, AFAICT, we don't need a queue of
> > > > enables/disables,
> > > > but
> > > > rather just the last requested state (ie: should we be on or off). So
> > > > things
> > > > don't need to be this complicated (and we're possibly thrashing vblank
> > > > on/off
> > > > for no reason).
> > > >
> > > > I'm still of the mind that you should just make this synchronous and
> > be
> > > > done
> > > > with the threads (especially since we're still uncovering/introducing
> > > > races!).
> > > >
> > > While scoping out the effort to make vblank events synchronous, I
> > > found
> > > that the spinlock locking order of vblank request sequence and vblank
> > > callback
> > > sequences are the opposite.
> > > 
> > > In DPU, drm_vblank_enable acquires vblank_time_lock before registering
> > > the crtc to encoder which happens after acquiring encoder_spinlock.
> > > But
> > > the vblank_callback acquires encoder_spinlock before accessing the
> > > registered
> > > crtc and calling into drm_vblank_handler which tries to acquire
> > > vblank_time_lock.
> > > Acquiring both vblank_time_lock and encoder_spinlock in the same
> > > thread
> > > is leading to deadlock.
> > 
> > Hmm, I'm not sure I follow. Are you seeing issues where irq overlaps
> > with
> > enable/disable? I hacked in sync vblank enable/disable quickly to see if
> > I
> > could
> > reproduce what you're seeing, but things seemed well behaved.
> > 
> 
> The race is between drm_vblank_get/put and vblank_handler contexts.
> 
> When made synchronous:
> 
> while calling drm_vblank_get, the callstack looks like below:
> drm_vblank_get -> drm_vblank_enable (acquires vblank_time_lock) ->
> __enable_vblank -> dpu_crtc_vblank -> dpu_encoder_toggle_vblank_for_crtc
> (tries to acquire enc_spinlock)
> 
> In vblank handler, the call stack will be:
> dpu_encoder_phys_vid_vblank_irq -> dpu_encoder_vblank_callback (acquires
> enc_spinlock) -> dpu_crtc_vblank_callback -> drm_handle_vblank (tries to
> acquire vblank_time_lock)

Hmm, I'm not sure how this can happen. We acquire and release the enc_spinlock
before enabling the irq. Yes, we will hold on to the vblank_time_lock, but we
shouldn't be trying to reacquire an encoder's spinlock after we've enabled it.
I don't know how that can deadlock, since we should never be running enable and
the handler concurrently.

The only thing I can think of is that the vblank interrupts are firing after
vblank has been disabled? In that case, it seems like we should properly flush
them.

Sean


> 
> 
> > I do see that there is a chance to call drm_handle_vblank() while
> > holding
> > enc_spinlock, but couldn't find any obvious lock recursion there.
> > 
> > Maybe a callstack or lockdep splat would help?
> > 
> > Sean
> > 
> > 
> > Here's my hack to bypass the display thread:
> > 
> > diff --git a/drivers/gpu/drm/msm/msm_drv.c
> > b/drivers/gpu/drm/msm/msm_drv.c
> > index 9c9f7ff6960b38..5a3cac5825319e 100644
> > --- a/drivers/gpu/drm/msm/msm_drv.c
> > +++ b/drivers/gpu/drm/msm/msm_drv.c
> > @@ -242,24 +242,19 @@ static void vblank_ctrl_worker(struct kthread_work
> > *work)
> >  static int vblank_ctrl_queue_work(struct msm_drm_private *priv,
> >  					int crtc_id, bool enable)
> >  {
> > +	struct msm_kms *kms = priv->kms;
> >  	struct msm_vblank_ctrl *vbl_ctrl = &priv->vblank_ctrl;
> > -	struct vblank_event *vbl_ev;
> >  	unsigned long flags;
> > 
> > -	vbl_ev = kzalloc(sizeof(*vbl_ev), GFP_ATOMIC);
> > -	if (!vbl_ev)
> > -		return -ENOMEM;
> > +	spin_lock_irqsave(&vbl_ctrl->lock, flags);
> > 
> > -	vbl_ev->crtc_id = crtc_id;
> > -	vbl_ev->enable = enable;
> > +	if (enable)
> > +		kms->funcs->enable_vblank(kms, priv->crtcs[crtc_id]);
> > +	else
> > +		kms->funcs->disable_vblank(kms, priv->crtcs[crtc_id]);
> > 
> > -	spin_lock_irqsave(&vbl_ctrl->lock, flags);
> > -	list_add_tail(&vbl_ev->node, &vbl_ctrl->event_list);
> >  	spin_unlock_irqrestore(&vbl_ctrl->lock, flags);
> > 
> > -	kthread_queue_work(&priv->disp_thread[crtc_id].worker,
> > -			&vbl_ctrl->work);
> > -
> >  	return 0;
> >  }
> > 
> Even with your patch above, I see frame is getting stuck but it recovers in
> a while.
> The patch I tried was assigning crtc->funcs->enable_vblank/disable_vblank so
> that
> __enable_vblank can call crtc directly. But the above callstack is still
> valid for your patch.
> 
> Thanks,
> Jeykumar S.
> > 
> > 
> > > 
> > > In MDP5, I see the same pattern between vblank_time_lock and list_lock
> > which
> > > is used to track the irq handlers.
> > > 
> > > I believe that explains why msm_drv is queuing the vblank
> > > enable/disable
> > > works to WQ after acquiring vblank_time_lock.
> > > 
> > > Thanks,
> > > Jeykumar S.
> > > 
> > > > Sean
> > > >
> > > > >
> > > > > Signed-off-by: Jeykumar Sankaran <jsanka@codeaurora.org>
> > > > > ---
> > > > >  drivers/gpu/drm/msm/msm_drv.c | 67
> > > > +++++++++++++------------------------------
> > > > >  drivers/gpu/drm/msm/msm_drv.h |  7 -----
> > > > >  2 files changed, 20 insertions(+), 54 deletions(-)
> > > > >
> > > > > diff --git a/drivers/gpu/drm/msm/msm_drv.c
> > > > b/drivers/gpu/drm/msm/msm_drv.c
> > > > > index 6d6c73b..8da5be2 100644
> > > > > --- a/drivers/gpu/drm/msm/msm_drv.c
> > > > > +++ b/drivers/gpu/drm/msm/msm_drv.c
> > > > > @@ -203,61 +203,44 @@ u32 msm_readl(const void __iomem *addr)
> > > > >  	return val;
> > > > >  }
> > > > >
> > > > > -struct vblank_event {
> > > > > -	struct list_head node;
> > > > > +struct msm_vblank_work {
> > > > > +	struct work_struct work;
> > > > >  	int crtc_id;
> > > > >  	bool enable;
> > > > > +	struct msm_drm_private *priv;
> > > > >  };
> > > > >
> > > > >  static void vblank_ctrl_worker(struct work_struct *work)
> > > > >  {
> > > > > -	struct msm_vblank_ctrl *vbl_ctrl = container_of(work,
> > > > > -						struct msm_vblank_ctrl,
> > > > work);
> > > > > -	struct msm_drm_private *priv = container_of(vbl_ctrl,
> > > > > -					struct msm_drm_private,
> > > > vblank_ctrl);
> > > > > +	struct msm_vblank_work *vbl_work = container_of(work,
> > > > > +						struct msm_vblank_work,
> > > > work);
> > > > > +	struct msm_drm_private *priv = vbl_work->priv;
> > > > >  	struct msm_kms *kms = priv->kms;
> > > > > -	struct vblank_event *vbl_ev, *tmp;
> > > > > -	unsigned long flags;
> > > > > -
> > > > > -	spin_lock_irqsave(&vbl_ctrl->lock, flags);
> > > > > -	list_for_each_entry_safe(vbl_ev, tmp, &vbl_ctrl->event_list, node)
> > > > {
> > > > > -		list_del(&vbl_ev->node);
> > > > > -		spin_unlock_irqrestore(&vbl_ctrl->lock, flags);
> > > > > -
> > > > > -		if (vbl_ev->enable)
> > > > > -			kms->funcs->enable_vblank(kms,
> > > > > -
> > > > priv->crtcs[vbl_ev->crtc_id]);
> > > > > -		else
> > > > > -			kms->funcs->disable_vblank(kms,
> > > > > -
> > > > priv->crtcs[vbl_ev->crtc_id]);
> > > > >
> > > > > -		kfree(vbl_ev);
> > > > > -
> > > > > -		spin_lock_irqsave(&vbl_ctrl->lock, flags);
> > > > > -	}
> > > > > +	if (vbl_work->enable)
> > > > > +		kms->funcs->enable_vblank(kms,
> > > > priv->crtcs[vbl_work->crtc_id]);
> > > > > +	else
> > > > > +		kms->funcs->disable_vblank(kms,
> > > > priv->crtcs[vbl_work->crtc_id]);
> > > > >
> > > > > -	spin_unlock_irqrestore(&vbl_ctrl->lock, flags);
> > > > > +	kfree(vbl_work);
> > > > >  }
> > > > >
> > > > >  static int vblank_ctrl_queue_work(struct msm_drm_private *priv,
> > > > >  					int crtc_id, bool enable)
> > > > >  {
> > > > > -	struct msm_vblank_ctrl *vbl_ctrl = &priv->vblank_ctrl;
> > > > > -	struct vblank_event *vbl_ev;
> > > > > -	unsigned long flags;
> > > > > +	struct msm_vblank_work *vbl_work;
> > > > >
> > > > > -	vbl_ev = kzalloc(sizeof(*vbl_ev), GFP_ATOMIC);
> > > > > -	if (!vbl_ev)
> > > > > +	vbl_work = kzalloc(sizeof(*vbl_work), GFP_ATOMIC);
> > > > > +	if (!vbl_work)
> > > > >  		return -ENOMEM;
> > > > >
> > > > > -	vbl_ev->crtc_id = crtc_id;
> > > > > -	vbl_ev->enable = enable;
> > > > > +	INIT_WORK(&vbl_work->work, vblank_ctrl_worker);
> > > > >
> > > > > -	spin_lock_irqsave(&vbl_ctrl->lock, flags);
> > > > > -	list_add_tail(&vbl_ev->node, &vbl_ctrl->event_list);
> > > > > -	spin_unlock_irqrestore(&vbl_ctrl->lock, flags);
> > > > > +	vbl_work->crtc_id = crtc_id;
> > > > > +	vbl_work->enable = enable;
> > > > > +	vbl_work->priv = priv;
> > > > >
> > > > > -	schedule_work(&vbl_ctrl->work);
> > > > > +	schedule_work(&vbl_work->work);
> > > > >
> > > > >  	return 0;
> > > > >  }
> > > > > @@ -269,14 +252,13 @@ static int msm_drm_uninit(struct device *dev)
> > > > >  	struct msm_drm_private *priv = ddev->dev_private;
> > > > >  	struct msm_kms *kms = priv->kms;
> > > > >  	struct msm_mdss *mdss = priv->mdss;
> > > > > -	struct msm_vblank_ctrl *vbl_ctrl = &priv->vblank_ctrl;
> > > > > -	struct vblank_event *vbl_ev, *tmp;
> > > > >  	int i;
> > > > >
> > > > >  	/* We must cancel and cleanup any pending vblank enable/disable
> > > > >  	 * work before drm_irq_uninstall() to avoid work re-enabling an
> > > > >  	 * irq after uninstall has disabled it.
> > > > >  	 */
> > > > > +
> > > > >  	msm_gem_shrinker_cleanup(ddev);
> > > > >
> > > > >  	drm_kms_helper_poll_fini(ddev);
> > > > > @@ -292,12 +274,6 @@ static int msm_drm_uninit(struct device *dev)
> > > > >  #endif
> > > > >  	drm_mode_config_cleanup(ddev);
> > > > >
> > > > > -	flush_work(&vbl_ctrl->work);
> > > > > -	list_for_each_entry_safe(vbl_ev, tmp, &vbl_ctrl->event_list, node)
> > > > {
> > > > > -		list_del(&vbl_ev->node);
> > > > > -		kfree(vbl_ev);
> > > > > -	}
> > > > > -
> > > > >  	/* clean up event worker threads */
> > > > >  	for (i = 0; i < priv->num_crtcs; i++) {
> > > > >  		if (priv->event_thread[i].thread) {
> > > > > @@ -469,9 +445,6 @@ static int msm_drm_init(struct device *dev,
> > struct
> > > > drm_driver *drv)
> > > > >  	priv->wq = alloc_ordered_workqueue("msm", 0);
> > > > >
> > > > >  	INIT_LIST_HEAD(&priv->inactive_list);
> > > > > -	INIT_LIST_HEAD(&priv->vblank_ctrl.event_list);
> > > > > -	INIT_WORK(&priv->vblank_ctrl.work, vblank_ctrl_worker);
> > > > > -	spin_lock_init(&priv->vblank_ctrl.lock);
> > > > >
> > > > >  	drm_mode_config_init(ddev);
> > > > >
> > > > > diff --git a/drivers/gpu/drm/msm/msm_drv.h
> > > > b/drivers/gpu/drm/msm/msm_drv.h
> > > > > index 05d33a7..d4cbde2 100644
> > > > > --- a/drivers/gpu/drm/msm/msm_drv.h
> > > > > +++ b/drivers/gpu/drm/msm/msm_drv.h
> > > > > @@ -77,12 +77,6 @@ enum msm_mdp_plane_property {
> > > > >  	PLANE_PROP_MAX_NUM
> > > > >  };
> > > > >
> > > > > -struct msm_vblank_ctrl {
> > > > > -	struct work_struct work;
> > > > > -	struct list_head event_list;
> > > > > -	spinlock_t lock;
> > > > > -};
> > > > > -
> > > > >  #define MSM_GPU_MAX_RINGS 4
> > > > >  #define MAX_H_TILES_PER_DISPLAY 2
> > > > >
> > > > > @@ -225,7 +219,6 @@ struct msm_drm_private {
> > > > >  	struct notifier_block vmap_notifier;
> > > > >  	struct shrinker shrinker;
> > > > >
> > > > > -	struct msm_vblank_ctrl vblank_ctrl;
> > > > >  	struct drm_atomic_state *pm_state;
> > > > >  };
> > > > >
> > > > > --
> > > > > The Qualcomm Innovation Center, Inc. is a member of the Code Aurora
> > > > Forum,
> > > > > a Linux Foundation Collaborative Project
> > > > >
> > > > > _______________________________________________
> > > > > Freedreno mailing list
> > > > > Freedreno@lists.freedesktop.org
> > > > > https://lists.freedesktop.org/mailman/listinfo/freedreno
> > > 
> > > --
> > > Jeykumar S
> 
> -- 
> Jeykumar S
On 2018-11-30 12:07, Sean Paul wrote:
> On Fri, Nov 30, 2018 at 11:45:55AM -0800, Jeykumar Sankaran wrote:
>> On 2018-11-29 14:15, Sean Paul wrote:
>> > On Tue, Nov 20, 2018 at 02:04:14PM -0800, Jeykumar Sankaran wrote:
>> > > On 2018-11-07 07:55, Sean Paul wrote:
>> > > > On Tue, Nov 06, 2018 at 02:36:30PM -0800, Jeykumar Sankaran wrote:
>> > > > > msm maintains a separate structure to define vblank
>> > > > > work definitions and a list to track events submitted
>> > > > > to the workqueue. We can avoid this redundant list
>> > > > > and its protection mechanism, if we subclass the
>> > > > > work object to encapsulate vblank event parameters.
>> > > > >
>> > > > > changes in v2:
>> > > > > 	- subclass optimization on system wq (Sean Paul)
>> > > >
>> > > > I wouldn't do it like this, tbh. One problem is that you've lost
> your
>> > > > flush() on
>> > > > unbind, so there's no way to know if you have workers in the wild
>> > > > waiting
>> > > > to
>> > > > enable/disable vblank.
>> > > >
>> > > > Another issues is that AFAICT, we don't need a queue of
>> > > > enables/disables,
>> > > > but
>> > > > rather just the last requested state (ie: should we be on or off).
> So
>> > > > things
>> > > > don't need to be this complicated (and we're possibly thrashing
> vblank
>> > > > on/off
>> > > > for no reason).
>> > > >
>> > > > I'm still of the mind that you should just make this synchronous
> and
>> > be
>> > > > done
>> > > > with the threads (especially since we're still
> uncovering/introducing
>> > > > races!).
>> > > >
>> > > While scoping out the effort to make vblank events synchronous, I
>> > > found
>> > > that the spinlock locking order of vblank request sequence and
> vblank
>> > > callback
>> > > sequences are the opposite.
>> > >
>> > > In DPU, drm_vblank_enable acquires vblank_time_lock before
> registering
>> > > the crtc to encoder which happens after acquiring encoder_spinlock.
>> > > But
>> > > the vblank_callback acquires encoder_spinlock before accessing the
>> > > registered
>> > > crtc and calling into drm_vblank_handler which tries to acquire
>> > > vblank_time_lock.
>> > > Acquiring both vblank_time_lock and encoder_spinlock in the same
>> > > thread
>> > > is leading to deadlock.
>> >
>> > Hmm, I'm not sure I follow. Are you seeing issues where irq overlaps
>> > with
>> > enable/disable? I hacked in sync vblank enable/disable quickly to see
> if
>> > I
>> > could
>> > reproduce what you're seeing, but things seemed well behaved.
>> >
>> 
>> The race is between drm_vblank_get/put and vblank_handler contexts.
>> 
>> When made synchronous:
>> 
>> while calling drm_vblank_get, the callstack looks like below:
>> drm_vblank_get -> drm_vblank_enable (acquires vblank_time_lock) ->
>> __enable_vblank -> dpu_crtc_vblank -> 
>> dpu_encoder_toggle_vblank_for_crtc
>> (tries to acquire enc_spinlock)
>> 
>> In vblank handler, the call stack will be:
>> dpu_encoder_phys_vid_vblank_irq -> dpu_encoder_vblank_callback 
>> (acquires
>> enc_spinlock) -> dpu_crtc_vblank_callback -> drm_handle_vblank (tries 
>> to
>> acquire vblank_time_lock)
> 
> Hmm, I'm not sure how this can happen. We acquire and release the
> enc_spinlock
> before enabling the irq, yes we will hold on to the vbl_time_lock, but 
> we
> shouldn't be trying to reacquire an encoder's spinlock after we've 
> enabled
> it.
In the synchronous approach, dpu_encoder_toggle_vblank_for_crtc (which
acquires the enc_spinlock) will be called while we are holding the
vbl_time_lock.

> I don't know how that can deadlock, since we should never be running
> enable and
> the handler concurrently.
> 
I agree that the vblank_irq handler should not be running before the
enable sequence. But don't you expect the handler to be running while
calling the vblank_disable sequence?
The vblank disable path will try to acquire the locks in the opposite
order to that of the irq_handler, and the same issue is bound to happen.

With your patch, you should be able to simulate this deadlock if you can
inject a delay by adding a pr_err log in vblank_ctrl_queue_work.

Thanks,
Jeykumar S.

> The only thing I can think of is that the vblank interrupts are firing
> after
> vblank has been disabled? In that case, it seems like we should 
> properly
> flush
> them.
> 
> Sean
> 
> 
>> 
>> 
>> > I do see that there is a chance to call drm_handle_vblank() while
>> > holding
>> > enc_spinlock, but couldn't find any obvious lock recursion there.
>> >
>> > Maybe a callstack or lockdep splat would help?
>> >
>> > Sean
>> >
>> >
>> > Here's my hack to bypass the display thread:
>> >
>> > diff --git a/drivers/gpu/drm/msm/msm_drv.c
>> > b/drivers/gpu/drm/msm/msm_drv.c
>> > index 9c9f7ff6960b38..5a3cac5825319e 100644
>> > --- a/drivers/gpu/drm/msm/msm_drv.c
>> > +++ b/drivers/gpu/drm/msm/msm_drv.c
>> > @@ -242,24 +242,19 @@ static void vblank_ctrl_worker(struct
> kthread_work
>> > *work)
>> >  static int vblank_ctrl_queue_work(struct msm_drm_private *priv,
>> >  					int crtc_id, bool enable)
>> >  {
>> > +	struct msm_kms *kms = priv->kms;
>> >  	struct msm_vblank_ctrl *vbl_ctrl = &priv->vblank_ctrl;
>> > -	struct vblank_event *vbl_ev;
>> >  	unsigned long flags;
>> >
>> > -	vbl_ev = kzalloc(sizeof(*vbl_ev), GFP_ATOMIC);
>> > -	if (!vbl_ev)
>> > -		return -ENOMEM;
>> > +	spin_lock_irqsave(&vbl_ctrl->lock, flags);
>> >
>> > -	vbl_ev->crtc_id = crtc_id;
>> > -	vbl_ev->enable = enable;
>> > +	if (enable)
>> > +		kms->funcs->enable_vblank(kms, priv->crtcs[crtc_id]);
>> > +	else
>> > +		kms->funcs->disable_vblank(kms, priv->crtcs[crtc_id]);
>> >
>> > -	spin_lock_irqsave(&vbl_ctrl->lock, flags);
>> > -	list_add_tail(&vbl_ev->node, &vbl_ctrl->event_list);
>> >  	spin_unlock_irqrestore(&vbl_ctrl->lock, flags);
>> >
>> > -	kthread_queue_work(&priv->disp_thread[crtc_id].worker,
>> > -			&vbl_ctrl->work);
>> > -
>> >  	return 0;
>> >  }
>> >
>> Even with your patch above, I see frame is getting stuck but it 
>> recovers
> in
>> a while.
>> The patch I tried was assigning
> crtc->funcs->enable_vblank/disable_vblank so
>> that
>> __enable_vblank can call crtc directly. But the above callstack is 
>> still
>> valid for your patch.
>> 
>> Thanks,
>> Jeykumar S.
>> >
>> >
>> > >
>> > > In MDP5, I see the same pattern between vblank_time_lock and
> list_lock
>> > which
>> > > is used to track the irq handlers.
>> > >
>> > > I believe that explains why msm_drv is queuing the vblank
>> > > enable/disable
>> > > works to WQ after acquiring vblank_time_lock.
>> > >
>> > > Thanks,
>> > > Jeykumar S.
>> > >
>> > > > Sean
>> > > >
>> > > > >
>> > > > > Signed-off-by: Jeykumar Sankaran <jsanka@codeaurora.org>
>> > > > > ---
>> > > > >  drivers/gpu/drm/msm/msm_drv.c | 67
>> > > > +++++++++++++------------------------------
>> > > > >  drivers/gpu/drm/msm/msm_drv.h |  7 -----
>> > > > >  2 files changed, 20 insertions(+), 54 deletions(-)
>> > > > >
>> > > > > diff --git a/drivers/gpu/drm/msm/msm_drv.c
>> > > > b/drivers/gpu/drm/msm/msm_drv.c
>> > > > > index 6d6c73b..8da5be2 100644
>> > > > > --- a/drivers/gpu/drm/msm/msm_drv.c
>> > > > > +++ b/drivers/gpu/drm/msm/msm_drv.c
>> > > > > @@ -203,61 +203,44 @@ u32 msm_readl(const void __iomem *addr)
>> > > > >  	return val;
>> > > > >  }
>> > > > >
>> > > > > -struct vblank_event {
>> > > > > -	struct list_head node;
>> > > > > +struct msm_vblank_work {
>> > > > > +	struct work_struct work;
>> > > > >  	int crtc_id;
>> > > > >  	bool enable;
>> > > > > +	struct msm_drm_private *priv;
>> > > > >  };
>> > > > >
>> > > > >  static void vblank_ctrl_worker(struct work_struct *work)
>> > > > >  {
>> > > > > -	struct msm_vblank_ctrl *vbl_ctrl = container_of(work,
>> > > > > -						struct
> msm_vblank_ctrl,
>> > > > work);
>> > > > > -	struct msm_drm_private *priv = container_of(vbl_ctrl,
>> > > > > -					struct msm_drm_private,
>> > > > vblank_ctrl);
>> > > > > +	struct msm_vblank_work *vbl_work = container_of(work,
>> > > > > +						struct
> msm_vblank_work,
>> > > > work);
>> > > > > +	struct msm_drm_private *priv = vbl_work->priv;
>> > > > >  	struct msm_kms *kms = priv->kms;
>> > > > > -	struct vblank_event *vbl_ev, *tmp;
>> > > > > -	unsigned long flags;
>> > > > > -
>> > > > > -	spin_lock_irqsave(&vbl_ctrl->lock, flags);
>> > > > > -	list_for_each_entry_safe(vbl_ev, tmp,
> &vbl_ctrl->event_list, node)
>> > > > {
>> > > > > -		list_del(&vbl_ev->node);
>> > > > > -		spin_unlock_irqrestore(&vbl_ctrl->lock, flags);
>> > > > > -
>> > > > > -		if (vbl_ev->enable)
>> > > > > -			kms->funcs->enable_vblank(kms,
>> > > > > -
>> > > > priv->crtcs[vbl_ev->crtc_id]);
>> > > > > -		else
>> > > > > -			kms->funcs->disable_vblank(kms,
>> > > > > -
>> > > > priv->crtcs[vbl_ev->crtc_id]);
>> > > > >
>> > > > > -		kfree(vbl_ev);
>> > > > > -
>> > > > > -		spin_lock_irqsave(&vbl_ctrl->lock, flags);
>> > > > > -	}
>> > > > > +	if (vbl_work->enable)
>> > > > > +		kms->funcs->enable_vblank(kms,
>> > > > priv->crtcs[vbl_work->crtc_id]);
>> > > > > +	else
>> > > > > +		kms->funcs->disable_vblank(kms,
>> > > > priv->crtcs[vbl_work->crtc_id]);
>> > > > >
>> > > > > -	spin_unlock_irqrestore(&vbl_ctrl->lock, flags);
>> > > > > +	kfree(vbl_work);
>> > > > >  }
>> > > > >
>> > > > >  static int vblank_ctrl_queue_work(struct msm_drm_private *priv,
>> > > > >  					int crtc_id, bool enable)
>> > > > >  {
>> > > > > -	struct msm_vblank_ctrl *vbl_ctrl = &priv->vblank_ctrl;
>> > > > > -	struct vblank_event *vbl_ev;
>> > > > > -	unsigned long flags;
>> > > > > +	struct msm_vblank_work *vbl_work;
>> > > > >
>> > > > > -	vbl_ev = kzalloc(sizeof(*vbl_ev), GFP_ATOMIC);
>> > > > > -	if (!vbl_ev)
>> > > > > +	vbl_work = kzalloc(sizeof(*vbl_work), GFP_ATOMIC);
>> > > > > +	if (!vbl_work)
>> > > > >  		return -ENOMEM;
>> > > > >
>> > > > > -	vbl_ev->crtc_id = crtc_id;
>> > > > > -	vbl_ev->enable = enable;
>> > > > > +	INIT_WORK(&vbl_work->work, vblank_ctrl_worker);
>> > > > >
>> > > > > -	spin_lock_irqsave(&vbl_ctrl->lock, flags);
>> > > > > -	list_add_tail(&vbl_ev->node, &vbl_ctrl->event_list);
>> > > > > -	spin_unlock_irqrestore(&vbl_ctrl->lock, flags);
>> > > > > +	vbl_work->crtc_id = crtc_id;
>> > > > > +	vbl_work->enable = enable;
>> > > > > +	vbl_work->priv = priv;
>> > > > >
>> > > > > -	schedule_work(&vbl_ctrl->work);
>> > > > > +	schedule_work(&vbl_work->work);
>> > > > >
>> > > > >  	return 0;
>> > > > >  }
>> > > > > @@ -269,14 +252,13 @@ static int msm_drm_uninit(struct device
> *dev)
>> > > > >  	struct msm_drm_private *priv = ddev->dev_private;
>> > > > >  	struct msm_kms *kms = priv->kms;
>> > > > >  	struct msm_mdss *mdss = priv->mdss;
>> > > > > -	struct msm_vblank_ctrl *vbl_ctrl = &priv->vblank_ctrl;
>> > > > > -	struct vblank_event *vbl_ev, *tmp;
>> > > > >  	int i;
>> > > > >
>> > > > >  	/* We must cancel and cleanup any pending vblank
> enable/disable
>> > > > >  	 * work before drm_irq_uninstall() to avoid work
> re-enabling an
>> > > > >  	 * irq after uninstall has disabled it.
>> > > > >  	 */
>> > > > > +
>> > > > >  	msm_gem_shrinker_cleanup(ddev);
>> > > > >
>> > > > >  	drm_kms_helper_poll_fini(ddev);
>> > > > > @@ -292,12 +274,6 @@ static int msm_drm_uninit(struct device
> *dev)
>> > > > >  #endif
>> > > > >  	drm_mode_config_cleanup(ddev);
>> > > > >
>> > > > > -	flush_work(&vbl_ctrl->work);
>> > > > > -	list_for_each_entry_safe(vbl_ev, tmp,
> &vbl_ctrl->event_list, node)
>> > > > {
>> > > > > -		list_del(&vbl_ev->node);
>> > > > > -		kfree(vbl_ev);
>> > > > > -	}
>> > > > > -
>> > > > >  	/* clean up event worker threads */
>> > > > >  	for (i = 0; i < priv->num_crtcs; i++) {
>> > > > >  		if (priv->event_thread[i].thread) {
>> > > > > @@ -469,9 +445,6 @@ static int msm_drm_init(struct device *dev,
>> > struct
>> > > > drm_driver *drv)
>> > > > >  	priv->wq = alloc_ordered_workqueue("msm", 0);
>> > > > >
>> > > > >  	INIT_LIST_HEAD(&priv->inactive_list);
>> > > > > -	INIT_LIST_HEAD(&priv->vblank_ctrl.event_list);
>> > > > > -	INIT_WORK(&priv->vblank_ctrl.work, vblank_ctrl_worker);
>> > > > > -	spin_lock_init(&priv->vblank_ctrl.lock);
>> > > > >
>> > > > >  	drm_mode_config_init(ddev);
>> > > > >
>> > > > > diff --git a/drivers/gpu/drm/msm/msm_drv.h
>> > > > b/drivers/gpu/drm/msm/msm_drv.h
>> > > > > index 05d33a7..d4cbde2 100644
>> > > > > --- a/drivers/gpu/drm/msm/msm_drv.h
>> > > > > +++ b/drivers/gpu/drm/msm/msm_drv.h
>> > > > > @@ -77,12 +77,6 @@ enum msm_mdp_plane_property {
>> > > > >  	PLANE_PROP_MAX_NUM
>> > > > >  };
>> > > > >
>> > > > > -struct msm_vblank_ctrl {
>> > > > > -	struct work_struct work;
>> > > > > -	struct list_head event_list;
>> > > > > -	spinlock_t lock;
>> > > > > -};
>> > > > > -
>> > > > >  #define MSM_GPU_MAX_RINGS 4
>> > > > >  #define MAX_H_TILES_PER_DISPLAY 2
>> > > > >
>> > > > > @@ -225,7 +219,6 @@ struct msm_drm_private {
>> > > > >  	struct notifier_block vmap_notifier;
>> > > > >  	struct shrinker shrinker;
>> > > > >
>> > > > > -	struct msm_vblank_ctrl vblank_ctrl;
>> > > > >  	struct drm_atomic_state *pm_state;
>> > > > >  };
>> > > > >
>> > > > > --
>> > > > > The Qualcomm Innovation Center, Inc. is a member of the Code
> Aurora
>> > > > Forum,
>> > > > > a Linux Foundation Collaborative Project
>> > > > >
>> > > > > _______________________________________________
>> > > > > Freedreno mailing list
>> > > > > Freedreno@lists.freedesktop.org
>> > > > > https://lists.freedesktop.org/mailman/listinfo/freedreno
>> > >
>> > > --
>> > > Jeykumar S
>> 
>> --
>> Jeykumar S
On Fri, Nov 30, 2018 at 04:21:15PM -0800, Jeykumar Sankaran wrote:
> On 2018-11-30 12:07, Sean Paul wrote:
> > On Fri, Nov 30, 2018 at 11:45:55AM -0800, Jeykumar Sankaran wrote:
> > > On 2018-11-29 14:15, Sean Paul wrote:
> > > > On Tue, Nov 20, 2018 at 02:04:14PM -0800, Jeykumar Sankaran wrote:
> > > > > On 2018-11-07 07:55, Sean Paul wrote:
> > > > > > On Tue, Nov 06, 2018 at 02:36:30PM -0800, Jeykumar Sankaran wrote:
> > > > > > > msm maintains a separate structure to define vblank
> > > > > > > work definitions and a list to track events submitted
> > > > > > > to the workqueue. We can avoid this redundant list
> > > > > > > and its protection mechanism, if we subclass the
> > > > > > > work object to encapsulate vblank event parameters.
> > > > > > >
> > > > > > > changes in v2:
> > > > > > > 	- subclass optimization on system wq (Sean Paul)
> > > > > >
> > > > > > I wouldn't do it like this, tbh. One problem is that you've lost
> > your
> > > > > > flush() on
> > > > > > unbind, so there's no way to know if you have workers in the wild
> > > > > > waiting
> > > > > > to
> > > > > > enable/disable vblank.
> > > > > >
> > > > > > Another issues is that AFAICT, we don't need a queue of
> > > > > > enables/disables,
> > > > > > but
> > > > > > rather just the last requested state (ie: should we be on or off).
> > So
> > > > > > things
> > > > > > don't need to be this complicated (and we're possibly thrashing
> > vblank
> > > > > > on/off
> > > > > > for no reason).
> > > > > >
> > > > > > I'm still of the mind that you should just make this synchronous
> > and
> > > > be
> > > > > > done
> > > > > > with the threads (especially since we're still
> > uncovering/introducing
> > > > > > races!).
> > > > > >
> > > > > While scoping out the effort to make vblank events synchronous, I
> > > > > found
> > > > > that the spinlock locking order of vblank request sequence and
> > vblank
> > > > > callback
> > > > > sequences are the opposite.
> > > > >
> > > > > In DPU, drm_vblank_enable acquires vblank_time_lock before
> > registering
> > > > > the crtc to encoder which happens after acquiring encoder_spinlock.
> > > > > But
> > > > > the vblank_callback acquires encoder_spinlock before accessing the
> > > > > registered
> > > > > crtc and calling into drm_vblank_handler which tries to acquire
> > > > > vblank_time_lock.
> > > > > Acquiring both vblank_time_lock and encoder_spinlock in the same
> > > > > thread
> > > > > is leading to deadlock.
> > > >
> > > > Hmm, I'm not sure I follow. Are you seeing issues where irq overlaps
> > > > with
> > > > enable/disable? I hacked in sync vblank enable/disable quickly to see
> > if
> > > > I
> > > > could
> > > > reproduce what you're seeing, but things seemed well behaved.
> > > >
> > > 
> > > The race is between drm_vblank_get/put and vblank_handler contexts.
> > > 
> > > When made synchronous:
> > > 
> > > while calling drm_vblank_get, the callstack looks like below:
> > > drm_vblank_get -> drm_vblank_enable (acquires vblank_time_lock) ->
> > > __enable_vblank -> dpu_crtc_vblank ->
> > > dpu_encoder_toggle_vblank_for_crtc
> > > (tries to acquire enc_spinlock)
> > > 
> > > In vblank handler, the call stack will be:
> > > dpu_encoder_phys_vid_vblank_irq -> dpu_encoder_vblank_callback
> > > (acquires
> > > enc_spinlock) -> dpu_crtc_vblank_callback -> drm_handle_vblank
> > > (tries to
> > > acquire vblank_time_lock)
> > 
> > Hmm, I'm not sure how this can happen. We acquire and release the
> > enc_spinlock
> > before enabling the irq, yes we will hold on to the vbl_time_lock, but
> > we
> > shouldn't be trying to reacquire an encoder's spinlock after we've
> > enabled
> > it.
> In the synchronous approach dpu_encoder_toggle_vblank_for_crtc(which
> acquires the enc_spinlock) will be called while we
> are holding the vbl_time_lock.
> 
> > I don't know how that can deadlock, since we should never be running
> > enable and
> > the handler concurrently.
> > 
> I agree that vblank_irq handler should not be running before the enable
> sequence. But
> don't you expect the handler to be running while calling the vblank_disable
> sequence?

This is an entirely different problem though. It's also one that is easier to
fix. I think we could probably grab the enc_spinlock in disable and clear the
crtc pointer.

What I'm getting at is that there's no fundamental reason why we need to have
async vblank enable/disable.

Sean

> vbl disable will try to acquire the locks in the opposite order to that of
> irq_handler and the
> same issue is bound to happen.
> 
> With your patch, you should be able to simulate this deadlock if you can
> inject a delay
> by adding a pr_err log in vblank_ctrl_queue_work
> 
> Thanks,
> Jeykumar S.
> 
> > The only thing I can think of is that the vblank interrupts are firing
> > after
> > vblank has been disabled? In that case, it seems like we should properly
> > flush
> > them.
> > 
> > Sean
> > 
> > 
> > > 
> > > 
> > > > I do see that there is a chance to call drm_handle_vblank() while
> > > > holding
> > > > enc_spinlock, but couldn't find any obvious lock recursion there.
> > > >
> > > > Maybe a callstack or lockdep splat would help?
> > > >
> > > > Sean
> > > >
> > > >
> > > > Here's my hack to bypass the display thread:
> > > >
> > > > diff --git a/drivers/gpu/drm/msm/msm_drv.c
> > > > b/drivers/gpu/drm/msm/msm_drv.c
> > > > index 9c9f7ff6960b38..5a3cac5825319e 100644
> > > > --- a/drivers/gpu/drm/msm/msm_drv.c
> > > > +++ b/drivers/gpu/drm/msm/msm_drv.c
> > > > @@ -242,24 +242,19 @@ static void vblank_ctrl_worker(struct
> > kthread_work
> > > > *work)
> > > >  static int vblank_ctrl_queue_work(struct msm_drm_private *priv,
> > > >  					int crtc_id, bool enable)
> > > >  {
> > > > +	struct msm_kms *kms = priv->kms;
> > > >  	struct msm_vblank_ctrl *vbl_ctrl = &priv->vblank_ctrl;
> > > > -	struct vblank_event *vbl_ev;
> > > >  	unsigned long flags;
> > > >
> > > > -	vbl_ev = kzalloc(sizeof(*vbl_ev), GFP_ATOMIC);
> > > > -	if (!vbl_ev)
> > > > -		return -ENOMEM;
> > > > +	spin_lock_irqsave(&vbl_ctrl->lock, flags);
> > > >
> > > > -	vbl_ev->crtc_id = crtc_id;
> > > > -	vbl_ev->enable = enable;
> > > > +	if (enable)
> > > > +		kms->funcs->enable_vblank(kms, priv->crtcs[crtc_id]);
> > > > +	else
> > > > +		kms->funcs->disable_vblank(kms, priv->crtcs[crtc_id]);
> > > >
> > > > -	spin_lock_irqsave(&vbl_ctrl->lock, flags);
> > > > -	list_add_tail(&vbl_ev->node, &vbl_ctrl->event_list);
> > > >  	spin_unlock_irqrestore(&vbl_ctrl->lock, flags);
> > > >
> > > > -	kthread_queue_work(&priv->disp_thread[crtc_id].worker,
> > > > -			&vbl_ctrl->work);
> > > > -
> > > >  	return 0;
> > > >  }
> > > >
> > > Even with your patch above, I see frame is getting stuck but it
> > > recovers
> > in
> > > a while.
> > > The patch I tried was assigning
> > crtc->funcs->enable_vblank/disable_vblank so
> > > that
> > > __enable_vblank can call crtc directly. But the above callstack is
> > > still
> > > valid for your patch.
> > > 
> > > Thanks,
> > > Jeykumar S.
> > > >
> > > >
> > > > >
> > > > > In MDP5, I see the same pattern between vblank_time_lock and
> > list_lock
> > > > which
> > > > > is used to track the irq handlers.
> > > > >
> > > > > I believe that explains why msm_drv is queuing the vblank
> > > > > enable/disable
> > > > > works to WQ after acquiring vblank_time_lock.
> > > > >
> > > > > Thanks,
> > > > > Jeykumar S.
> > > > >
> > > > > > Sean
> > > > > >
> > > > > > >
> > > > > > > Signed-off-by: Jeykumar Sankaran <jsanka@codeaurora.org>
> > > > > > > ---
> > > > > > >  drivers/gpu/drm/msm/msm_drv.c | 67
> > > > > > +++++++++++++------------------------------
> > > > > > >  drivers/gpu/drm/msm/msm_drv.h |  7 -----
> > > > > > >  2 files changed, 20 insertions(+), 54 deletions(-)
> > > > > > >
> > > > > > > diff --git a/drivers/gpu/drm/msm/msm_drv.c
> > > > > > b/drivers/gpu/drm/msm/msm_drv.c
> > > > > > > index 6d6c73b..8da5be2 100644
> > > > > > > --- a/drivers/gpu/drm/msm/msm_drv.c
> > > > > > > +++ b/drivers/gpu/drm/msm/msm_drv.c
> > > > > > > @@ -203,61 +203,44 @@ u32 msm_readl(const void __iomem *addr)
> > > > > > >  	return val;
> > > > > > >  }
> > > > > > >
> > > > > > > -struct vblank_event {
> > > > > > > -	struct list_head node;
> > > > > > > +struct msm_vblank_work {
> > > > > > > +	struct work_struct work;
> > > > > > >  	int crtc_id;
> > > > > > >  	bool enable;
> > > > > > > +	struct msm_drm_private *priv;
> > > > > > >  };
> > > > > > >
> > > > > > >  static void vblank_ctrl_worker(struct work_struct *work)
> > > > > > >  {
> > > > > > > -	struct msm_vblank_ctrl *vbl_ctrl = container_of(work,
> > > > > > > -						struct
> > msm_vblank_ctrl,
> > > > > > work);
> > > > > > > -	struct msm_drm_private *priv = container_of(vbl_ctrl,
> > > > > > > -					struct msm_drm_private,
> > > > > > vblank_ctrl);
> > > > > > > +	struct msm_vblank_work *vbl_work = container_of(work,
> > > > > > > +						struct
> > msm_vblank_work,
> > > > > > work);
> > > > > > > +	struct msm_drm_private *priv = vbl_work->priv;
> > > > > > >  	struct msm_kms *kms = priv->kms;
> > > > > > > -	struct vblank_event *vbl_ev, *tmp;
> > > > > > > -	unsigned long flags;
> > > > > > > -
> > > > > > > -	spin_lock_irqsave(&vbl_ctrl->lock, flags);
> > > > > > > -	list_for_each_entry_safe(vbl_ev, tmp,
> > &vbl_ctrl->event_list, node)
> > > > > > {
> > > > > > > -		list_del(&vbl_ev->node);
> > > > > > > -		spin_unlock_irqrestore(&vbl_ctrl->lock, flags);
> > > > > > > -
> > > > > > > -		if (vbl_ev->enable)
> > > > > > > -			kms->funcs->enable_vblank(kms,
> > > > > > > -
> > > > > > priv->crtcs[vbl_ev->crtc_id]);
> > > > > > > -		else
> > > > > > > -			kms->funcs->disable_vblank(kms,
> > > > > > > -
> > > > > > priv->crtcs[vbl_ev->crtc_id]);
> > > > > > >
> > > > > > > -		kfree(vbl_ev);
> > > > > > > -
> > > > > > > -		spin_lock_irqsave(&vbl_ctrl->lock, flags);
> > > > > > > -	}
> > > > > > > +	if (vbl_work->enable)
> > > > > > > +		kms->funcs->enable_vblank(kms,
> > > > > > priv->crtcs[vbl_work->crtc_id]);
> > > > > > > +	else
> > > > > > > +		kms->funcs->disable_vblank(kms,
> > > > > > priv->crtcs[vbl_work->crtc_id]);
> > > > > > >
> > > > > > > -	spin_unlock_irqrestore(&vbl_ctrl->lock, flags);
> > > > > > > +	kfree(vbl_work);
> > > > > > >  }
> > > > > > >
> > > > > > >  static int vblank_ctrl_queue_work(struct msm_drm_private *priv,
> > > > > > >  					int crtc_id, bool enable)
> > > > > > >  {
> > > > > > > -	struct msm_vblank_ctrl *vbl_ctrl = &priv->vblank_ctrl;
> > > > > > > -	struct vblank_event *vbl_ev;
> > > > > > > -	unsigned long flags;
> > > > > > > +	struct msm_vblank_work *vbl_work;
> > > > > > >
> > > > > > > -	vbl_ev = kzalloc(sizeof(*vbl_ev), GFP_ATOMIC);
> > > > > > > -	if (!vbl_ev)
> > > > > > > +	vbl_work = kzalloc(sizeof(*vbl_work), GFP_ATOMIC);
> > > > > > > +	if (!vbl_work)
> > > > > > >  		return -ENOMEM;
> > > > > > >
> > > > > > > -	vbl_ev->crtc_id = crtc_id;
> > > > > > > -	vbl_ev->enable = enable;
> > > > > > > +	INIT_WORK(&vbl_work->work, vblank_ctrl_worker);
> > > > > > >
> > > > > > > -	spin_lock_irqsave(&vbl_ctrl->lock, flags);
> > > > > > > -	list_add_tail(&vbl_ev->node, &vbl_ctrl->event_list);
> > > > > > > -	spin_unlock_irqrestore(&vbl_ctrl->lock, flags);
> > > > > > > +	vbl_work->crtc_id = crtc_id;
> > > > > > > +	vbl_work->enable = enable;
> > > > > > > +	vbl_work->priv = priv;
> > > > > > >
> > > > > > > -	schedule_work(&vbl_ctrl->work);
> > > > > > > +	schedule_work(&vbl_work->work);
> > > > > > >
> > > > > > >  	return 0;
> > > > > > >  }
> > > > > > > @@ -269,14 +252,13 @@ static int msm_drm_uninit(struct device
> > *dev)
> > > > > > >  	struct msm_drm_private *priv = ddev->dev_private;
> > > > > > >  	struct msm_kms *kms = priv->kms;
> > > > > > >  	struct msm_mdss *mdss = priv->mdss;
> > > > > > > -	struct msm_vblank_ctrl *vbl_ctrl = &priv->vblank_ctrl;
> > > > > > > -	struct vblank_event *vbl_ev, *tmp;
> > > > > > >  	int i;
> > > > > > >
> > > > > > >  	/* We must cancel and cleanup any pending vblank
> > enable/disable
> > > > > > >  	 * work before drm_irq_uninstall() to avoid work
> > re-enabling an
> > > > > > >  	 * irq after uninstall has disabled it.
> > > > > > >  	 */
> > > > > > > +
> > > > > > >  	msm_gem_shrinker_cleanup(ddev);
> > > > > > >
> > > > > > >  	drm_kms_helper_poll_fini(ddev);
> > > > > > > @@ -292,12 +274,6 @@ static int msm_drm_uninit(struct device
> > *dev)
> > > > > > >  #endif
> > > > > > >  	drm_mode_config_cleanup(ddev);
> > > > > > >
> > > > > > > -	flush_work(&vbl_ctrl->work);
> > > > > > > -	list_for_each_entry_safe(vbl_ev, tmp,
> > &vbl_ctrl->event_list, node)
> > > > > > {
> > > > > > > -		list_del(&vbl_ev->node);
> > > > > > > -		kfree(vbl_ev);
> > > > > > > -	}
> > > > > > > -
> > > > > > >  	/* clean up event worker threads */
> > > > > > >  	for (i = 0; i < priv->num_crtcs; i++) {
> > > > > > >  		if (priv->event_thread[i].thread) {
> > > > > > > @@ -469,9 +445,6 @@ static int msm_drm_init(struct device *dev,
> > > > struct
> > > > > > drm_driver *drv)
> > > > > > >  	priv->wq = alloc_ordered_workqueue("msm", 0);
> > > > > > >
> > > > > > >  	INIT_LIST_HEAD(&priv->inactive_list);
> > > > > > > -	INIT_LIST_HEAD(&priv->vblank_ctrl.event_list);
> > > > > > > -	INIT_WORK(&priv->vblank_ctrl.work, vblank_ctrl_worker);
> > > > > > > -	spin_lock_init(&priv->vblank_ctrl.lock);
> > > > > > >
> > > > > > >  	drm_mode_config_init(ddev);
> > > > > > >
> > > > > > > diff --git a/drivers/gpu/drm/msm/msm_drv.h
> > > > > > b/drivers/gpu/drm/msm/msm_drv.h
> > > > > > > index 05d33a7..d4cbde2 100644
> > > > > > > --- a/drivers/gpu/drm/msm/msm_drv.h
> > > > > > > +++ b/drivers/gpu/drm/msm/msm_drv.h
> > > > > > > @@ -77,12 +77,6 @@ enum msm_mdp_plane_property {
> > > > > > >  	PLANE_PROP_MAX_NUM
> > > > > > >  };
> > > > > > >
> > > > > > > -struct msm_vblank_ctrl {
> > > > > > > -	struct work_struct work;
> > > > > > > -	struct list_head event_list;
> > > > > > > -	spinlock_t lock;
> > > > > > > -};
> > > > > > > -
> > > > > > >  #define MSM_GPU_MAX_RINGS 4
> > > > > > >  #define MAX_H_TILES_PER_DISPLAY 2
> > > > > > >
> > > > > > > @@ -225,7 +219,6 @@ struct msm_drm_private {
> > > > > > >  	struct notifier_block vmap_notifier;
> > > > > > >  	struct shrinker shrinker;
> > > > > > >
> > > > > > > -	struct msm_vblank_ctrl vblank_ctrl;
> > > > > > >  	struct drm_atomic_state *pm_state;
> > > > > > >  };
> > > > > > >
> > > > > > > --
> > > > > > > The Qualcomm Innovation Center, Inc. is a member of the Code
> > Aurora
> > > > > > Forum,
> > > > > > > a Linux Foundation Collaborative Project
> > > > > > >
> > > > > > > _______________________________________________
> > > > > > > Freedreno mailing list
> > > > > > > Freedreno@lists.freedesktop.org
> > > > > > > https://lists.freedesktop.org/mailman/listinfo/freedreno
> > > > >
> > > > > --
> > > > > Jeykumar S
> > > 
> > > --
> > > Jeykumar S
> 
> -- 
> Jeykumar S
On 2018-12-03 06:21, Sean Paul wrote:
> On Fri, Nov 30, 2018 at 04:21:15PM -0800, Jeykumar Sankaran wrote:
>> On 2018-11-30 12:07, Sean Paul wrote:
>> > On Fri, Nov 30, 2018 at 11:45:55AM -0800, Jeykumar Sankaran wrote:
>> > > On 2018-11-29 14:15, Sean Paul wrote:
>> > > > On Tue, Nov 20, 2018 at 02:04:14PM -0800, Jeykumar Sankaran wrote:
>> > > > > On 2018-11-07 07:55, Sean Paul wrote:
>> > > > > > On Tue, Nov 06, 2018 at 02:36:30PM -0800, Jeykumar Sankaran
> wrote:
>> > > > > > > msm maintains a separate structure to define vblank
>> > > > > > > work definitions and a list to track events submitted
>> > > > > > > to the workqueue. We can avoid this redundant list
>> > > > > > > and its protection mechanism, if we subclass the
>> > > > > > > work object to encapsulate vblank event parameters.
>> > > > > > >
>> > > > > > > changes in v2:
>> > > > > > > 	- subclass optimization on system wq (Sean Paul)
>> > > > > >
>> > > > > > I wouldn't do it like this, tbh. One problem is that you've
> lost
>> > your
>> > > > > > flush() on
>> > > > > > unbind, so there's no way to know if you have workers in the
> wild
>> > > > > > waiting
>> > > > > > to
>> > > > > > enable/disable vblank.
>> > > > > >
>> > > > > > Another issues is that AFAICT, we don't need a queue of
>> > > > > > enables/disables,
>> > > > > > but
>> > > > > > rather just the last requested state (ie: should we be on or
> off).
>> > So
>> > > > > > things
>> > > > > > don't need to be this complicated (and we're possibly
> thrashing
>> > vblank
>> > > > > > on/off
>> > > > > > for no reason).
>> > > > > >
>> > > > > > I'm still of the mind that you should just make this
> synchronous
>> > and
>> > > > be
>> > > > > > done
>> > > > > > with the threads (especially since we're still
>> > uncovering/introducing
>> > > > > > races!).
>> > > > > >
>> > > > > While scoping out the effort to make vblank events synchronous,
> I
>> > > > > found
>> > > > > that the spinlock locking order of vblank request sequence and
>> > vblank
>> > > > > callback
>> > > > > sequences are the opposite.
>> > > > >
>> > > > > In DPU, drm_vblank_enable acquires vblank_time_lock before
>> > registering
>> > > > > the crtc to encoder which happens after acquiring
> encoder_spinlock.
>> > > > > But
>> > > > > the vblank_callback acquires encoder_spinlock before accessing
> the
>> > > > > registered
>> > > > > crtc and calling into drm_vblank_handler which tries to acquire
>> > > > > vblank_time_lock.
>> > > > > Acquiring both vblank_time_lock and encoder_spinlock in the same
>> > > > > thread
>> > > > > is leading to deadlock.
>> > > >
>> > > > Hmm, I'm not sure I follow. Are you seeing issues where irq
> overlaps
>> > > > with
>> > > > enable/disable? I hacked in sync vblank enable/disable quickly to
> see
>> > if
>> > > > I
>> > > > could
>> > > > reproduce what you're seeing, but things seemed well behaved.
>> > > >
>> > >
>> > > The race is between drm_vblank_get/put and vblank_handler contexts.
>> > >
>> > > When made synchronous:
>> > >
>> > > while calling drm_vblank_get, the callstack looks like below:
>> > > drm_vblank_get -> drm_vblank_enable (acquires vblank_time_lock) ->
>> > > __enable_vblank -> dpu_crtc_vblank ->
>> > > dpu_encoder_toggle_vblank_for_crtc
>> > > (tries to acquire enc_spinlock)
>> > >
>> > > In vblank handler, the call stack will be:
>> > > dpu_encoder_phys_vid_vblank_irq -> dpu_encoder_vblank_callback
>> > > (acquires
>> > > enc_spinlock) -> dpu_crtc_vblank_callback -> drm_handle_vblank
>> > > (tries to
>> > > acquire vblank_time_lock)
>> >
>> > Hmm, I'm not sure how this can happen. We acquire and release the
>> > enc_spinlock
>> > before enabling the irq, yes we will hold on to the vbl_time_lock, but
>> > we
>> > shouldn't be trying to reacquire an encoder's spinlock after we've
>> > enabled
>> > it.
>> In the synchronous approach dpu_encoder_toggle_vblank_for_crtc(which
>> acquires the enc_spinlock) will be called while we
>> are holding the vbl_time_lock.
>> 
>> > I don't know how that can deadlock, since we should never be running
>> > enable and
>> > the handler concurrently.
>> >
>> I agree that vblank_irq handler should not be running before the 
>> enable
>> sequence. But
>> don't you expect the handler to be running while calling the
> vblank_disable
>> sequence?
> 
> This is an entirely different problem though. It's also one that is 
> easier
> to
> fix. I think we could probably grab the enc_spinlock in disable and 
> clear
> the
> crtc pointer.
> 
We do hold enc_spinlock in dpu_encoder_assign_crtc (drm/msm: dpu: Remove 
vblank_callback from encoder),
where we clear the crtc pointer.

> What I'm getting at is that there's no fundamental reason why we need 
> to
> have
> async vblank enable/disable.
> 
> Sean
> 
There is really no *need* to have them async. But I believe the reason 
why they
are implemented this way is to avoid deadlock between the below two 
paths.

Restating the above findings:
vblank_handlers and vblank enable/disable can run concurrently. The 
first tries to acquire
vbl_time_lock while holding enc_spinlock; the other tries to acquire 
enc_spinlock while holding
vbl_time_lock.

Thanks,
Jeykumar S.


>> vbl disable will try to acquire the locks in the opposite order to 
>> that
> of
>> irq_handler and the
>> same issue is bound to happen.
>> 
>> With your patch, you should be able to simulate this deadlock if you 
>> can
>> inject a delay
>> by adding a pr_err log in vblank_ctrl_queue_work
>> 
>> Thanks,
>> Jeykumar S.
>> 
>> > The only thing I can think of is that the vblank interrupts are firing
>> > after
>> > vblank has been disabled? In that case, it seems like we should
> properly
>> > flush
>> > them.
>> >
>> > Sean
>> >
>> >
>> > >
>> > >
>> > > > I do see that there is a chance to call drm_handle_vblank() while
>> > > > holding
>> > > > enc_spinlock, but couldn't find any obvious lock recursion there.
>> > > >
>> > > > Maybe a callstack or lockdep splat would help?
>> > > >
>> > > > Sean
>> > > >
>> > > >
>> > > > Here's my hack to bypass the display thread:
>> > > >
>> > > > diff --git a/drivers/gpu/drm/msm/msm_drv.c
>> > > > b/drivers/gpu/drm/msm/msm_drv.c
>> > > > index 9c9f7ff6960b38..5a3cac5825319e 100644
>> > > > --- a/drivers/gpu/drm/msm/msm_drv.c
>> > > > +++ b/drivers/gpu/drm/msm/msm_drv.c
>> > > > @@ -242,24 +242,19 @@ static void vblank_ctrl_worker(struct
>> > kthread_work
>> > > > *work)
>> > > >  static int vblank_ctrl_queue_work(struct msm_drm_private *priv,
>> > > >  					int crtc_id, bool enable)
>> > > >  {
>> > > > +	struct msm_kms *kms = priv->kms;
>> > > >  	struct msm_vblank_ctrl *vbl_ctrl = &priv->vblank_ctrl;
>> > > > -	struct vblank_event *vbl_ev;
>> > > >  	unsigned long flags;
>> > > >
>> > > > -	vbl_ev = kzalloc(sizeof(*vbl_ev), GFP_ATOMIC);
>> > > > -	if (!vbl_ev)
>> > > > -		return -ENOMEM;
>> > > > +	spin_lock_irqsave(&vbl_ctrl->lock, flags);
>> > > >
>> > > > -	vbl_ev->crtc_id = crtc_id;
>> > > > -	vbl_ev->enable = enable;
>> > > > +	if (enable)
>> > > > +		kms->funcs->enable_vblank(kms,
> priv->crtcs[crtc_id]);
>> > > > +	else
>> > > > +		kms->funcs->disable_vblank(kms,
> priv->crtcs[crtc_id]);
>> > > >
>> > > > -	spin_lock_irqsave(&vbl_ctrl->lock, flags);
>> > > > -	list_add_tail(&vbl_ev->node, &vbl_ctrl->event_list);
>> > > >  	spin_unlock_irqrestore(&vbl_ctrl->lock, flags);
>> > > >
>> > > > -	kthread_queue_work(&priv->disp_thread[crtc_id].worker,
>> > > > -			&vbl_ctrl->work);
>> > > > -
>> > > >  	return 0;
>> > > >  }
>> > > >
>> > > Even with your patch above, I see frame is getting stuck but it
>> > > recovers
>> > in
>> > > a while.
>> > > The patch I tried was assigning
>> > crtc->funcs->enable_vblank/disable_vblank so
>> > > that
>> > > __enable_vblank can call crtc directly. But the above callstack is
>> > > still
>> > > valid for your patch.
>> > >
>> > > Thanks,
>> > > Jeykumar S.
>> > > >
>> > > >
>> > > > >
>> > > > > In MDP5, I see the same pattern between vblank_time_lock and
>> > list_lock
>> > > > which
>> > > > > is used to track the irq handlers.
>> > > > >
>> > > > > I believe that explains why msm_drv is queuing the vblank
>> > > > > enable/disable
>> > > > > works to WQ after acquiring vblank_time_lock.
>> > > > >
>> > > > > Thanks,
>> > > > > Jeykumar S.
>> > > > >
>> > > > > > Sean
>> > > > > >
>> > > > > > >
>> > > > > > > Signed-off-by: Jeykumar Sankaran <jsanka@codeaurora.org>
>> > > > > > > ---
>> > > > > > >  drivers/gpu/drm/msm/msm_drv.c | 67
>> > > > > > +++++++++++++------------------------------
>> > > > > > >  drivers/gpu/drm/msm/msm_drv.h |  7 -----
>> > > > > > >  2 files changed, 20 insertions(+), 54 deletions(-)
>> > > > > > >
>> > > > > > > diff --git a/drivers/gpu/drm/msm/msm_drv.c
>> > > > > > b/drivers/gpu/drm/msm/msm_drv.c
>> > > > > > > index 6d6c73b..8da5be2 100644
>> > > > > > > --- a/drivers/gpu/drm/msm/msm_drv.c
>> > > > > > > +++ b/drivers/gpu/drm/msm/msm_drv.c
>> > > > > > > @@ -203,61 +203,44 @@ u32 msm_readl(const void __iomem
> *addr)
>> > > > > > >  	return val;
>> > > > > > >  }
>> > > > > > >
>> > > > > > > -struct vblank_event {
>> > > > > > > -	struct list_head node;
>> > > > > > > +struct msm_vblank_work {
>> > > > > > > +	struct work_struct work;
>> > > > > > >  	int crtc_id;
>> > > > > > >  	bool enable;
>> > > > > > > +	struct msm_drm_private *priv;
>> > > > > > >  };
>> > > > > > >
>> > > > > > >  static void vblank_ctrl_worker(struct work_struct *work)
>> > > > > > >  {
>> > > > > > > -	struct msm_vblank_ctrl *vbl_ctrl = container_of(work,
>> > > > > > > -						struct
>> > msm_vblank_ctrl,
>> > > > > > work);
>> > > > > > > -	struct msm_drm_private *priv = container_of(vbl_ctrl,
>> > > > > > > -					struct msm_drm_private,
>> > > > > > vblank_ctrl);
>> > > > > > > +	struct msm_vblank_work *vbl_work = container_of(work,
>> > > > > > > +						struct
>> > msm_vblank_work,
>> > > > > > work);
>> > > > > > > +	struct msm_drm_private *priv = vbl_work->priv;
>> > > > > > >  	struct msm_kms *kms = priv->kms;
>> > > > > > > -	struct vblank_event *vbl_ev, *tmp;
>> > > > > > > -	unsigned long flags;
>> > > > > > > -
>> > > > > > > -	spin_lock_irqsave(&vbl_ctrl->lock, flags);
>> > > > > > > -	list_for_each_entry_safe(vbl_ev, tmp,
>> > &vbl_ctrl->event_list, node)
>> > > > > > {
>> > > > > > > -		list_del(&vbl_ev->node);
>> > > > > > > -		spin_unlock_irqrestore(&vbl_ctrl->lock, flags);
>> > > > > > > -
>> > > > > > > -		if (vbl_ev->enable)
>> > > > > > > -			kms->funcs->enable_vblank(kms,
>> > > > > > > -
>> > > > > > priv->crtcs[vbl_ev->crtc_id]);
>> > > > > > > -		else
>> > > > > > > -			kms->funcs->disable_vblank(kms,
>> > > > > > > -
>> > > > > > priv->crtcs[vbl_ev->crtc_id]);
>> > > > > > >
>> > > > > > > -		kfree(vbl_ev);
>> > > > > > > -
>> > > > > > > -		spin_lock_irqsave(&vbl_ctrl->lock, flags);
>> > > > > > > -	}
>> > > > > > > +	if (vbl_work->enable)
>> > > > > > > +		kms->funcs->enable_vblank(kms,
>> > > > > > priv->crtcs[vbl_work->crtc_id]);
>> > > > > > > +	else
>> > > > > > > +		kms->funcs->disable_vblank(kms,
>> > > > > > priv->crtcs[vbl_work->crtc_id]);
>> > > > > > >
>> > > > > > > -	spin_unlock_irqrestore(&vbl_ctrl->lock, flags);
>> > > > > > > +	kfree(vbl_work);
>> > > > > > >  }
>> > > > > > >
>> > > > > > >  static int vblank_ctrl_queue_work(struct msm_drm_private
> *priv,
>> > > > > > >  					int crtc_id, bool enable)
>> > > > > > >  {
>> > > > > > > -	struct msm_vblank_ctrl *vbl_ctrl = &priv->vblank_ctrl;
>> > > > > > > -	struct vblank_event *vbl_ev;
>> > > > > > > -	unsigned long flags;
>> > > > > > > +	struct msm_vblank_work *vbl_work;
>> > > > > > >
>> > > > > > > -	vbl_ev = kzalloc(sizeof(*vbl_ev), GFP_ATOMIC);
>> > > > > > > -	if (!vbl_ev)
>> > > > > > > +	vbl_work = kzalloc(sizeof(*vbl_work), GFP_ATOMIC);
>> > > > > > > +	if (!vbl_work)
>> > > > > > >  		return -ENOMEM;
>> > > > > > >
>> > > > > > > -	vbl_ev->crtc_id = crtc_id;
>> > > > > > > -	vbl_ev->enable = enable;
>> > > > > > > +	INIT_WORK(&vbl_work->work, vblank_ctrl_worker);
>> > > > > > >
>> > > > > > > -	spin_lock_irqsave(&vbl_ctrl->lock, flags);
>> > > > > > > -	list_add_tail(&vbl_ev->node, &vbl_ctrl->event_list);
>> > > > > > > -	spin_unlock_irqrestore(&vbl_ctrl->lock, flags);
>> > > > > > > +	vbl_work->crtc_id = crtc_id;
>> > > > > > > +	vbl_work->enable = enable;
>> > > > > > > +	vbl_work->priv = priv;
>> > > > > > >
>> > > > > > > -	schedule_work(&vbl_ctrl->work);
>> > > > > > > +	schedule_work(&vbl_work->work);
>> > > > > > >
>> > > > > > >  	return 0;
>> > > > > > >  }
>> > > > > > > @@ -269,14 +252,13 @@ static int msm_drm_uninit(struct
> device
>> > *dev)
>> > > > > > >  	struct msm_drm_private *priv = ddev->dev_private;
>> > > > > > >  	struct msm_kms *kms = priv->kms;
>> > > > > > >  	struct msm_mdss *mdss = priv->mdss;
>> > > > > > > -	struct msm_vblank_ctrl *vbl_ctrl = &priv->vblank_ctrl;
>> > > > > > > -	struct vblank_event *vbl_ev, *tmp;
>> > > > > > >  	int i;
>> > > > > > >
>> > > > > > >  	/* We must cancel and cleanup any pending vblank
>> > enable/disable
>> > > > > > >  	 * work before drm_irq_uninstall() to avoid work
>> > re-enabling an
>> > > > > > >  	 * irq after uninstall has disabled it.
>> > > > > > >  	 */
>> > > > > > > +
>> > > > > > >  	msm_gem_shrinker_cleanup(ddev);
>> > > > > > >
>> > > > > > >  	drm_kms_helper_poll_fini(ddev);
>> > > > > > > @@ -292,12 +274,6 @@ static int msm_drm_uninit(struct device
>> > *dev)
>> > > > > > >  #endif
>> > > > > > >  	drm_mode_config_cleanup(ddev);
>> > > > > > >
>> > > > > > > -	flush_work(&vbl_ctrl->work);
>> > > > > > > -	list_for_each_entry_safe(vbl_ev, tmp,
>> > &vbl_ctrl->event_list, node)
>> > > > > > {
>> > > > > > > -		list_del(&vbl_ev->node);
>> > > > > > > -		kfree(vbl_ev);
>> > > > > > > -	}
>> > > > > > > -
>> > > > > > >  	/* clean up event worker threads */
>> > > > > > >  	for (i = 0; i < priv->num_crtcs; i++) {
>> > > > > > >  		if (priv->event_thread[i].thread) {
>> > > > > > > @@ -469,9 +445,6 @@ static int msm_drm_init(struct device
> *dev,
>> > > > struct
>> > > > > > drm_driver *drv)
>> > > > > > >  	priv->wq = alloc_ordered_workqueue("msm", 0);
>> > > > > > >
>> > > > > > >  	INIT_LIST_HEAD(&priv->inactive_list);
>> > > > > > > -	INIT_LIST_HEAD(&priv->vblank_ctrl.event_list);
>> > > > > > > -	INIT_WORK(&priv->vblank_ctrl.work, vblank_ctrl_worker);
>> > > > > > > -	spin_lock_init(&priv->vblank_ctrl.lock);
>> > > > > > >
>> > > > > > >  	drm_mode_config_init(ddev);
>> > > > > > >
>> > > > > > > diff --git a/drivers/gpu/drm/msm/msm_drv.h
>> > > > > > b/drivers/gpu/drm/msm/msm_drv.h
>> > > > > > > index 05d33a7..d4cbde2 100644
>> > > > > > > --- a/drivers/gpu/drm/msm/msm_drv.h
>> > > > > > > +++ b/drivers/gpu/drm/msm/msm_drv.h
>> > > > > > > @@ -77,12 +77,6 @@ enum msm_mdp_plane_property {
>> > > > > > >  	PLANE_PROP_MAX_NUM
>> > > > > > >  };
>> > > > > > >
>> > > > > > > -struct msm_vblank_ctrl {
>> > > > > > > -	struct work_struct work;
>> > > > > > > -	struct list_head event_list;
>> > > > > > > -	spinlock_t lock;
>> > > > > > > -};
>> > > > > > > -
>> > > > > > >  #define MSM_GPU_MAX_RINGS 4
>> > > > > > >  #define MAX_H_TILES_PER_DISPLAY 2
>> > > > > > >
>> > > > > > > @@ -225,7 +219,6 @@ struct msm_drm_private {
>> > > > > > >  	struct notifier_block vmap_notifier;
>> > > > > > >  	struct shrinker shrinker;
>> > > > > > >
>> > > > > > > -	struct msm_vblank_ctrl vblank_ctrl;
>> > > > > > >  	struct drm_atomic_state *pm_state;
>> > > > > > >  };
>> > > > > > >
>> > > > > > > --
>> > > > > > > The Qualcomm Innovation Center, Inc. is a member of the Code
>> > Aurora
>> > > > > > Forum,
>> > > > > > > a Linux Foundation Collaborative Project
>> > > > > > >
>> > > > > > > _______________________________________________
>> > > > > > > Freedreno mailing list
>> > > > > > > Freedreno@lists.freedesktop.org
>> > > > > > > https://lists.freedesktop.org/mailman/listinfo/freedreno
>> > > > >
>> > > > > --
>> > > > > Jeykumar S
>> > >
>> > > --
>> > > Jeykumar S
>> 
>> --
>> Jeykumar S
On Mon, Dec 03, 2018 at 12:27:42PM -0800, Jeykumar Sankaran wrote:
> On 2018-12-03 06:21, Sean Paul wrote:
> > On Fri, Nov 30, 2018 at 04:21:15PM -0800, Jeykumar Sankaran wrote:
> > > On 2018-11-30 12:07, Sean Paul wrote:
> > > > On Fri, Nov 30, 2018 at 11:45:55AM -0800, Jeykumar Sankaran wrote:
> > > > > On 2018-11-29 14:15, Sean Paul wrote:
> > > > > > On Tue, Nov 20, 2018 at 02:04:14PM -0800, Jeykumar Sankaran wrote:
> > > > > > > On 2018-11-07 07:55, Sean Paul wrote:
> > > > > > > > On Tue, Nov 06, 2018 at 02:36:30PM -0800, Jeykumar Sankaran
> > wrote:
> > > > > > > > > msm maintains a separate structure to define vblank
> > > > > > > > > work definitions and a list to track events submitted
> > > > > > > > > to the workqueue. We can avoid this redundant list
> > > > > > > > > and its protection mechanism, if we subclass the
> > > > > > > > > work object to encapsulate vblank event parameters.
> > > > > > > > >
> > > > > > > > > changes in v2:
> > > > > > > > > 	- subclass optimization on system wq (Sean Paul)
> > > > > > > >
> > > > > > > > I wouldn't do it like this, tbh. One problem is that you've
> > lost
> > > > your
> > > > > > > > flush() on
> > > > > > > > unbind, so there's no way to know if you have workers in the
> > wild
> > > > > > > > waiting
> > > > > > > > to
> > > > > > > > enable/disable vblank.
> > > > > > > >
> > > > > > > > Another issues is that AFAICT, we don't need a queue of
> > > > > > > > enables/disables,
> > > > > > > > but
> > > > > > > > rather just the last requested state (ie: should we be on or
> > off).
> > > > So
> > > > > > > > things
> > > > > > > > don't need to be this complicated (and we're possibly
> > thrashing
> > > > vblank
> > > > > > > > on/off
> > > > > > > > for no reason).
> > > > > > > >
> > > > > > > > I'm still of the mind that you should just make this
> > synchronous
> > > > and
> > > > > > be
> > > > > > > > done
> > > > > > > > with the threads (especially since we're still
> > > > uncovering/introducing
> > > > > > > > races!).
> > > > > > > >
> > > > > > > While scoping out the effort to make vblank events synchronous,
> > I
> > > > > > > found
> > > > > > > that the spinlock locking order of vblank request sequence and
> > > > vblank
> > > > > > > callback
> > > > > > > sequences are the opposite.
> > > > > > >
> > > > > > > In DPU, drm_vblank_enable acquires vblank_time_lock before
> > > > registering
> > > > > > > the crtc to encoder which happens after acquiring
> > encoder_spinlock.
> > > > > > > But
> > > > > > > the vblank_callback acquires encoder_spinlock before accessing
> > the
> > > > > > > registered
> > > > > > > crtc and calling into drm_vblank_handler which tries to acquire
> > > > > > > vblank_time_lock.
> > > > > > > Acquiring both vblank_time_lock and encoder_spinlock in the same
> > > > > > > thread
> > > > > > > is leading to deadlock.
> > > > > >
> > > > > > Hmm, I'm not sure I follow. Are you seeing issues where irq
> > overlaps
> > > > > > with
> > > > > > enable/disable? I hacked in sync vblank enable/disable quickly to
> > see
> > > > if
> > > > > > I
> > > > > > could
> > > > > > reproduce what you're seeing, but things seemed well behaved.
> > > > > >
> > > > >
> > > > > The race is between drm_vblank_get/put and vblank_handler contexts.
> > > > >
> > > > > When made synchronous:
> > > > >
> > > > > while calling drm_vblank_get, the callstack looks like below:
> > > > > drm_vblank_get -> drm_vblank_enable (acquires vblank_time_lock) ->
> > > > > __enable_vblank -> dpu_crtc_vblank ->
> > > > > dpu_encoder_toggle_vblank_for_crtc
> > > > > (tries to acquire enc_spinlock)
> > > > >
> > > > > In vblank handler, the call stack will be:
> > > > > dpu_encoder_phys_vid_vblank_irq -> dpu_encoder_vblank_callback
> > > > > (acquires
> > > > > enc_spinlock) -> dpu_crtc_vblank_callback -> drm_handle_vblank
> > > > > (tries to
> > > > > acquire vblank_time_lock)
> > > >
> > > > Hmm, I'm not sure how this can happen. We acquire and release the
> > > > enc_spinlock
> > > > before enabling the irq, yes we will hold on to the vbl_time_lock, but
> > > > we
> > > > shouldn't be trying to reacquire an encoder's spinlock after we've
> > > > enabled
> > > > it.
> > > In the synchronous approach dpu_encoder_toggle_vblank_for_crtc(which
> > > acquires the enc_spinlock) will be called while we
> > > are holding the vbl_time_lock.
> > > 
> > > > I don't know how that can deadlock, since we should never be running
> > > > enable and
> > > > the handler concurrently.
> > > >
> > > I agree that vblank_irq handler should not be running before the
> > > enable
> > > sequence. But
> > > don't you expect the handler to be running while calling the
> > vblank_disable
> > > sequence?
> > 
> > This is an entirely different problem though. It's also one that is
> > easier
> > to
> > fix. I think we could probably grab the enc_spinlock in disable and
> > clear
> > the
> > crtc pointer.
> > 
> we do hold enc_spinlock in dpu_encoder_assign_crtc (drm/msm: dpu: Remove
> vblank_callback from encoder)
> where we clear the crtc pointer.
> 
> > What I'm getting at is that there's no fundamental reason why we need to
> > have
> > async vblank enable/disable.
> > 
> > Sean
> > 
> There is really no *need* to have them async. But I believe the reason why
> they
> are implemented this way is to avoid deadlock between the below two paths.
> 
> Restating the above findings:
> vblank_handlers and vblank enable/disable can run concurrently. 

I think this is where we disagree. The handler will only be called when
enc->crtc is set.

In the case of disable, we clear the pointer _after_ vblank is
disabled, so enc_spinlock should be uncontested.

On enable, the pointer is set _before_ vblank is enabled, so again the
enc_spinlock is uncontested.

I tracked down the deadlock you found and submitted a patch for it in [1]. The
issue is that vblank was being incorrectly enabled whenever the encoder was on.
With the fix, you can add as long a delay as you want to vblank_enable and you
won't produce a deadlock.

Sean

[1]- https://lists.freedesktop.org/archives/dri-devel/2018-December/199670.html


> The first
> trying to acquire
> vbl_time_lock holding enc_spinlock. Other trying to acquire enc_spinlock
> holding
> vbl_time_lock.
> 
> Thanks,
> Jeykumar S.
> 
> 
> > > vbl disable will try to acquire the locks in the opposite order to
> > > that
> > of
> > > irq_handler and the
> > > same issue is bound to happen.
> > > 
> > > With your patch, you should be able to simulate this deadlock if you
> > > can
> > > inject a delay
> > > by adding a pr_err log in vblank_ctrl_queue_work
> > > 
> > > Thanks,
> > > Jeykumar S.
> > > 
> > > > The only thing I can think of is that the vblank interrupts are firing
> > > > after
> > > > vblank has been disabled? In that case, it seems like we should
> > properly
> > > > flush
> > > > them.
> > > >
> > > > Sean
> > > >
> > > >
> > > > >
> > > > >
> > > > > > I do see that there is a chance to call drm_handle_vblank() while
> > > > > > holding
> > > > > > enc_spinlock, but couldn't find any obvious lock recursion there.
> > > > > >
> > > > > > Maybe a callstack or lockdep splat would help?
> > > > > >
> > > > > > Sean
> > > > > >
> > > > > >
> > > > > > Here's my hack to bypass the display thread:
> > > > > >
> > > > > > diff --git a/drivers/gpu/drm/msm/msm_drv.c
> > > > > > b/drivers/gpu/drm/msm/msm_drv.c
> > > > > > index 9c9f7ff6960b38..5a3cac5825319e 100644
> > > > > > --- a/drivers/gpu/drm/msm/msm_drv.c
> > > > > > +++ b/drivers/gpu/drm/msm/msm_drv.c
> > > > > > @@ -242,24 +242,19 @@ static void vblank_ctrl_worker(struct
> > > > kthread_work
> > > > > > *work)
> > > > > >  static int vblank_ctrl_queue_work(struct msm_drm_private *priv,
> > > > > >  					int crtc_id, bool enable)
> > > > > >  {
> > > > > > +	struct msm_kms *kms = priv->kms;
> > > > > >  	struct msm_vblank_ctrl *vbl_ctrl = &priv->vblank_ctrl;
> > > > > > -	struct vblank_event *vbl_ev;
> > > > > >  	unsigned long flags;
> > > > > >
> > > > > > -	vbl_ev = kzalloc(sizeof(*vbl_ev), GFP_ATOMIC);
> > > > > > -	if (!vbl_ev)
> > > > > > -		return -ENOMEM;
> > > > > > +	spin_lock_irqsave(&vbl_ctrl->lock, flags);
> > > > > >
> > > > > > -	vbl_ev->crtc_id = crtc_id;
> > > > > > -	vbl_ev->enable = enable;
> > > > > > +	if (enable)
> > > > > > +		kms->funcs->enable_vblank(kms,
> > priv->crtcs[crtc_id]);
> > > > > > +	else
> > > > > > +		kms->funcs->disable_vblank(kms,
> > priv->crtcs[crtc_id]);
> > > > > >
> > > > > > -	spin_lock_irqsave(&vbl_ctrl->lock, flags);
> > > > > > -	list_add_tail(&vbl_ev->node, &vbl_ctrl->event_list);
> > > > > >  	spin_unlock_irqrestore(&vbl_ctrl->lock, flags);
> > > > > >
> > > > > > -	kthread_queue_work(&priv->disp_thread[crtc_id].worker,
> > > > > > -			&vbl_ctrl->work);
> > > > > > -
> > > > > >  	return 0;
> > > > > >  }
> > > > > >
> > > > > Even with your patch above, I see frame is getting stuck but it
> > > > > recovers
> > > > in
> > > > > a while.
> > > > > The patch I tried was assigning
> > > > crtc->funcs->enable_vblank/disable_vblank so
> > > > > that
> > > > > __enable_vblank can call crtc directly. But the above callstack is
> > > > > still
> > > > > valid for your patch.
> > > > >
> > > > > Thanks,
> > > > > Jeykumar S.
> > > > > >
> > > > > >
> > > > > > >
> > > > > > > In MDP5, I see the same pattern between vblank_time_lock and
> > > > list_lock
> > > > > > which
> > > > > > > is used to track the irq handlers.
> > > > > > >
> > > > > > > I believe that explains why msm_drv is queuing the vblank
> > > > > > > enable/disable
> > > > > > > works to WQ after acquiring vblank_time_lock.
> > > > > > >
> > > > > > > Thanks,
> > > > > > > Jeykumar S.
> > > > > > >
> > > > > > > > Sean
> > > > > > > >
> > > > > > > > >
> > > > > > > > > Signed-off-by: Jeykumar Sankaran <jsanka@codeaurora.org>
> > > > > > > > > ---
> > > > > > > > >  drivers/gpu/drm/msm/msm_drv.c | 67
> > > > > > > > +++++++++++++------------------------------
> > > > > > > > >  drivers/gpu/drm/msm/msm_drv.h |  7 -----
> > > > > > > > >  2 files changed, 20 insertions(+), 54 deletions(-)
> > > > > > > > >
> > > > > > > > > diff --git a/drivers/gpu/drm/msm/msm_drv.c
> > > > > > > > b/drivers/gpu/drm/msm/msm_drv.c
> > > > > > > > > index 6d6c73b..8da5be2 100644
> > > > > > > > > --- a/drivers/gpu/drm/msm/msm_drv.c
> > > > > > > > > +++ b/drivers/gpu/drm/msm/msm_drv.c
> > > > > > > > > @@ -203,61 +203,44 @@ u32 msm_readl(const void __iomem
> > *addr)
> > > > > > > > >  	return val;
> > > > > > > > >  }
> > > > > > > > >
> > > > > > > > > -struct vblank_event {
> > > > > > > > > -	struct list_head node;
> > > > > > > > > +struct msm_vblank_work {
> > > > > > > > > +	struct work_struct work;
> > > > > > > > >  	int crtc_id;
> > > > > > > > >  	bool enable;
> > > > > > > > > +	struct msm_drm_private *priv;
> > > > > > > > >  };
> > > > > > > > >
> > > > > > > > >  static void vblank_ctrl_worker(struct work_struct *work)
> > > > > > > > >  {
> > > > > > > > > -	struct msm_vblank_ctrl *vbl_ctrl = container_of(work,
> > > > > > > > > -						struct
> > > > msm_vblank_ctrl,
> > > > > > > > work);
> > > > > > > > > -	struct msm_drm_private *priv = container_of(vbl_ctrl,
> > > > > > > > > -					struct msm_drm_private,
> > > > > > > > vblank_ctrl);
> > > > > > > > > +	struct msm_vblank_work *vbl_work = container_of(work,
> > > > > > > > > +						struct
> > > > msm_vblank_work,
> > > > > > > > work);
> > > > > > > > > +	struct msm_drm_private *priv = vbl_work->priv;
> > > > > > > > >  	struct msm_kms *kms = priv->kms;
> > > > > > > > > -	struct vblank_event *vbl_ev, *tmp;
> > > > > > > > > -	unsigned long flags;
> > > > > > > > > -
> > > > > > > > > -	spin_lock_irqsave(&vbl_ctrl->lock, flags);
> > > > > > > > > -	list_for_each_entry_safe(vbl_ev, tmp,
> > > > &vbl_ctrl->event_list, node)
> > > > > > > > {
> > > > > > > > > -		list_del(&vbl_ev->node);
> > > > > > > > > -		spin_unlock_irqrestore(&vbl_ctrl->lock, flags);
> > > > > > > > > -
> > > > > > > > > -		if (vbl_ev->enable)
> > > > > > > > > -			kms->funcs->enable_vblank(kms,
> > > > > > > > > -
> > > > > > > > priv->crtcs[vbl_ev->crtc_id]);
> > > > > > > > > -		else
> > > > > > > > > -			kms->funcs->disable_vblank(kms,
> > > > > > > > > -
> > > > > > > > priv->crtcs[vbl_ev->crtc_id]);
> > > > > > > > >
> > > > > > > > > -		kfree(vbl_ev);
> > > > > > > > > -
> > > > > > > > > -		spin_lock_irqsave(&vbl_ctrl->lock, flags);
> > > > > > > > > -	}
> > > > > > > > > +	if (vbl_work->enable)
> > > > > > > > > +		kms->funcs->enable_vblank(kms,
> > > > > > > > priv->crtcs[vbl_work->crtc_id]);
> > > > > > > > > +	else
> > > > > > > > > +		kms->funcs->disable_vblank(kms,
> > > > > > > > priv->crtcs[vbl_work->crtc_id]);
> > > > > > > > >
> > > > > > > > > -	spin_unlock_irqrestore(&vbl_ctrl->lock, flags);
> > > > > > > > > +	kfree(vbl_work);
> > > > > > > > >  }
> > > > > > > > >
> > > > > > > > >  static int vblank_ctrl_queue_work(struct msm_drm_private
> > *priv,
> > > > > > > > >  					int crtc_id, bool enable)
> > > > > > > > >  {
> > > > > > > > > -	struct msm_vblank_ctrl *vbl_ctrl = &priv->vblank_ctrl;
> > > > > > > > > -	struct vblank_event *vbl_ev;
> > > > > > > > > -	unsigned long flags;
> > > > > > > > > +	struct msm_vblank_work *vbl_work;
> > > > > > > > >
> > > > > > > > > -	vbl_ev = kzalloc(sizeof(*vbl_ev), GFP_ATOMIC);
> > > > > > > > > -	if (!vbl_ev)
> > > > > > > > > +	vbl_work = kzalloc(sizeof(*vbl_work), GFP_ATOMIC);
> > > > > > > > > +	if (!vbl_work)
> > > > > > > > >  		return -ENOMEM;
> > > > > > > > >
> > > > > > > > > -	vbl_ev->crtc_id = crtc_id;
> > > > > > > > > -	vbl_ev->enable = enable;
> > > > > > > > > +	INIT_WORK(&vbl_work->work, vblank_ctrl_worker);
> > > > > > > > >
> > > > > > > > > -	spin_lock_irqsave(&vbl_ctrl->lock, flags);
> > > > > > > > > -	list_add_tail(&vbl_ev->node, &vbl_ctrl->event_list);
> > > > > > > > > -	spin_unlock_irqrestore(&vbl_ctrl->lock, flags);
> > > > > > > > > +	vbl_work->crtc_id = crtc_id;
> > > > > > > > > +	vbl_work->enable = enable;
> > > > > > > > > +	vbl_work->priv = priv;
> > > > > > > > >
> > > > > > > > > -	schedule_work(&vbl_ctrl->work);
> > > > > > > > > +	schedule_work(&vbl_work->work);
> > > > > > > > >
> > > > > > > > >  	return 0;
> > > > > > > > >  }
> > > > > > > > > @@ -269,14 +252,13 @@ static int msm_drm_uninit(struct
> > device
> > > > *dev)
> > > > > > > > >  	struct msm_drm_private *priv = ddev->dev_private;
> > > > > > > > >  	struct msm_kms *kms = priv->kms;
> > > > > > > > >  	struct msm_mdss *mdss = priv->mdss;
> > > > > > > > > -	struct msm_vblank_ctrl *vbl_ctrl = &priv->vblank_ctrl;
> > > > > > > > > -	struct vblank_event *vbl_ev, *tmp;
> > > > > > > > >  	int i;
> > > > > > > > >
> > > > > > > > >  	/* We must cancel and cleanup any pending vblank
> > > > enable/disable
> > > > > > > > >  	 * work before drm_irq_uninstall() to avoid work
> > > > re-enabling an
> > > > > > > > >  	 * irq after uninstall has disabled it.
> > > > > > > > >  	 */
> > > > > > > > > +
> > > > > > > > >  	msm_gem_shrinker_cleanup(ddev);
> > > > > > > > >
> > > > > > > > >  	drm_kms_helper_poll_fini(ddev);
> > > > > > > > > @@ -292,12 +274,6 @@ static int msm_drm_uninit(struct device
> > > > *dev)
> > > > > > > > >  #endif
> > > > > > > > >  	drm_mode_config_cleanup(ddev);
> > > > > > > > >
> > > > > > > > > -	flush_work(&vbl_ctrl->work);
> > > > > > > > > -	list_for_each_entry_safe(vbl_ev, tmp,
> > > > &vbl_ctrl->event_list, node)
> > > > > > > > {
> > > > > > > > > -		list_del(&vbl_ev->node);
> > > > > > > > > -		kfree(vbl_ev);
> > > > > > > > > -	}
> > > > > > > > > -
> > > > > > > > >  	/* clean up event worker threads */
> > > > > > > > >  	for (i = 0; i < priv->num_crtcs; i++) {
> > > > > > > > >  		if (priv->event_thread[i].thread) {
> > > > > > > > > @@ -469,9 +445,6 @@ static int msm_drm_init(struct device
> > *dev,
> > > > > > struct
> > > > > > > > drm_driver *drv)
> > > > > > > > >  	priv->wq = alloc_ordered_workqueue("msm", 0);
> > > > > > > > >
> > > > > > > > >  	INIT_LIST_HEAD(&priv->inactive_list);
> > > > > > > > > -	INIT_LIST_HEAD(&priv->vblank_ctrl.event_list);
> > > > > > > > > -	INIT_WORK(&priv->vblank_ctrl.work, vblank_ctrl_worker);
> > > > > > > > > -	spin_lock_init(&priv->vblank_ctrl.lock);
> > > > > > > > >
> > > > > > > > >  	drm_mode_config_init(ddev);
> > > > > > > > >
> > > > > > > > > diff --git a/drivers/gpu/drm/msm/msm_drv.h
> > > > > > > > b/drivers/gpu/drm/msm/msm_drv.h
> > > > > > > > > index 05d33a7..d4cbde2 100644
> > > > > > > > > --- a/drivers/gpu/drm/msm/msm_drv.h
> > > > > > > > > +++ b/drivers/gpu/drm/msm/msm_drv.h
> > > > > > > > > @@ -77,12 +77,6 @@ enum msm_mdp_plane_property {
> > > > > > > > >  	PLANE_PROP_MAX_NUM
> > > > > > > > >  };
> > > > > > > > >
> > > > > > > > > -struct msm_vblank_ctrl {
> > > > > > > > > -	struct work_struct work;
> > > > > > > > > -	struct list_head event_list;
> > > > > > > > > -	spinlock_t lock;
> > > > > > > > > -};
> > > > > > > > > -
> > > > > > > > >  #define MSM_GPU_MAX_RINGS 4
> > > > > > > > >  #define MAX_H_TILES_PER_DISPLAY 2
> > > > > > > > >
> > > > > > > > > @@ -225,7 +219,6 @@ struct msm_drm_private {
> > > > > > > > >  	struct notifier_block vmap_notifier;
> > > > > > > > >  	struct shrinker shrinker;
> > > > > > > > >
> > > > > > > > > -	struct msm_vblank_ctrl vblank_ctrl;
> > > > > > > > >  	struct drm_atomic_state *pm_state;
> > > > > > > > >  };
> > > > > > > > >
> > > > > > > > > --
> > > > > > > > > The Qualcomm Innovation Center, Inc. is a member of the Code
> > > > Aurora
> > > > > > > > Forum,
> > > > > > > > > a Linux Foundation Collaborative Project
> > > > > > > > >
> > > > > > > > > _______________________________________________
> > > > > > > > > Freedreno mailing list
> > > > > > > > > Freedreno@lists.freedesktop.org
> > > > > > > > > https://lists.freedesktop.org/mailman/listinfo/freedreno
> > > > > > >
> > > > > > > --
> > > > > > > Jeykumar S
> > > > >
> > > > > --
> > > > > Jeykumar S
> > > 
> > > --
> > > Jeykumar S
> 
> -- 
> Jeykumar S