[WESTON,2/2] compositor-drm: Watchdog timer implementation

Submitted by Frederic Plourde on Oct. 29, 2014, 6:13 p.m.

Details

Message ID 1414606409-32162-2-git-send-email-frederic.plourde@collabora.co.uk
State Superseded
Headers show

Not browsing as part of any series.

Commit Message

Frederic Plourde Oct. 29, 2014, 6:13 p.m.
Weston will not repaint until previous update has been acked by
a page-flip event coming from the drm driver. However, some buggy drivers
won't return those events or will stop sending them at some point and
Weston output repaints will completely freeze. To ease developers' task in
testing their drivers, this patch makes compositor-drm use a watchdog timer
to detect cases where those page-flip events stop coming.

This watchdog timer (WDT) implementation is software only and includes
basic features usually found in a WDT. We simply exit Weston gracefully
with an exit code when the dog barks.

The watchdog timeout value can be set via weston.ini by adding a
watchdog-timer-timeout=<SECONDS> entry under a new [compositor-drm]
section. Setting this timeout to 0 disables the watchdog feature.

https://bugs.freedesktop.org/show_bug.cgi?id=83884
---
 src/compositor-drm.c | 100 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 100 insertions(+)

Patch hide | download patch | download mbox

diff --git a/src/compositor-drm.c b/src/compositor-drm.c
index 07b83a7..f9fb673 100644
--- a/src/compositor-drm.c
+++ b/src/compositor-drm.c
@@ -122,6 +122,8 @@  struct drm_compositor {
 
 	uint32_t cursor_width;
 	uint32_t cursor_height;
+
+	uint32_t watchdog_timer_timeout;
 };
 
 struct drm_mode {
@@ -183,6 +185,11 @@  struct drm_output {
 
 	struct vaapi_recorder *recorder;
 	struct wl_listener recorder_frame_listener;
+
+	struct watchdog_timer {
+		struct wl_event_source *timer;
+		uint32_t timeout;
+	} wdt;
 };
 
 /*
@@ -221,6 +228,57 @@  static struct gl_renderer_interface *gl_renderer;
 
 static const char default_seat[] = "seat0";
 
+/* Creates a watchdog timer. Note that timer is not armed by default */
+static int
+drm_output_watchdog_timer_create(struct watchdog_timer *wdt, struct wl_display *display,
+					  wl_event_loop_timer_func_t timeout_func, void *timer_arg,
+					  uint32_t timeout)
+{
+	struct wl_event_loop *loop = NULL;
+
+	loop = wl_display_get_event_loop(display);
+	if (loop) {
+		wdt->timer = wl_event_loop_add_timer(loop,
+											 timeout_func, timer_arg);
+		if (wdt->timer) {
+			wdt->timeout = timeout;
+			return 0;
+		}
+	}
+
+	return -1;
+}
+
+/* Start/reset/arm the dog's timer. This can also be used to 'pat' the dog */
+static int
+drm_output_watchdog_timer_start(struct watchdog_timer *wdt)
+{
+	if (wdt->timer)
+		return wl_event_source_timer_update(wdt->timer, wdt->timeout);
+
+	return -1;
+}
+
+/* Disarms the watchdog */
+static int
+drm_output_watchdog_timer_stop(struct watchdog_timer *wdt)
+{
+	if (wdt->timer)
+		return wl_event_source_timer_update(wdt->timer, 0);
+
+	return -1;
+}
+
+static void
+drm_output_watchdog_timer_destroy(struct watchdog_timer *wdt)
+{
+	if (wdt->timer) {
+		wl_event_source_remove(wdt->timer);
+	}
+
+	wdt->timer = NULL;
+}
+
 static void
 drm_output_set_cursor(struct drm_output *output);
 
@@ -591,6 +649,20 @@  drm_output_set_gamma(struct weston_output *output_base,
 		weston_log("set gamma failed: %m\n");
 }
 
+static void
+drm_output_watchdog_bark(void *output) {
+	/*
+	* Our watchdog just barked at us, means we're not receiving drm
+	* page flip events anymore for that output. Let's gracefully exit
+	* weston with a return value so devs can debug what's going on.
+	*/
+	struct drm_output *out = (struct drm_output *) output;
+	struct weston_compositor *compositor =
+		(struct weston_compositor *) out->base.compositor;
+
+	weston_compositor_exit_with_code(compositor, EXIT_FAILURE);
+}
+
 static int
 drm_output_repaint(struct weston_output *output_base,
 		   pixman_region32_t *damage)
@@ -624,6 +696,13 @@  drm_output_repaint(struct weston_output *output_base,
 		output_base->set_dpms(output_base, WESTON_DPMS_ON);
 	}
 
+	/* Start out watchdog timer */
+	if (output->wdt.timer) {
+		if (drm_output_watchdog_timer_start(&output->wdt)) {
+			weston_log("failed to start drm watchdog: %m\n");
+		}
+	}
+
 	if (drmModePageFlip(compositor->drm.fd, output->crtc_id,
 			    output->next->fb_id,
 			    DRM_MODE_PAGE_FLIP_EVENT, output) < 0) {
@@ -781,6 +860,10 @@  page_flip_handler(int fd, unsigned int frame,
 
 	output->page_flip_pending = 0;
 
+	/* Stop the watchdog instead of patting it here */
+	if (output->wdt.timer)
+		drm_output_watchdog_timer_stop(&output->wdt);
+
 	if (output->destroy_pending)
 		drm_output_destroy(&output->base);
 	else if (!output->vblank_pending) {
@@ -1176,6 +1259,8 @@  drm_output_destroy(struct weston_output *output_base)
 		gbm_surface_destroy(output->surface);
 	}
 
+	drm_output_watchdog_timer_destroy(&output->wdt);
+
 	weston_plane_release(&output->fb_plane);
 	weston_plane_release(&output->cursor_plane);
 
@@ -2013,6 +2098,17 @@  create_output_for_connector(struct drm_compositor *ec,
 
 	output->crtc_id = resources->crtcs[i];
 	output->pipe = i;
+
+	output->wdt.timer = NULL;
+	if (ec->watchdog_timer_timeout) {
+		if (drm_output_watchdog_timer_create(&output->wdt,
+			   ec->base.wl_display, (wl_event_loop_timer_func_t)
+			   drm_output_watchdog_bark, output,
+			   ec->watchdog_timer_timeout)) {
+			weston_log("creating drm watchdog timer failed: %m\n");
+		}
+	}
+
 	ec->crtc_allocator |= (1 << output->crtc_id);
 	output->connector_id = connector->connector_id;
 	ec->connector_allocator |= (1 << output->connector_id);
@@ -2786,6 +2882,10 @@  drm_compositor_create(struct wl_display *display,
 					&ec->format) == -1)
 		goto err_base;
 
+	section = weston_config_get_section(config, "compositor-drm", NULL, NULL);
+	weston_config_section_get_uint(section, "watchdog-timer-timeout",
+									 &ec->watchdog_timer_timeout, 0);
+
 	ec->use_pixman = param->use_pixman;
 
 	if (weston_compositor_init(&ec->base, display, argc, argv,

Comments

On 10/29/2014 11:13 AM, Frederic Plourde wrote:

> This watchdog timer (WDT) implementation is software only and includes
> basic features usually found in a WDT. We simply exit Weston gracefully
> with an exit code when the dog barks.
>
> The watchdog timeout value can be set via weston.ini by adding a
> watchdog-timer-timeout=<SECONDS> entry under a new [compositor-drm]
> section. Setting this timeout to 0 disables the watchdog feature.

> +/* Creates a watchdog timer. Note that timer is not armed by default */
> +static int
> +drm_output_watchdog_timer_create(struct watchdog_timer *wdt, struct wl_display *display,
> +					  wl_event_loop_timer_func_t timeout_func, void *timer_arg,
> +					  uint32_t timeout)
> +{...

I think it would be better to remove all these arguments and just make 
this function directly use the values that are being passed to it now. 
Right now it is a bit hard to find what function is called on timeout 
because I have to locate the call to this function. Also this should 
print to the log the error rather than the caller doing so.

> +/* Start/reset/arm the dog's timer. This can also be used to 'pat' the dog */
> +static int
> +drm_output_watchdog_timer_start(struct watchdog_timer *wdt)
> +{
> +	if (wdt->timer)
> +		return wl_event_source_timer_update(wdt->timer, wdt->timeout);
> +
> +	return -1;
> +}

Instead of the caller doing the logging I would do it here. You could 
skip the log message if it failed to create the timer earlier since that 
would have printed a message.

IMHO this is a better method for making these functions as long as they 
are static and not intended to be reused.

Also wondering if weston_compositor_exit_with_code could be avoided by 
fixing things to use atexit and then directly calling exit(). Or at 
least fix it up enough that when you want to exit with an error it is ok 
to skip the cleanup code that was missed.
On 14-10-29 02:45 PM, Bill Spitzak wrote:
> On 10/29/2014 11:13 AM, Frederic Plourde wrote:
>
>> This watchdog timer (WDT) implementation is software only and includes
>> basic features usually found in a WDT. We simply exit Weston gracefully
>> with an exit code when the dog barks.
>>
>> The watchdog timeout value can be set via weston.ini by adding a
>> watchdog-timer-timeout=<SECONDS> entry under a new [compositor-drm]
>> section. Setting this timeout to 0 disables the watchdog feature.
>
>> +/* Creates a watchdog timer. Note that timer is not armed by default */
>> +static int
>> +drm_output_watchdog_timer_create(struct watchdog_timer *wdt, struct 
>> wl_display *display,
>> +                      wl_event_loop_timer_func_t timeout_func, void 
>> *timer_arg,
>> +                      uint32_t timeout)
>> +{...
>
> I think it would be better to remove all these arguments and just make 
> this function directly use the values that are being passed to it now. 
> Right now it is a bit hard to find what function is called on timeout 
> because I have to locate the call to this function. Also this should 
> print to the log the error rather than the caller doing so.

Mhh... you mean something like :

static int
drm_output_watchdog_timer_create(struct drm_output *output)
{
     struct wl_event_loop *loop = NULL;
     struct weston_compositor *ec = output_base->compositor;

     loop = wl_display_get_event_loop(ec->wl_display);
     if (loop) {
         output->wdt->timer = wl_event_loop_add_timer(loop,
                                         drm_output_watchdog_bark, 
ec->watchdog_timer_timeout);
     blah..blah...

?

If so, then I think I'd prefer to completely remove 
'drm_output_watchdog_timer_create" and inline it directly from 
'create_output_for_connector', since it's not a very big function anyway 
and we're not sure as of today that we're going to reuse this for 
anything else.

In that case, I'd also remove 'drm_output_watchdog_timer_destroy' and 
inline it in 'drm_output_destroy'

what do you think ?
I'm usually not a big fan of static functions with hard-coded values, 
but if you tell me it's better for clarity... I might be tempted, dunno.
>
>> +/* Start/reset/arm the dog's timer. This can also be used to 'pat' 
>> the dog */
>> +static int
>> +drm_output_watchdog_timer_start(struct watchdog_timer *wdt)
>> +{
>> +    if (wdt->timer)
>> +        return wl_event_source_timer_update(wdt->timer, wdt->timeout);
>> +
>> +    return -1;
>> +}
>
> Instead of the caller doing the logging I would do it here. You could 
> skip the log message if it failed to create the timer earlier since 
> that would have printed a message.
fair.
>
> IMHO this is a better method for making these functions as long as 
> they are static and not intended to be reused.
>
> Also wondering if weston_compositor_exit_with_code could be avoided by 
> fixing things to use atexit and then directly calling exit(). Or at 
> least fix it up enough that when you want to exit with an error it is 
> ok to skip the cleanup code that was missed.
I'd like to hear Pekka on that one ;-)
>
> _______________________________________________
> wayland-devel mailing list
> wayland-devel@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/wayland-devel
On 10/29/2014 12:19 PM, Frederic Plourde wrote:

> Mhh... you mean something like :
>
> static int
> drm_output_watchdog_timer_create(struct drm_output *output)
> {
>      struct wl_event_loop *loop = NULL;
>      struct weston_compositor *ec = output_base->compositor;
>
>      loop = wl_display_get_event_loop(ec->wl_display);
>      if (loop) {
>          output->wdt->timer = wl_event_loop_add_timer(loop,
>                                          drm_output_watchdog_bark,
> ec->watchdog_timer_timeout);
>      blah..blah...

Yes that is what I meant.

> If so, then I think I'd prefer to completely remove
> 'drm_output_watchdog_timer_create" and inline it directly from
> 'create_output_for_connector', since it's not a very big function anyway
> and we're not sure as of today that we're going to reuse this for
> anything else.

Probably that is ok as well. The main problem is that it is impossible 
to inline the "bark" function so one part of the code is in a different 
place, which may mean it is clearer to put the reference to that 
function up next to it too.

> what do you think ?
> I'm usually not a big fan of static functions with hard-coded values,
> but if you tell me it's better for clarity... I might be tempted, dunno.

Generally I don't think a generic function should be written until you 
have at least 2 instances where it is used.
On 14-10-29 05:40 PM, Bill Spitzak wrote:
> On 10/29/2014 12:19 PM, Frederic Plourde wrote:
>
>> Mhh... you mean something like :
>>
>> static int
>> drm_output_watchdog_timer_create(struct drm_output *output)
>> {
>>      struct wl_event_loop *loop = NULL;
>>      struct weston_compositor *ec = output_base->compositor;
>>
>>      loop = wl_display_get_event_loop(ec->wl_display);
>>      if (loop) {
>>          output->wdt->timer = wl_event_loop_add_timer(loop,
>> drm_output_watchdog_bark,
>> ec->watchdog_timer_timeout);
>>      blah..blah...
>
> Yes that is what I meant.
>
>> If so, then I think I'd prefer to completely remove
>> 'drm_output_watchdog_timer_create" and inline it directly from
>> 'create_output_for_connector', since it's not a very big function anyway
>> and we're not sure as of today that we're going to reuse this for
>> anything else.
>
> Probably that is ok as well. The main problem is that it is impossible 
> to inline the "bark" function so one part of the code is in a 
> different place, which may mean it is clearer to put the reference to 
> that function up next to it too.
>
>> what do you think ?
>> I'm usually not a big fan of static functions with hard-coded values,
>> but if you tell me it's better for clarity... I might be tempted, dunno.
>
> Generally I don't think a generic function should be written until you 
> have at least 2 instances where it is used.

I think you won me over... Let's pretend we're not going to use that for 
anything else for now.
patch v2 coming up next :)

> _______________________________________________
> wayland-devel mailing list
> wayland-devel@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/wayland-devel