[igt] tests/kms_fence_pin_leak: Exercise full ppgtt fence pin_count leak in the kernel

Submitted by Ville Syrjälä on May 12, 2014, 5:46 p.m.

Details

Message ID 1399916784-30841-1-git-send-email-ville.syrjala@linux.intel.com
State New, archived
Headers show

Not browsing as part of any series.

Commit Message

Ville Syrjälä May 12, 2014, 5:46 p.m.
From: Ville Syrjälä <ville.syrjala@linux.intel.com>

The kernel full ppgtt support has a bug where it can drop a pinned
fence to the floor, hence we leak the pin_count as the subsequent
fence unpin becomes a nop. We can trigger it easily by unbinding a
buffer from a ppgtt address space while the buffer is simultaneosly
being used for scanout.

Monitor i915_gem_fence_regs to detect the leak while performing
the required set of operations in a loop.

Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
---
 tests/Makefile.sources     |   1 +
 tests/kms_fence_pin_leak.c | 224 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 225 insertions(+)
 create mode 100644 tests/kms_fence_pin_leak.c

Patch hide | download patch | download mbox

diff --git a/tests/Makefile.sources b/tests/Makefile.sources
index 5d5dc46..393c4a2 100644
--- a/tests/Makefile.sources
+++ b/tests/Makefile.sources
@@ -60,6 +60,7 @@  TESTS_progs_M = \
 	kms_addfb \
 	kms_cursor_crc \
 	kms_fbc_crc \
+	kms_fence_pin_leak \
 	kms_flip \
 	kms_flip_tiling \
 	kms_pipe_crc_basic \
diff --git a/tests/kms_fence_pin_leak.c b/tests/kms_fence_pin_leak.c
new file mode 100644
index 0000000..f299212
--- /dev/null
+++ b/tests/kms_fence_pin_leak.c
@@ -0,0 +1,224 @@ 
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include <errno.h>
+#include <limits.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "drmtest.h"
+#include "igt_debugfs.h"
+#include "igt_kms.h"
+#include "ioctl_wrappers.h"
+#include "intel_chipset.h"
+
+typedef struct {
+	int drm_fd;
+	uint32_t devid;
+	drm_intel_bufmgr *bufmgr;
+	igt_display_t display;
+} data_t;
+
+static void exec_nop(data_t *data, uint32_t handle, drm_intel_context *context)
+{
+	drm_intel_bo *dst;
+	struct intel_batchbuffer *batch;
+
+	dst = gem_handle_to_libdrm_bo(data->bufmgr, data->drm_fd, "", handle);
+	igt_assert(dst);
+
+	batch = intel_batchbuffer_alloc(data->bufmgr, data->devid);
+	igt_assert(batch);
+
+	/* add the reloc to make sure the kernel will think we write to dst */
+	BEGIN_BATCH(4);
+	OUT_BATCH(MI_BATCH_BUFFER_END);
+	OUT_BATCH(MI_NOOP);
+	OUT_RELOC(dst, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
+	OUT_BATCH(MI_NOOP);
+	ADVANCE_BATCH();
+
+	intel_batchbuffer_flush_with_context(batch, context);
+	intel_batchbuffer_free(batch);
+
+	drm_intel_bo_unreference(dst);
+}
+
+static unsigned int total_fence_pin_count(void)
+{
+	unsigned int pinned = 0;
+	char buf[128];
+	FILE *f;
+
+	f = igt_debugfs_fopen("i915_gem_fence_regs", "r");
+	igt_assert(f);
+
+	for (;;) {
+		unsigned int pin_count = 0;
+		char *ptr;
+
+		ptr = fgets(buf, sizeof(buf), f);
+		if (!ptr)
+			break;
+
+		if (sscanf(ptr, "Fence %*u, pin count = %u", &pin_count) == 1)
+			pinned += pin_count;
+	}
+
+	fclose(f);
+
+	return pinned;
+}
+
+static bool run_single_test(data_t *data, enum pipe pipe, igt_output_t *output)
+{
+	igt_display_t *display = &data->display;
+	drmModeModeInfo *mode;
+	igt_plane_t *primary;
+	unsigned int pin_count;
+	struct igt_fb fb[2];
+	int i;
+
+	igt_output_set_pipe(output, pipe);
+	igt_display_commit(display);
+
+	if (!output->valid) {
+		igt_output_set_pipe(output, PIPE_ANY);
+		igt_display_commit(display);
+		return false;
+	}
+
+	mode = igt_output_get_mode(output);
+	primary = igt_output_get_plane(output, IGT_PLANE_PRIMARY);
+
+	igt_create_color_fb(data->drm_fd, mode->hdisplay, mode->vdisplay,
+			    DRM_FORMAT_XRGB8888,
+			    true, /* need a fence so must be tiled */
+			    0.0, 0.0, 0.0,
+			    &fb[0]);
+	igt_create_color_fb(data->drm_fd, mode->hdisplay, mode->vdisplay,
+			    DRM_FORMAT_XRGB8888,
+			    true, /* need a fence so must be tiled */
+			    0.0, 0.0, 0.0,
+			    &fb[1]);
+
+	igt_plane_set_fb(primary, &fb[0]);
+	igt_display_commit(display);
+
+	/* sample the fence pin_counts now that we have one scanout buffer pinned */
+	pin_count = total_fence_pin_count();
+
+	for (i = 0; i < 64; i++) {
+		drm_intel_context *ctx;
+
+		/*
+		 * Link fb.gem_handle to the ppgtt vm of ctx so that the context
+		 * destruction will unbind the obj from the ppgtt vm in question.
+		 */
+		ctx = drm_intel_gem_context_create(data->bufmgr);
+		igt_assert(ctx);
+		exec_nop(data, fb[i&1].gem_handle, ctx);
+		drm_intel_gem_context_destroy(ctx);
+
+		/* Force a context switch to make sure ctx gets destroyed for real. */
+		exec_nop(data, fb[i&1].gem_handle, NULL);
+
+		gem_sync(data->drm_fd, fb[i&1].gem_handle);
+
+		/*
+		 * Pin the new buffer and unpin the old buffer from display. If
+		 * the kernel is buggy the ppgtt unbind will have dropped the
+		 * fence for the old buffer, and now the display code will try
+		 * to unpin only to find no fence there. So the pin_count will leak.
+		 */
+		igt_plane_set_fb(primary, &fb[!(i&1)]);
+		igt_display_commit(display);
+
+		/*
+		 * We always have exactly one scanout buffer, so the fence pin_count
+		 * should remain static. Fail the test if we detect a leak.
+		 */
+		igt_assert(pin_count == total_fence_pin_count());
+
+		printf(".");
+		fflush(stdout);
+	}
+
+	igt_plane_set_fb(primary, NULL);
+	igt_output_set_pipe(output, PIPE_ANY);
+	igt_display_commit(display);
+
+	igt_remove_fb(data->drm_fd, &fb[1]);
+	igt_remove_fb(data->drm_fd, &fb[0]);
+
+	printf("\n");
+
+	return true;
+}
+
+static void run_test(data_t *data)
+{
+	igt_display_t *display = &data->display;
+	igt_output_t *output;
+	enum pipe p;
+
+	for_each_connected_output(display, output) {
+		for (p = 0; p < igt_display_get_n_pipes(display); p++) {
+			if (run_single_test(data, p, output))
+				return; /* one time ought to be enough */
+		}
+	}
+
+	igt_skip("no valid crtc/connector combinations found\n");
+}
+
+igt_simple_main
+{
+	drm_intel_context *ctx;
+	data_t data = {};
+
+	igt_skip_on_simulation();
+
+	data.drm_fd = drm_open_any();
+
+	data.devid = intel_get_drm_devid(data.drm_fd);
+
+	igt_set_vt_graphics_mode();
+
+	data.bufmgr = drm_intel_bufmgr_gem_init(data.drm_fd, 4096);
+	igt_assert(data.bufmgr);
+	drm_intel_bufmgr_gem_enable_reuse(data.bufmgr);
+
+	igt_display_init(&data.display, data.drm_fd);
+
+	ctx = drm_intel_gem_context_create(data.bufmgr);
+	igt_require(ctx);
+	drm_intel_gem_context_destroy(ctx);
+
+	run_test(&data);
+
+	drm_intel_bufmgr_destroy(data.bufmgr);
+	igt_display_fini(&data.display);
+}

Comments

On Mon, May 12, 2014 at 08:46:24PM +0300, ville.syrjala@linux.intel.com wrote:
> From: Ville Syrjälä <ville.syrjala@linux.intel.com>
> 
> The kernel full ppgtt support has a bug where it can drop a pinned
> fence to the floor, hence we leak the pin_count as the subsequent
> fence unpin becomes a nop. We can trigger it easily by unbinding a
> buffer from a ppgtt address space while the buffer is simultaneosly
> being used for scanout.
> 
> Monitor i915_gem_fence_regs to detect the leak while performing
> the required set of operations in a loop.

do we really need that part? I've hoped just leaking enough fences until
the kernel blows up should be sufficient.

Generally I'm vary of using debugfs in new tests a bit since we
essentially then lock down that debugfs file as abi. And since no one
easily notices when a test stops catching a bug that's usually bad. Hence
also why I want basic testcases for debugfs infrastructure to make sure it
keeps on working.
-Daniel

> 
> Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
> ---
>  tests/Makefile.sources     |   1 +
>  tests/kms_fence_pin_leak.c | 224 +++++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 225 insertions(+)
>  create mode 100644 tests/kms_fence_pin_leak.c
> 
> diff --git a/tests/Makefile.sources b/tests/Makefile.sources
> index 5d5dc46..393c4a2 100644
> --- a/tests/Makefile.sources
> +++ b/tests/Makefile.sources
> @@ -60,6 +60,7 @@ TESTS_progs_M = \
>  	kms_addfb \
>  	kms_cursor_crc \
>  	kms_fbc_crc \
> +	kms_fence_pin_leak \
>  	kms_flip \
>  	kms_flip_tiling \
>  	kms_pipe_crc_basic \
> diff --git a/tests/kms_fence_pin_leak.c b/tests/kms_fence_pin_leak.c
> new file mode 100644
> index 0000000..f299212
> --- /dev/null
> +++ b/tests/kms_fence_pin_leak.c
> @@ -0,0 +1,224 @@
> +/*
> + * Copyright © 2014 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + *
> + */
> +
> +#include <errno.h>
> +#include <limits.h>
> +#include <stdbool.h>
> +#include <stdio.h>
> +#include <string.h>
> +
> +#include "drmtest.h"
> +#include "igt_debugfs.h"
> +#include "igt_kms.h"
> +#include "ioctl_wrappers.h"
> +#include "intel_chipset.h"
> +
> +typedef struct {
> +	int drm_fd;
> +	uint32_t devid;
> +	drm_intel_bufmgr *bufmgr;
> +	igt_display_t display;
> +} data_t;
> +
> +static void exec_nop(data_t *data, uint32_t handle, drm_intel_context *context)
> +{
> +	drm_intel_bo *dst;
> +	struct intel_batchbuffer *batch;
> +
> +	dst = gem_handle_to_libdrm_bo(data->bufmgr, data->drm_fd, "", handle);
> +	igt_assert(dst);
> +
> +	batch = intel_batchbuffer_alloc(data->bufmgr, data->devid);
> +	igt_assert(batch);
> +
> +	/* add the reloc to make sure the kernel will think we write to dst */
> +	BEGIN_BATCH(4);
> +	OUT_BATCH(MI_BATCH_BUFFER_END);
> +	OUT_BATCH(MI_NOOP);
> +	OUT_RELOC(dst, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
> +	OUT_BATCH(MI_NOOP);
> +	ADVANCE_BATCH();
> +
> +	intel_batchbuffer_flush_with_context(batch, context);
> +	intel_batchbuffer_free(batch);
> +
> +	drm_intel_bo_unreference(dst);
> +}
> +
> +static unsigned int total_fence_pin_count(void)
> +{
> +	unsigned int pinned = 0;
> +	char buf[128];
> +	FILE *f;
> +
> +	f = igt_debugfs_fopen("i915_gem_fence_regs", "r");
> +	igt_assert(f);
> +
> +	for (;;) {
> +		unsigned int pin_count = 0;
> +		char *ptr;
> +
> +		ptr = fgets(buf, sizeof(buf), f);
> +		if (!ptr)
> +			break;
> +
> +		if (sscanf(ptr, "Fence %*u, pin count = %u", &pin_count) == 1)
> +			pinned += pin_count;
> +	}
> +
> +	fclose(f);
> +
> +	return pinned;
> +}
> +
> +static bool run_single_test(data_t *data, enum pipe pipe, igt_output_t *output)
> +{
> +	igt_display_t *display = &data->display;
> +	drmModeModeInfo *mode;
> +	igt_plane_t *primary;
> +	unsigned int pin_count;
> +	struct igt_fb fb[2];
> +	int i;
> +
> +	igt_output_set_pipe(output, pipe);
> +	igt_display_commit(display);
> +
> +	if (!output->valid) {
> +		igt_output_set_pipe(output, PIPE_ANY);
> +		igt_display_commit(display);
> +		return false;
> +	}
> +
> +	mode = igt_output_get_mode(output);
> +	primary = igt_output_get_plane(output, IGT_PLANE_PRIMARY);
> +
> +	igt_create_color_fb(data->drm_fd, mode->hdisplay, mode->vdisplay,
> +			    DRM_FORMAT_XRGB8888,
> +			    true, /* need a fence so must be tiled */
> +			    0.0, 0.0, 0.0,
> +			    &fb[0]);
> +	igt_create_color_fb(data->drm_fd, mode->hdisplay, mode->vdisplay,
> +			    DRM_FORMAT_XRGB8888,
> +			    true, /* need a fence so must be tiled */
> +			    0.0, 0.0, 0.0,
> +			    &fb[1]);
> +
> +	igt_plane_set_fb(primary, &fb[0]);
> +	igt_display_commit(display);
> +
> +	/* sample the fence pin_counts now that we have one scanout buffer pinned */
> +	pin_count = total_fence_pin_count();
> +
> +	for (i = 0; i < 64; i++) {
> +		drm_intel_context *ctx;
> +
> +		/*
> +		 * Link fb.gem_handle to the ppgtt vm of ctx so that the context
> +		 * destruction will unbind the obj from the ppgtt vm in question.
> +		 */
> +		ctx = drm_intel_gem_context_create(data->bufmgr);
> +		igt_assert(ctx);
> +		exec_nop(data, fb[i&1].gem_handle, ctx);
> +		drm_intel_gem_context_destroy(ctx);
> +
> +		/* Force a context switch to make sure ctx gets destroyed for real. */
> +		exec_nop(data, fb[i&1].gem_handle, NULL);
> +
> +		gem_sync(data->drm_fd, fb[i&1].gem_handle);
> +
> +		/*
> +		 * Pin the new buffer and unpin the old buffer from display. If
> +		 * the kernel is buggy the ppgtt unbind will have dropped the
> +		 * fence for the old buffer, and now the display code will try
> +		 * to unpin only to find no fence there. So the pin_count will leak.
> +		 */
> +		igt_plane_set_fb(primary, &fb[!(i&1)]);
> +		igt_display_commit(display);
> +
> +		/*
> +		 * We always have exactly one scanout buffer, so the fence pin_count
> +		 * should remain static. Fail the test if we detect a leak.
> +		 */
> +		igt_assert(pin_count == total_fence_pin_count());
> +
> +		printf(".");
> +		fflush(stdout);
> +	}
> +
> +	igt_plane_set_fb(primary, NULL);
> +	igt_output_set_pipe(output, PIPE_ANY);
> +	igt_display_commit(display);
> +
> +	igt_remove_fb(data->drm_fd, &fb[1]);
> +	igt_remove_fb(data->drm_fd, &fb[0]);
> +
> +	printf("\n");
> +
> +	return true;
> +}
> +
> +static void run_test(data_t *data)
> +{
> +	igt_display_t *display = &data->display;
> +	igt_output_t *output;
> +	enum pipe p;
> +
> +	for_each_connected_output(display, output) {
> +		for (p = 0; p < igt_display_get_n_pipes(display); p++) {
> +			if (run_single_test(data, p, output))
> +				return; /* one time ought to be enough */
> +		}
> +	}
> +
> +	igt_skip("no valid crtc/connector combinations found\n");
> +}
> +
> +igt_simple_main
> +{
> +	drm_intel_context *ctx;
> +	data_t data = {};
> +
> +	igt_skip_on_simulation();
> +
> +	data.drm_fd = drm_open_any();
> +
> +	data.devid = intel_get_drm_devid(data.drm_fd);
> +
> +	igt_set_vt_graphics_mode();
> +
> +	data.bufmgr = drm_intel_bufmgr_gem_init(data.drm_fd, 4096);
> +	igt_assert(data.bufmgr);
> +	drm_intel_bufmgr_gem_enable_reuse(data.bufmgr);
> +
> +	igt_display_init(&data.display, data.drm_fd);
> +
> +	ctx = drm_intel_gem_context_create(data.bufmgr);
> +	igt_require(ctx);
> +	drm_intel_gem_context_destroy(ctx);
> +
> +	run_test(&data);
> +
> +	drm_intel_bufmgr_destroy(data.bufmgr);
> +	igt_display_fini(&data.display);
> +}
> -- 
> 1.8.3.2
>
On Mon, May 12, 2014 at 08:34:07PM +0200, Daniel Vetter wrote:
> On Mon, May 12, 2014 at 08:46:24PM +0300, ville.syrjala@linux.intel.com wrote:
> > From: Ville Syrjälä <ville.syrjala@linux.intel.com>
> > 
> > The kernel full ppgtt support has a bug where it can drop a pinned
> > fence to the floor, hence we leak the pin_count as the subsequent
> > fence unpin becomes a nop. We can trigger it easily by unbinding a
> > buffer from a ppgtt address space while the buffer is simultaneosly
> > being used for scanout.
> > 
> > Monitor i915_gem_fence_regs to detect the leak while performing
> > the required set of operations in a loop.
> 
> do we really need that part? I've hoped just leaking enough fences until
> the kernel blows up should be sufficient.

Sure if you want to let it run for >5 years. That's how long it would
take to make the pin_count overflow on my IVB machine, assuming you hit
the bug on every iteration of the loop, which for some reason doesn't
seem to be the case here.

Another option would be to trick the the pin_count to leak for every
fence registers. Then we wouldn't be able to find a new fence and
we'd get -EDEADLK from i915_find_fence_reg(). If I try to make sure
all  fences have an object associated with them, it should always
pick a fence with pin_count==0, so it seems like this approach should
work. I'll work on a it a bit and we'll see...

> Generally I'm vary of using debugfs in new tests a bit since we
> essentially then lock down that debugfs file as abi. And since no one
> easily notices when a test stops catching a bug that's usually bad. Hence
> also why I want basic testcases for debugfs infrastructure to make sure it
> keeps on working.
> -Daniel
> 
> > 
> > Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
> > ---
> >  tests/Makefile.sources     |   1 +
> >  tests/kms_fence_pin_leak.c | 224 +++++++++++++++++++++++++++++++++++++++++++++
> >  2 files changed, 225 insertions(+)
> >  create mode 100644 tests/kms_fence_pin_leak.c
> > 
> > diff --git a/tests/Makefile.sources b/tests/Makefile.sources
> > index 5d5dc46..393c4a2 100644
> > --- a/tests/Makefile.sources
> > +++ b/tests/Makefile.sources
> > @@ -60,6 +60,7 @@ TESTS_progs_M = \
> >  	kms_addfb \
> >  	kms_cursor_crc \
> >  	kms_fbc_crc \
> > +	kms_fence_pin_leak \
> >  	kms_flip \
> >  	kms_flip_tiling \
> >  	kms_pipe_crc_basic \
> > diff --git a/tests/kms_fence_pin_leak.c b/tests/kms_fence_pin_leak.c
> > new file mode 100644
> > index 0000000..f299212
> > --- /dev/null
> > +++ b/tests/kms_fence_pin_leak.c
> > @@ -0,0 +1,224 @@
> > +/*
> > + * Copyright © 2014 Intel Corporation
> > + *
> > + * Permission is hereby granted, free of charge, to any person obtaining a
> > + * copy of this software and associated documentation files (the "Software"),
> > + * to deal in the Software without restriction, including without limitation
> > + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> > + * and/or sell copies of the Software, and to permit persons to whom the
> > + * Software is furnished to do so, subject to the following conditions:
> > + *
> > + * The above copyright notice and this permission notice (including the next
> > + * paragraph) shall be included in all copies or substantial portions of the
> > + * Software.
> > + *
> > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> > + * IN THE SOFTWARE.
> > + *
> > + */
> > +
> > +#include <errno.h>
> > +#include <limits.h>
> > +#include <stdbool.h>
> > +#include <stdio.h>
> > +#include <string.h>
> > +
> > +#include "drmtest.h"
> > +#include "igt_debugfs.h"
> > +#include "igt_kms.h"
> > +#include "ioctl_wrappers.h"
> > +#include "intel_chipset.h"
> > +
> > +typedef struct {
> > +	int drm_fd;
> > +	uint32_t devid;
> > +	drm_intel_bufmgr *bufmgr;
> > +	igt_display_t display;
> > +} data_t;
> > +
> > +static void exec_nop(data_t *data, uint32_t handle, drm_intel_context *context)
> > +{
> > +	drm_intel_bo *dst;
> > +	struct intel_batchbuffer *batch;
> > +
> > +	dst = gem_handle_to_libdrm_bo(data->bufmgr, data->drm_fd, "", handle);
> > +	igt_assert(dst);
> > +
> > +	batch = intel_batchbuffer_alloc(data->bufmgr, data->devid);
> > +	igt_assert(batch);
> > +
> > +	/* add the reloc to make sure the kernel will think we write to dst */
> > +	BEGIN_BATCH(4);
> > +	OUT_BATCH(MI_BATCH_BUFFER_END);
> > +	OUT_BATCH(MI_NOOP);
> > +	OUT_RELOC(dst, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
> > +	OUT_BATCH(MI_NOOP);
> > +	ADVANCE_BATCH();
> > +
> > +	intel_batchbuffer_flush_with_context(batch, context);
> > +	intel_batchbuffer_free(batch);
> > +
> > +	drm_intel_bo_unreference(dst);
> > +}
> > +
> > +static unsigned int total_fence_pin_count(void)
> > +{
> > +	unsigned int pinned = 0;
> > +	char buf[128];
> > +	FILE *f;
> > +
> > +	f = igt_debugfs_fopen("i915_gem_fence_regs", "r");
> > +	igt_assert(f);
> > +
> > +	for (;;) {
> > +		unsigned int pin_count = 0;
> > +		char *ptr;
> > +
> > +		ptr = fgets(buf, sizeof(buf), f);
> > +		if (!ptr)
> > +			break;
> > +
> > +		if (sscanf(ptr, "Fence %*u, pin count = %u", &pin_count) == 1)
> > +			pinned += pin_count;
> > +	}
> > +
> > +	fclose(f);
> > +
> > +	return pinned;
> > +}
> > +
> > +static bool run_single_test(data_t *data, enum pipe pipe, igt_output_t *output)
> > +{
> > +	igt_display_t *display = &data->display;
> > +	drmModeModeInfo *mode;
> > +	igt_plane_t *primary;
> > +	unsigned int pin_count;
> > +	struct igt_fb fb[2];
> > +	int i;
> > +
> > +	igt_output_set_pipe(output, pipe);
> > +	igt_display_commit(display);
> > +
> > +	if (!output->valid) {
> > +		igt_output_set_pipe(output, PIPE_ANY);
> > +		igt_display_commit(display);
> > +		return false;
> > +	}
> > +
> > +	mode = igt_output_get_mode(output);
> > +	primary = igt_output_get_plane(output, IGT_PLANE_PRIMARY);
> > +
> > +	igt_create_color_fb(data->drm_fd, mode->hdisplay, mode->vdisplay,
> > +			    DRM_FORMAT_XRGB8888,
> > +			    true, /* need a fence so must be tiled */
> > +			    0.0, 0.0, 0.0,
> > +			    &fb[0]);
> > +	igt_create_color_fb(data->drm_fd, mode->hdisplay, mode->vdisplay,
> > +			    DRM_FORMAT_XRGB8888,
> > +			    true, /* need a fence so must be tiled */
> > +			    0.0, 0.0, 0.0,
> > +			    &fb[1]);
> > +
> > +	igt_plane_set_fb(primary, &fb[0]);
> > +	igt_display_commit(display);
> > +
> > +	/* sample the fence pin_counts now that we have one scanout buffer pinned */
> > +	pin_count = total_fence_pin_count();
> > +
> > +	for (i = 0; i < 64; i++) {
> > +		drm_intel_context *ctx;
> > +
> > +		/*
> > +		 * Link fb.gem_handle to the ppgtt vm of ctx so that the context
> > +		 * destruction will unbind the obj from the ppgtt vm in question.
> > +		 */
> > +		ctx = drm_intel_gem_context_create(data->bufmgr);
> > +		igt_assert(ctx);
> > +		exec_nop(data, fb[i&1].gem_handle, ctx);
> > +		drm_intel_gem_context_destroy(ctx);
> > +
> > +		/* Force a context switch to make sure ctx gets destroyed for real. */
> > +		exec_nop(data, fb[i&1].gem_handle, NULL);
> > +
> > +		gem_sync(data->drm_fd, fb[i&1].gem_handle);
> > +
> > +		/*
> > +		 * Pin the new buffer and unpin the old buffer from display. If
> > +		 * the kernel is buggy the ppgtt unbind will have dropped the
> > +		 * fence for the old buffer, and now the display code will try
> > +		 * to unpin only to find no fence there. So the pin_count will leak.
> > +		 */
> > +		igt_plane_set_fb(primary, &fb[!(i&1)]);
> > +		igt_display_commit(display);
> > +
> > +		/*
> > +		 * We always have exactly one scanout buffer, so the fence pin_count
> > +		 * should remain static. Fail the test if we detect a leak.
> > +		 */
> > +		igt_assert(pin_count == total_fence_pin_count());
> > +
> > +		printf(".");
> > +		fflush(stdout);
> > +	}
> > +
> > +	igt_plane_set_fb(primary, NULL);
> > +	igt_output_set_pipe(output, PIPE_ANY);
> > +	igt_display_commit(display);
> > +
> > +	igt_remove_fb(data->drm_fd, &fb[1]);
> > +	igt_remove_fb(data->drm_fd, &fb[0]);
> > +
> > +	printf("\n");
> > +
> > +	return true;
> > +}
> > +
> > +static void run_test(data_t *data)
> > +{
> > +	igt_display_t *display = &data->display;
> > +	igt_output_t *output;
> > +	enum pipe p;
> > +
> > +	for_each_connected_output(display, output) {
> > +		for (p = 0; p < igt_display_get_n_pipes(display); p++) {
> > +			if (run_single_test(data, p, output))
> > +				return; /* one time ought to be enough */
> > +		}
> > +	}
> > +
> > +	igt_skip("no valid crtc/connector combinations found\n");
> > +}
> > +
> > +igt_simple_main
> > +{
> > +	drm_intel_context *ctx;
> > +	data_t data = {};
> > +
> > +	igt_skip_on_simulation();
> > +
> > +	data.drm_fd = drm_open_any();
> > +
> > +	data.devid = intel_get_drm_devid(data.drm_fd);
> > +
> > +	igt_set_vt_graphics_mode();
> > +
> > +	data.bufmgr = drm_intel_bufmgr_gem_init(data.drm_fd, 4096);
> > +	igt_assert(data.bufmgr);
> > +	drm_intel_bufmgr_gem_enable_reuse(data.bufmgr);
> > +
> > +	igt_display_init(&data.display, data.drm_fd);
> > +
> > +	ctx = drm_intel_gem_context_create(data.bufmgr);
> > +	igt_require(ctx);
> > +	drm_intel_gem_context_destroy(ctx);
> > +
> > +	run_test(&data);
> > +
> > +	drm_intel_bufmgr_destroy(data.bufmgr);
> > +	igt_display_fini(&data.display);
> > +}
> > -- 
> > 1.8.3.2
> > 
> 
> -- 
> Daniel Vetter
> Software Engineer, Intel Corporation
> +41 (0) 79 365 57 48 - http://blog.ffwll.ch
On Tue, May 13, 2014 at 11:24:48AM +0300, Ville Syrjälä wrote:
> On Mon, May 12, 2014 at 08:34:07PM +0200, Daniel Vetter wrote:
> > On Mon, May 12, 2014 at 08:46:24PM +0300, ville.syrjala@linux.intel.com wrote:
> > > From: Ville Syrjälä <ville.syrjala@linux.intel.com>
> > > 
> > > The kernel full ppgtt support has a bug where it can drop a pinned
> > > fence to the floor, hence we leak the pin_count as the subsequent
> > > fence unpin becomes a nop. We can trigger it easily by unbinding a
> > > buffer from a ppgtt address space while the buffer is simultaneosly
> > > being used for scanout.
> > > 
> > > Monitor i915_gem_fence_regs to detect the leak while performing
> > > the required set of operations in a loop.
> > 
> > do we really need that part? I've hoped just leaking enough fences until
> > the kernel blows up should be sufficient.
> 
> Sure if you want to let it run for >5 years. That's how long it would
> take to make the pin_count overflow on my IVB machine, assuming you hit
> the bug on every iteration of the loop, which for some reason doesn't
> seem to be the case here.

Yeah, that's only feasible if we restrict the pin count like we already do
with obj->pin_count.

> Another option would be to trick the the pin_count to leak for every
> fence registers. Then we wouldn't be able to find a new fence and
> we'd get -EDEADLK from i915_find_fence_reg(). If I try to make sure
> all  fences have an object associated with them, it should always
> pick a fence with pin_count==0, so it seems like this approach should
> work. I'll work on a it a bit and we'll see...

That's actually what I've had in mind ;-)

Aside: I'm of course not against using more of debugfs, so if it's too
much work I'm totally ok with this one. But generally I just prefer
black-box tests with as little introspection through debugfs as possible.

Thanks for doing this.
-Daniel

> 
> > Generally I'm vary of using debugfs in new tests a bit since we
> > essentially then lock down that debugfs file as abi. And since no one
> > easily notices when a test stops catching a bug that's usually bad. Hence
> > also why I want basic testcases for debugfs infrastructure to make sure it
> > keeps on working.
> > -Daniel
> > 
> > > 
> > > Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
> > > ---
> > >  tests/Makefile.sources     |   1 +
> > >  tests/kms_fence_pin_leak.c | 224 +++++++++++++++++++++++++++++++++++++++++++++
> > >  2 files changed, 225 insertions(+)
> > >  create mode 100644 tests/kms_fence_pin_leak.c
> > > 
> > > diff --git a/tests/Makefile.sources b/tests/Makefile.sources
> > > index 5d5dc46..393c4a2 100644
> > > --- a/tests/Makefile.sources
> > > +++ b/tests/Makefile.sources
> > > @@ -60,6 +60,7 @@ TESTS_progs_M = \
> > >  	kms_addfb \
> > >  	kms_cursor_crc \
> > >  	kms_fbc_crc \
> > > +	kms_fence_pin_leak \
> > >  	kms_flip \
> > >  	kms_flip_tiling \
> > >  	kms_pipe_crc_basic \
> > > diff --git a/tests/kms_fence_pin_leak.c b/tests/kms_fence_pin_leak.c
> > > new file mode 100644
> > > index 0000000..f299212
> > > --- /dev/null
> > > +++ b/tests/kms_fence_pin_leak.c
> > > @@ -0,0 +1,224 @@
> > > +/*
> > > + * Copyright © 2014 Intel Corporation
> > > + *
> > > + * Permission is hereby granted, free of charge, to any person obtaining a
> > > + * copy of this software and associated documentation files (the "Software"),
> > > + * to deal in the Software without restriction, including without limitation
> > > + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> > > + * and/or sell copies of the Software, and to permit persons to whom the
> > > + * Software is furnished to do so, subject to the following conditions:
> > > + *
> > > + * The above copyright notice and this permission notice (including the next
> > > + * paragraph) shall be included in all copies or substantial portions of the
> > > + * Software.
> > > + *
> > > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> > > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> > > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> > > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> > > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> > > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> > > + * IN THE SOFTWARE.
> > > + *
> > > + */
> > > +
> > > +#include <errno.h>
> > > +#include <limits.h>
> > > +#include <stdbool.h>
> > > +#include <stdio.h>
> > > +#include <string.h>
> > > +
> > > +#include "drmtest.h"
> > > +#include "igt_debugfs.h"
> > > +#include "igt_kms.h"
> > > +#include "ioctl_wrappers.h"
> > > +#include "intel_chipset.h"
> > > +
> > > +typedef struct {
> > > +	int drm_fd;
> > > +	uint32_t devid;
> > > +	drm_intel_bufmgr *bufmgr;
> > > +	igt_display_t display;
> > > +} data_t;
> > > +
> > > +static void exec_nop(data_t *data, uint32_t handle, drm_intel_context *context)
> > > +{
> > > +	drm_intel_bo *dst;
> > > +	struct intel_batchbuffer *batch;
> > > +
> > > +	dst = gem_handle_to_libdrm_bo(data->bufmgr, data->drm_fd, "", handle);
> > > +	igt_assert(dst);
> > > +
> > > +	batch = intel_batchbuffer_alloc(data->bufmgr, data->devid);
> > > +	igt_assert(batch);
> > > +
> > > +	/* add the reloc to make sure the kernel will think we write to dst */
> > > +	BEGIN_BATCH(4);
> > > +	OUT_BATCH(MI_BATCH_BUFFER_END);
> > > +	OUT_BATCH(MI_NOOP);
> > > +	OUT_RELOC(dst, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
> > > +	OUT_BATCH(MI_NOOP);
> > > +	ADVANCE_BATCH();
> > > +
> > > +	intel_batchbuffer_flush_with_context(batch, context);
> > > +	intel_batchbuffer_free(batch);
> > > +
> > > +	drm_intel_bo_unreference(dst);
> > > +}
> > > +
> > > +static unsigned int total_fence_pin_count(void)
> > > +{
> > > +	unsigned int pinned = 0;
> > > +	char buf[128];
> > > +	FILE *f;
> > > +
> > > +	f = igt_debugfs_fopen("i915_gem_fence_regs", "r");
> > > +	igt_assert(f);
> > > +
> > > +	for (;;) {
> > > +		unsigned int pin_count = 0;
> > > +		char *ptr;
> > > +
> > > +		ptr = fgets(buf, sizeof(buf), f);
> > > +		if (!ptr)
> > > +			break;
> > > +
> > > +		if (sscanf(ptr, "Fence %*u, pin count = %u", &pin_count) == 1)
> > > +			pinned += pin_count;
> > > +	}
> > > +
> > > +	fclose(f);
> > > +
> > > +	return pinned;
> > > +}
> > > +
> > > +static bool run_single_test(data_t *data, enum pipe pipe, igt_output_t *output)
> > > +{
> > > +	igt_display_t *display = &data->display;
> > > +	drmModeModeInfo *mode;
> > > +	igt_plane_t *primary;
> > > +	unsigned int pin_count;
> > > +	struct igt_fb fb[2];
> > > +	int i;
> > > +
> > > +	igt_output_set_pipe(output, pipe);
> > > +	igt_display_commit(display);
> > > +
> > > +	if (!output->valid) {
> > > +		igt_output_set_pipe(output, PIPE_ANY);
> > > +		igt_display_commit(display);
> > > +		return false;
> > > +	}
> > > +
> > > +	mode = igt_output_get_mode(output);
> > > +	primary = igt_output_get_plane(output, IGT_PLANE_PRIMARY);
> > > +
> > > +	igt_create_color_fb(data->drm_fd, mode->hdisplay, mode->vdisplay,
> > > +			    DRM_FORMAT_XRGB8888,
> > > +			    true, /* need a fence so must be tiled */
> > > +			    0.0, 0.0, 0.0,
> > > +			    &fb[0]);
> > > +	igt_create_color_fb(data->drm_fd, mode->hdisplay, mode->vdisplay,
> > > +			    DRM_FORMAT_XRGB8888,
> > > +			    true, /* need a fence so must be tiled */
> > > +			    0.0, 0.0, 0.0,
> > > +			    &fb[1]);
> > > +
> > > +	igt_plane_set_fb(primary, &fb[0]);
> > > +	igt_display_commit(display);
> > > +
> > > +	/* sample the fence pin_counts now that we have one scanout buffer pinned */
> > > +	pin_count = total_fence_pin_count();
> > > +
> > > +	for (i = 0; i < 64; i++) {
> > > +		drm_intel_context *ctx;
> > > +
> > > +		/*
> > > +		 * Link fb.gem_handle to the ppgtt vm of ctx so that the context
> > > +		 * destruction will unbind the obj from the ppgtt vm in question.
> > > +		 */
> > > +		ctx = drm_intel_gem_context_create(data->bufmgr);
> > > +		igt_assert(ctx);
> > > +		exec_nop(data, fb[i&1].gem_handle, ctx);
> > > +		drm_intel_gem_context_destroy(ctx);
> > > +
> > > +		/* Force a context switch to make sure ctx gets destroyed for real. */
> > > +		exec_nop(data, fb[i&1].gem_handle, NULL);
> > > +
> > > +		gem_sync(data->drm_fd, fb[i&1].gem_handle);
> > > +
> > > +		/*
> > > +		 * Pin the new buffer and unpin the old buffer from display. If
> > > +		 * the kernel is buggy the ppgtt unbind will have dropped the
> > > +		 * fence for the old buffer, and now the display code will try
> > > +		 * to unpin only to find no fence there. So the pin_count will leak.
> > > +		 */
> > > +		igt_plane_set_fb(primary, &fb[!(i&1)]);
> > > +		igt_display_commit(display);
> > > +
> > > +		/*
> > > +		 * We always have exactly one scanout buffer, so the fence pin_count
> > > +		 * should remain static. Fail the test if we detect a leak.
> > > +		 */
> > > +		igt_assert(pin_count == total_fence_pin_count());
> > > +
> > > +		printf(".");
> > > +		fflush(stdout);
> > > +	}
> > > +
> > > +	igt_plane_set_fb(primary, NULL);
> > > +	igt_output_set_pipe(output, PIPE_ANY);
> > > +	igt_display_commit(display);
> > > +
> > > +	igt_remove_fb(data->drm_fd, &fb[1]);
> > > +	igt_remove_fb(data->drm_fd, &fb[0]);
> > > +
> > > +	printf("\n");
> > > +
> > > +	return true;
> > > +}
> > > +
> > > +static void run_test(data_t *data)
> > > +{
> > > +	igt_display_t *display = &data->display;
> > > +	igt_output_t *output;
> > > +	enum pipe p;
> > > +
> > > +	for_each_connected_output(display, output) {
> > > +		for (p = 0; p < igt_display_get_n_pipes(display); p++) {
> > > +			if (run_single_test(data, p, output))
> > > +				return; /* one time ought to be enough */
> > > +		}
> > > +	}
> > > +
> > > +	igt_skip("no valid crtc/connector combinations found\n");
> > > +}
> > > +
> > > +igt_simple_main
> > > +{
> > > +	drm_intel_context *ctx;
> > > +	data_t data = {};
> > > +
> > > +	igt_skip_on_simulation();
> > > +
> > > +	data.drm_fd = drm_open_any();
> > > +
> > > +	data.devid = intel_get_drm_devid(data.drm_fd);
> > > +
> > > +	igt_set_vt_graphics_mode();
> > > +
> > > +	data.bufmgr = drm_intel_bufmgr_gem_init(data.drm_fd, 4096);
> > > +	igt_assert(data.bufmgr);
> > > +	drm_intel_bufmgr_gem_enable_reuse(data.bufmgr);
> > > +
> > > +	igt_display_init(&data.display, data.drm_fd);
> > > +
> > > +	ctx = drm_intel_gem_context_create(data.bufmgr);
> > > +	igt_require(ctx);
> > > +	drm_intel_gem_context_destroy(ctx);
> > > +
> > > +	run_test(&data);
> > > +
> > > +	drm_intel_bufmgr_destroy(data.bufmgr);
> > > +	igt_display_fini(&data.display);
> > > +}
> > > -- 
> > > 1.8.3.2
> > > 
> > 
> > -- 
> > Daniel Vetter
> > Software Engineer, Intel Corporation
> > +41 (0) 79 365 57 48 - http://blog.ffwll.ch
> 
> -- 
> Ville Syrjälä
> Intel OTC