[i-g-t,v2] tests/gem_shrink: Exercise OOM and other routes to shrinking in reasonable time

Submitted by Tvrtko Ursulin on Jan. 7, 2019, 5:36 p.m.

Details

Message ID 20190107173640.9620-1-tvrtko.ursulin@linux.intel.com
State New
Headers show
Series "tests/gem_shrink: Exercise OOM and other routes to shrinking in reasonable time" ( rev: 2 ) in IGT

Not browsing as part of any series.

Commit Message

Tvrtko Ursulin Jan. 7, 2019, 5:36 p.m.
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

A set of subtests which exercises different paths to our shrinker code
(including the OOM killer) within a predictable and reasonable time budget.

v2:
 * Fix blacklist regexp. (Petri Latvala)

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 lib/igt_core.c                        |  19 ++
 lib/igt_core.h                        |   1 +
 tests/i915/gem_shrink.c               | 399 ++++++++++++++++++++++++++
 tests/intel-ci/blacklist.txt          |   2 +-
 tests/intel-ci/fast-feedback.testlist |   3 +
 5 files changed, 423 insertions(+), 1 deletion(-)

Patch hide | download patch | download mbox

diff --git a/lib/igt_core.c b/lib/igt_core.c
index 50d6008f6c82..351da0b4e020 100644
--- a/lib/igt_core.c
+++ b/lib/igt_core.c
@@ -1685,6 +1685,25 @@  void igt_stop_helper(struct igt_helper_process *proc)
 	assert(helper_was_alive(proc, status));
 }
 
+/**
+ * igt_try_stop_helper:
+ * @proc: #igt_helper_process structure
+ *
+ * Signals the helper process (SIGKILL when @proc->use_SIGKILL is set, SIGTERM
+ * otherwise) and then waits for it.
+ *
+ * Returns: true if the helper was still running and has been terminated, or
+ * false if the process wasn't running.
+ */
+bool igt_try_stop_helper(struct igt_helper_process *proc)
+{
+	const int sig = proc->use_SIGKILL ? SIGKILL : SIGTERM;
+
+	/*
+	 * The result of kill() is deliberately ignored: a failure means the
+	 * pid is already dead, in which case waiting on it is safe.
+	 */
+	kill(proc->pid, sig);
+
+	return helper_was_alive(proc, igt_wait_helper(proc));
+}
+
 static void children_exit_handler(int sig)
 {
 	int status;
diff --git a/lib/igt_core.h b/lib/igt_core.h
index 6f8c3852a686..ed5ceebf1205 100644
--- a/lib/igt_core.h
+++ b/lib/igt_core.h
@@ -795,6 +795,7 @@  bool __igt_fork_helper(struct igt_helper_process *proc);
 	for (; __igt_fork_helper(proc); exit(0))
 int igt_wait_helper(struct igt_helper_process *proc);
 void igt_stop_helper(struct igt_helper_process *proc);
+bool igt_try_stop_helper(struct igt_helper_process *proc);
 
 /* exit handler code */
 
diff --git a/tests/i915/gem_shrink.c b/tests/i915/gem_shrink.c
index c8e05814ee70..7c002de0ef1f 100644
--- a/tests/i915/gem_shrink.c
+++ b/tests/i915/gem_shrink.c
@@ -26,6 +26,10 @@ 
  *
  * Exercise the shrinker by overallocating GEM objects
  */
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+#include <fcntl.h>
 
 #include "igt.h"
 #include "igt_gt.h"
@@ -366,6 +370,376 @@  static void reclaim(unsigned engine, int timeout)
 	close(fd);
 }
 
+/*
+ * Look up the numeric value following @tag in @info (the text of
+ * /proc/meminfo) and return it shifted right by 10 bits, presumably to
+ * convert the kB figure into MiB.
+ *
+ * Returns 0, after emitting a warning, if @tag is not found or is not
+ * followed by a number.
+ */
+static unsigned long get_meminfo(const char *info, const char *tag)
+{
+	const char *field = strstr(info, tag);
+	unsigned long kb;
+
+	if (!field || sscanf(field + strlen(tag), " %lu", &kb) != 1) {
+		igt_warn("Unrecognised /proc/meminfo field: '%s'\n", tag);
+		return 0;
+	}
+
+	return kb >> 10;
+}
+
+/*
+ * Estimate how much RAM is still available by summing the MemAvailable,
+ * Buffers, Cached and SwapCached fields of /proc/meminfo, each converted by
+ * get_meminfo().
+ *
+ * Asserts if /proc/meminfo cannot be opened or read.
+ */
+static unsigned long get_avail_ram_mb(void)
+{
+	int fd;
+	ssize_t ret;
+	char buf[4096];
+	unsigned long ram;
+
+	fd = open("/proc/meminfo", O_RDONLY);
+	igt_assert_fd(fd);
+
+	/*
+	 * read() does not NUL-terminate, while get_meminfo() parses the buffer
+	 * with string functions, so reserve one byte for the terminator.
+	 */
+	ret = read(fd, buf, sizeof(buf) - 1);
+	igt_assert(ret >= 0);
+	buf[ret] = '\0';
+
+	close(fd);
+
+	ram = get_meminfo(buf, "MemAvailable:");
+	ram += get_meminfo(buf, "Buffers:");
+	ram += get_meminfo(buf, "Cached:");
+	ram += get_meminfo(buf, "SwapCached:");
+
+	return ram;
+}
+
+/*
+ * Parameters handed to mempressure().
+ *
+ * flags selects how memory blocks are allocated: TEST_BO uses __get_pages(),
+ * TEST_USERPTR uses __get_userptr(), and with neither bit set a plain
+ * anonymous mmap is used.
+ *
+ * fd is the DRM file descriptor passed to the GEM calls; it is not used for
+ * the plain mmap case.
+ */
+struct test {
+#define TEST_BO		(1)
+#define TEST_USERPTR	(2)
+	unsigned int flags;
+	int fd;
+};
+
+/*
+ * Create a GEM object of @alloc bytes on @fd, move it to the GTT domain
+ * (presumably so that its backing pages get allocated) and mark it
+ * I915_MADV_DONTNEED.
+ *
+ * Returns the handle of the new object.
+ */
+static uint32_t __get_pages(int fd, unsigned long alloc)
+{
+	const uint32_t bo = gem_create(fd, alloc);
+
+	gem_set_domain(fd, bo, I915_GEM_DOMAIN_GTT, 0);
+	gem_madvise(fd, bo, I915_MADV_DONTNEED);
+
+	return bo;
+}
+
+/*
+ * One block of memory allocated by mempressure().
+ *
+ * ptr is the CPU mapping (set for plain mmap and userptr blocks) and handle
+ * is the GEM handle (set for GEM object and userptr blocks). An entry with
+ * both fields zero is treated as unused.
+ */
+struct test_obj {
+	void *ptr;
+	uint32_t handle;
+};
+
+#define PAGE_SIZE 4096
+/*
+ * Allocate @sz bytes of anonymous memory, touch every page of it, wrap it in
+ * a userptr GEM object on @fd, move that object to the GTT domain and mark it
+ * I915_MADV_DONTNEED.
+ *
+ * @sz must be a multiple of 4096. On return @obj holds both the mapping and
+ * the GEM handle; the caller owns both.
+ */
+static void
+__get_userptr(int fd, struct test_obj *obj, unsigned long sz)
+{
+	struct local_i915_gem_userptr arg = { };
+	unsigned char *base;
+	unsigned long offset;
+
+	igt_assert_eq(sz & 4095, 0);
+
+	base = mmap(NULL, sz, PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+	assert(base != MAP_FAILED);
+
+	/* Write to each page so that all of the mapping gets touched. */
+	for (offset = 0; offset < sz; offset += PAGE_SIZE)
+		*(volatile uint32_t *)(base + offset) = 0;
+
+	arg.user_ptr = to_user_pointer(base);
+	arg.user_size = sz;
+	do_ioctl(fd, LOCAL_IOCTL_I915_GEM_USERPTR, &arg);
+
+	gem_set_domain(fd, arg.handle, I915_GEM_DOMAIN_GTT, 0);
+	gem_madvise(fd, arg.handle, I915_MADV_DONTNEED);
+
+	obj->handle = arg.handle;
+	obj->ptr = base;
+}
+
+/*
+ * Use a specific way of using up memory until we are below a certain threshold.
+ *
+ * @arg points to a struct test. Its flags select how each block is allocated
+ * (TEST_BO: __get_pages(), TEST_USERPTR: __get_userptr(), neither: plain
+ * anonymous mmap) and its fd is used for the GEM calls.
+ *
+ * Blocks of sz_mb MiB are allocated into a ring of slots. Once available RAM
+ * drops below free_threshold_mb the ring is capped at the number of slots used
+ * so far, after which the oldest block is freed before each new allocation.
+ *
+ * The loop never terminates; the process running it is expected to be killed.
+ */
+static void *mempressure(void *arg)
+{
+	const unsigned int free_threshold_mb = 256;
+	struct test_obj *list = NULL;
+	struct test *test = arg;
+	const unsigned int sz_mb = 2;
+	const unsigned int sz = sz_mb << 20;
+	unsigned int n = 0, max = 0;
+	unsigned int blocks = 0;
+
+	for (;;) {
+		unsigned long ram_mb = get_avail_ram_mb();
+
+		if (!list) {
+			/*
+			 * On first pass estimate how many sz_mb sized blocks
+			 * we could need to use up all RAM.
+			 *
+			 * Always keep at least one slot so that the list is
+			 * never zero sized and list[0] is valid below.
+			 */
+			blocks = ram_mb / sz_mb;
+			if (!blocks)
+				blocks = 1;
+			list = calloc(blocks, sizeof(*list));
+			igt_assert(list);
+		} else if (ram_mb < free_threshold_mb) {
+			/* Limit the working set once under the threshold. */
+			blocks = max + 1;
+		}
+
+		/* Free the oldest block once the working set wrapped. */
+		if (list[n].ptr || list[n].handle) {
+			if (test->flags & TEST_USERPTR) {
+				munmap(list[n].ptr, sz);
+				gem_close(test->fd, list[n].handle);
+			} else if (test->flags & TEST_BO) {
+				gem_close(test->fd, list[n].handle);
+			} else {
+				munmap(list[n].ptr, sz);
+			}
+		}
+
+		/*
+		 * Allocate memory blocks and record the current working set
+		 * size.
+		 */
+		if (test->flags & TEST_BO) {
+			list[n].handle = __get_pages(test->fd, sz);
+		} else if (test->flags & TEST_USERPTR) {
+			__get_userptr(test->fd, &list[n], sz);
+		} else {
+			list[n].ptr = mmap(NULL, sz, PROT_WRITE,
+					   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+			assert(list[n].ptr != MAP_FAILED);
+
+			madvise(list[n].ptr, sz, MADV_HUGEPAGE);
+
+			/* Write to each page so that the whole block is touched. */
+			for (size_t page = 0; page < sz; page += PAGE_SIZE)
+				*(volatile uint32_t *)((unsigned char *)list[n].ptr + page) = 0;
+		}
+
+		if (n > max)
+			max = n;
+
+		n++;
+
+		/*
+		 * Start freeing the oldest and reallocating once maximum
+		 * working set size has been reached.
+		 */
+		if (n >= blocks)
+			n = 0;
+	}
+
+	/* Not reached; present only to satisfy the return type. */
+	return NULL;
+}
+
+/*
+ * Write @score, a NUL-terminated decimal string, to
+ * /proc/self/oom_score_adj of the calling process.
+ *
+ * Asserts if the file cannot be opened or the string is not fully written.
+ */
+static void oom_adjust(const char *score)
+{
+	/* strlen(), not sizeof(): score is a pointer, not an array. */
+	const size_t len = strlen(score);
+	int fd;
+
+	fd = open("/proc/self/oom_score_adj", O_WRONLY);
+	igt_assert_fd(fd);
+	igt_assert(write(fd, score, len) == (ssize_t)len);
+	close(fd);
+}
+
+/*
+ * Write the single-character command "f" to /proc/sysrq-trigger.
+ *
+ * Asserts if the file cannot be opened or the command is not written.
+ */
+static void trigger_oom(void)
+{
+	/* An array, so that sizeof() gives the string size, not a pointer size. */
+	static const char cmd[] = "f";
+	const size_t len = sizeof(cmd) - 1; /* without the terminating NUL */
+	int fd;
+
+	fd = open("/proc/sysrq-trigger", O_WRONLY);
+	igt_assert_fd(fd);
+	igt_assert(write(fd, cmd, len) == (ssize_t)len);
+	close(fd);
+}
+
+/*
+ * Check whether /proc/sysrq-trigger can be opened for writing.
+ *
+ * Returns true if it can, false otherwise.
+ */
+static bool has_sysrq_trigger(void)
+{
+	int fd;
+
+	fd = open("/proc/sysrq-trigger", O_WRONLY);
+	if (fd < 0)
+		return false;
+
+	/* Only close a descriptor that was actually opened. */
+	close(fd);
+
+	return true;
+}
+
+/*
+ * Exercise different paths to our shrinker code, including OOM, within a
+ * predictable and reasonable time budget.
+ *
+ * @flags: TEST_BO and/or TEST_USERPTR, selecting the kind of memory pressure
+ *         applied by the helpers
+ * @passes: number of iterations to run; the loop body always runs at least
+ *          once
+ *
+ * Each iteration forks four helper processes (two memory hogs, an execbuf
+ * load and a cache dropper), waits until available RAM runs low or one of the
+ * hogs exits, triggers the OOM killer via sysrq and finally stops all helpers.
+ */
+static void reclaim_oom(unsigned int flags, unsigned int passes)
+{
+	unsigned int count = 0;
+
+	/*
+	 * Lowest OOM score adjustment for the test process itself; the memory
+	 * hogs below raise theirs instead (presumably so that the OOM killer
+	 * picks them - confirm against the oom_score_adj documentation).
+	 */
+	oom_adjust("-1000");
+
+	do {
+		struct igt_helper_process gem_child = { .use_SIGKILL = true };
+		struct igt_helper_process mem_child = { .use_SIGKILL = true };
+		struct igt_helper_process eb_child = { .use_SIGKILL = true };
+		struct igt_helper_process drop_child = { .use_SIGKILL = true };
+
+		igt_debug("Iteration %u...\n", ++count);
+
+		/*
+		 * Apply either anon backed (plain mmap) or, when both flags
+		 * were requested, shmem backed (GEM object) memory pressure,
+		 * aiming to use up almost all available RAM.
+		 */
+		igt_fork_helper(&mem_child) {
+			struct test test = { };
+
+			if ((flags & (TEST_BO | TEST_USERPTR)) ==
+			    (TEST_BO | TEST_USERPTR))
+				test.flags = TEST_BO;
+
+			/* Sacrifice the memory hog if it comes to that. */
+			oom_adjust("500");
+
+			if (test.flags == TEST_BO) {
+				test.fd = drm_open_driver_render(DRIVER_INTEL);
+				igt_require_gem(test.fd);
+			}
+
+			/* Loops forever; the helper is killed by the parent. */
+			mempressure(&test);
+
+			if (test.flags == TEST_BO)
+				close(test.fd);
+		}
+
+		/*
+		 * Apply either userptr backed or shmem backed (GEM object)
+		 * memory pressure, as selected by flags, aiming to use up
+		 * almost all available RAM. When both flags were requested
+		 * this helper takes the userptr half.
+		 */
+		igt_fork_helper(&gem_child) {
+			struct test test = { .flags = flags };
+
+			if ((flags & (TEST_BO | TEST_USERPTR)) ==
+			    (TEST_BO | TEST_USERPTR))
+				test.flags = TEST_USERPTR;
+
+			/* Sacrifice the memory hog if it comes to that. */
+			oom_adjust("500");
+
+			test.fd = drm_open_driver_render(DRIVER_INTEL);
+			igt_require_gem(test.fd);
+
+			/* Loops forever; the helper is killed by the parent. */
+			mempressure(&test);
+
+			close(test.fd);
+		}
+
+		/*
+		 * Apply an execbuf load to exercise the request allocation and
+		 * direct reclaim from this path.
+		 *
+		 * Occasionally sync with execution and pause for a little bit
+		 * to avoid hogging too much from this client.
+		 */
+		igt_fork_helper(&eb_child) {
+			struct test test = { .flags = flags };
+			const uint32_t bbe = MI_BATCH_BUFFER_END;
+			struct drm_i915_gem_exec_object2 obj = { };
+			struct drm_i915_gem_execbuffer2 execbuf = { };
+
+			execbuf.buffers_ptr = to_user_pointer(&obj);
+			execbuf.buffer_count = 1;
+
+			test.fd = drm_open_driver_render(DRIVER_INTEL);
+			igt_require_gem(test.fd);
+
+			for (;;) {
+				unsigned long eb = 0;
+				struct timespec ts = { };
+				unsigned long start;
+
+				/* Timestamps below are in milliseconds. */
+				igt_nsec_elapsed(&ts);
+				start = igt_nsec_elapsed(&ts) / 1e6;
+
+				for (;;) {
+					unsigned long now;
+
+					obj.handle = gem_create(test.fd, 4096);
+					gem_write(test.fd, obj.handle, 0, &bbe,
+						sizeof(bbe));
+					gem_execbuf(test.fd, &execbuf);
+					eb++;
+					now = igt_nsec_elapsed(&ts) / 1e6;
+					/*
+					 * After the first second sync on every
+					 * batch, after two seconds end this
+					 * burst.
+					 */
+					if (now > (start + 1000)) {
+						gem_sync(test.fd, obj.handle);
+						if (now > (start + 2000)) {
+							gem_close(test.fd,
+								  obj.handle);
+							break;
+						}
+					}
+					gem_close(test.fd, obj.handle);
+				}
+
+				igt_debug("%lu execbufs\n", eb);
+				usleep(500e3);
+			}
+
+			close(test.fd);
+		}
+
+		/*
+		 * Manually drop caches with the DROP_ACTIVE flag set every now
+		 * and then in order to exercise this path as well.
+		 */
+		igt_fork_helper(&drop_child) {
+			int fd;
+
+			fd = drm_open_driver(DRIVER_INTEL);
+			igt_require_gem(fd);
+
+			for (;;) {
+				usleep(334e3);
+				igt_debug("Dropping caches...\n");
+				igt_drop_caches_set(fd, DROP_ACTIVE);
+			}
+
+			close(fd);
+		}
+
+		/*
+		 * Poll once a second until the memory pressure clients have
+		 * brought available RAM down to 512 MiB or less, or until one
+		 * of them has gone away. Afterwards manually trigger the OOM
+		 * condition and let everything run for a brief moment more.
+		 */
+		for (unsigned long ram_mb = 0;
+		     (ram_mb = get_avail_ram_mb()) > 512;) {
+			int status;
+			pid_t pid;
+
+			igt_debug("[%u] %lu MiB free\n", count, ram_mb);
+
+			/* Non-zero: the hog has exited (or waitpid failed). */
+			pid = waitpid(mem_child.pid, &status, WNOHANG);
+			if (pid)
+				break;
+
+			pid = waitpid(gem_child.pid, &status, WNOHANG);
+			if (pid)
+				break;
+
+			/* The remaining two helpers must still be running. */
+			pid = waitpid(eb_child.pid, &status, WNOHANG);
+			igt_assert_eq(pid, 0);
+
+			pid = waitpid(drop_child.pid, &status, WNOHANG);
+			igt_assert_eq(pid, 0);
+
+			sleep(1);
+		}
+
+		igt_debug("Triggering OOM...\n");
+		trigger_oom();
+
+		sleep(1);
+
+		/*
+		 * The memory hogs may already be gone by now, hence the
+		 * tolerant stop variant for those two.
+		 */
+		igt_try_stop_helper(&mem_child);
+		igt_try_stop_helper(&gem_child);
+		igt_stop_helper(&eb_child);
+		igt_stop_helper(&drop_child);
+	} while (count < passes);
+}
+
 igt_main
 {
 	const struct test {
@@ -432,6 +806,31 @@  igt_main
 	igt_subtest("reclaim")
 		reclaim(I915_EXEC_DEFAULT, 2);
 
+	igt_subtest_group {
+		struct {
+			const char *name;
+			unsigned int flags;
+			unsigned int passes;
+		} *p, passes[] = {
+			{ "reclaims-and-oom-basic", TEST_BO, 1 },
+			{ "reclaims-and-oom", TEST_BO, 3 },
+			{ "reclaims-and-oom-userptr-basic", TEST_USERPTR, 1 },
+			{ "reclaims-and-oom-userptr", TEST_USERPTR, 3 },
+			{ "reclaims-and-oom-both-basic", TEST_BO | TEST_USERPTR, 1 },
+			{ "reclaims-and-oom-both", TEST_BO | TEST_USERPTR, 3 },
+			{ NULL, 0, 0 },
+		};
+
+		igt_fixture {
+			igt_require(has_sysrq_trigger());
+		}
+
+		for (p = passes; p->name; p++) {
+			igt_subtest(p->name)
+				reclaim_oom(p->flags, p->passes);
+		}
+	}
+
 	for(const struct test *t = tests; t->name; t++) {
 		for(const struct mode *m = modes; m->suffix; m++) {
 			igt_subtest_f("%s%s", t->name, m->suffix)
diff --git a/tests/intel-ci/blacklist.txt b/tests/intel-ci/blacklist.txt
index 73d127603d28..b6e302db381e 100644
--- a/tests/intel-ci/blacklist.txt
+++ b/tests/intel-ci/blacklist.txt
@@ -59,7 +59,7 @@  igt@gem_ringfill@(?!.*basic).*
 igt@gem_ring_sync_copy(@.*)?
 igt@gem_ring_sync_loop(@.*)?
 igt@gem_seqno_wrap(@.*)?
-igt@gem_shrink@(?!reclaim$).*
+igt@gem_shrink@(?!reclaim).*
 igt@gem_softpin@.*(hang|S4).*
 igt@gem_spin_batch(@.*)?
 igt@gem_stolen@.*hibernate.*
diff --git a/tests/intel-ci/fast-feedback.testlist b/tests/intel-ci/fast-feedback.testlist
index 6d42792c67f7..0df7cc2fd9fd 100644
--- a/tests/intel-ci/fast-feedback.testlist
+++ b/tests/intel-ci/fast-feedback.testlist
@@ -124,6 +124,9 @@  igt@gem_ringfill@basic-default
 igt@gem_ringfill@basic-default-interruptible
 igt@gem_ringfill@basic-default-forked
 igt@gem_ringfill@basic-default-fd
+igt@gem_shrink@reclaims-and-oom-basic
+igt@gem_shrink@reclaims-and-oom-userptr-basic
+igt@gem_shrink@reclaims-and-oom-both-basic
 igt@gem_sync@basic-all
 igt@gem_sync@basic-each
 igt@gem_sync@basic-many-each