[RFT,i-g-t,1/2] tests/gem_shrink: Background, direct and OOM shrinker plus userptr tests

Submitted by Tvrtko Ursulin on Dec. 7, 2018, 2:04 p.m.

Details

Message ID 20181207140405.15259-1-tvrtko.ursulin@linux.intel.com
State New
Series "Series without cover letter"
Headers show

Commit Message

Tvrtko Ursulin Dec. 7, 2018, 2:04 p.m.
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

...

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 lib/igt_core.c          |  18 +++
 lib/igt_core.h          |   1 +
 tests/i915/gem_shrink.c | 299 ++++++++++++++++++++++++++++++++++++++++
 3 files changed, 318 insertions(+)

Patch hide | download patch | download mbox

diff --git a/lib/igt_core.c b/lib/igt_core.c
index 64883d6402af..d8fa0c83e279 100644
--- a/lib/igt_core.c
+++ b/lib/igt_core.c
@@ -1680,6 +1680,24 @@  void igt_stop_helper(struct igt_helper_process *proc)
 	assert(helper_was_alive(proc, status));
 }
 
+/**
+ * igt_try_stop_helper:
+ * @proc: #igt_helper_process structure
+ *
+ * Terminates a helper process if it is still running.
+ */
+void igt_try_stop_helper(struct igt_helper_process *proc)
+{
+	int status;
+
+	/* failure here means the pid is already dead and so waiting is safe */
+	kill(proc->pid, proc->use_SIGKILL ? SIGKILL : SIGTERM);
+
+	status = igt_wait_helper(proc);
+	if (!helper_was_alive(proc, status))
+		igt_debug("Helper died too early with status=%d\n", status);
+}
+
 static void children_exit_handler(int sig)
 {
 	int status;
diff --git a/lib/igt_core.h b/lib/igt_core.h
index 6f8c3852a686..beec34667524 100644
--- a/lib/igt_core.h
+++ b/lib/igt_core.h
@@ -795,6 +795,7 @@  bool __igt_fork_helper(struct igt_helper_process *proc);
 	for (; __igt_fork_helper(proc); exit(0))
 int igt_wait_helper(struct igt_helper_process *proc);
 void igt_stop_helper(struct igt_helper_process *proc);
+void igt_try_stop_helper(struct igt_helper_process *proc);
 
 /* exit handler code */
 
diff --git a/tests/i915/gem_shrink.c b/tests/i915/gem_shrink.c
index c8e05814ee70..2c8e8f9453d2 100644
--- a/tests/i915/gem_shrink.c
+++ b/tests/i915/gem_shrink.c
@@ -26,6 +26,9 @@ 
  *
  * Exercise the shrinker by overallocating GEM objects
  */
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
 
 #include "igt.h"
 #include "igt_gt.h"
@@ -366,6 +369,287 @@  static void reclaim(unsigned engine, int timeout)
 	close(fd);
 }
 
+static unsigned long get_meminfo(const char *info, const char *tag)
+{
+	const char *str;
+	unsigned long val;
+
+	str = strstr(info, tag);
+	if (str && sscanf(str + strlen(tag), " %lu", &val) == 1)
+		return val >> 10;
+
+	igt_warn("Unrecognised /proc/meminfo field: '%s'\n", tag);
+	return 0;
+}
+
+/* Sum the MemAvailable, Buffers, Cached and SwapCached fields of /proc/meminfo. */
+static unsigned long get_avail_ram_mb(void)
+{
+	char buf[4096];
+	unsigned long ram;
+	int fd, ret;
+
+	fd = open("/proc/meminfo", O_RDONLY);
+	igt_assert_fd(fd);
+	/* Keep one byte spare: strstr()/sscanf() need a NUL-terminated string. */
+	ret = read(fd, buf, sizeof(buf) - 1);
+	igt_assert(ret >= 0);
+	buf[ret] = '\0';
+	close(fd);
+
+	ram = get_meminfo(buf, "MemAvailable:");
+	ram += get_meminfo(buf, "Buffers:");
+	ram += get_meminfo(buf, "Cached:");
+	ram += get_meminfo(buf, "SwapCached:");
+
+	return ram;
+}
+
+struct test {
+#define TEST_BO		(1)
+#define TEST_USERPTR	(2)
+	unsigned int flags;
+	int fd;
+};
+
+static uint32_t __get_pages(int fd, unsigned long alloc)
+{
+	uint32_t handle = gem_create(fd, alloc);
+
+	gem_set_domain(fd, handle, I915_GEM_DOMAIN_GTT, 0);
+	gem_madvise(fd, handle, I915_MADV_DONTNEED);
+
+	return handle;
+}
+
+struct test_obj {
+	void *ptr;
+	uint32_t handle;
+};
+
+static void
+__get_userptr(int fd, struct test_obj *obj, unsigned long sz)
+{
+	struct local_i915_gem_userptr userptr = { };
+	void *ptr;
+
+	igt_assert_eq(sz & 4095, 0);
+
+	ptr = mmap(NULL, sz, PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+	assert(ptr != MAP_FAILED);
+
+	userptr.user_size = sz;
+	userptr.user_ptr = to_user_pointer(ptr);
+	do_ioctl(fd, LOCAL_IOCTL_I915_GEM_USERPTR, &userptr);
+
+	gem_set_domain(fd, userptr.handle, I915_GEM_DOMAIN_GTT, 0);
+	gem_madvise(fd, userptr.handle, I915_MADV_DONTNEED);
+
+	obj->ptr = ptr;
+	obj->handle = userptr.handle;
+}
+
+#define PAGE_SIZE 4096
+static void *mempressure(void *arg)
+{
+	struct test_obj *list = NULL;
+	struct test *test = arg;
+	const unsigned int sz_mb = 2;
+	const unsigned int sz = sz_mb << 20;
+	unsigned int n = 0, max = 0;
+	unsigned int blocks;
+
+	while (true) {
+		unsigned long ram_mb = get_avail_ram_mb();
+
+		if (!list) {
+			blocks = ram_mb / sz_mb;
+			list = calloc(blocks, sizeof(*list));
+			igt_assert(list);
+		} else if (ram_mb < 256) {
+			blocks = max + 1;
+		}
+
+		if (list[n].ptr || list[n].handle) {
+			if (test->flags & TEST_USERPTR) {
+				munmap(list[n].ptr, sz);
+				gem_close(test->fd, list[n].handle);
+			} else if (test->flags & TEST_BO) {
+				gem_close(test->fd, list[n].handle);
+			} else {
+				munmap(list[n].ptr, sz);
+			}
+		}
+
+		if (test->flags & TEST_BO) {
+			list[n].handle = __get_pages(test->fd, sz);
+		} else if (test->flags & TEST_USERPTR) {
+			__get_userptr(test->fd, &list[n], sz);
+		} else {
+			list[n].ptr = mmap(NULL, sz, PROT_WRITE,
+					   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+			assert(list[n].ptr != MAP_FAILED);
+
+			madvise(list[n].ptr, sz, MADV_HUGEPAGE);
+
+			for (size_t page = 0; page < sz; page += PAGE_SIZE)
+				*(volatile uint32_t *)((unsigned char *)list[n].ptr + page) = 0;
+		}
+
+		if (n > max)
+			max = n;
+
+		n++;
+
+		if (n >= blocks)
+			n = 0;
+	}
+
+	return NULL;
+}
+
+/* Write the string @score to /proc/self/oom_score_adj, asserting on failure. */
+static void oom_adjust(const char *score)
+{
+	int fd = open("/proc/self/oom_score_adj", O_WRONLY);
+
+	igt_assert_fd(fd);
+	igt_assert(write(fd, score, strlen(score)) == strlen(score));
+	close(fd);
+}
+
+/* Write the SysRq command "f" to /proc/sysrq-trigger, asserting on failure. */
+static void trigger_oom(void)
+{
+	const char *cmd = "f";
+	int fd = open("/proc/sysrq-trigger", O_WRONLY);
+
+	igt_assert_fd(fd);
+	igt_assert(write(fd, cmd, strlen(cmd)) == strlen(cmd));
+	close(fd);
+}
+
+/* Report whether /proc/sysrq-trigger can be opened for writing. */
+static bool has_sysrq_trigger(void)
+{
+	int fd = open("/proc/sysrq-trigger", O_WRONLY);
+
+	if (fd >= 0)
+		close(fd);
+	return fd >= 0;
+}
+
+static void reclaim_oom(unsigned int flags)
+{
+	unsigned int count = 0;
+
+	igt_assert_eq(__builtin_popcount(flags), 1);
+
+	oom_adjust("-1000");
+
+	do {
+		struct igt_helper_process gem_child = { .use_SIGKILL = true };
+		struct igt_helper_process mem_child = { .use_SIGKILL = true };
+		struct igt_helper_process eb_child = { .use_SIGKILL = true };
+		struct igt_helper_process drop_child = { .use_SIGKILL = true };
+
+		igt_debug("Iteration %u...\n", ++count);
+
+		igt_fork_helper(&mem_child) {
+			struct test test = { };
+
+			oom_adjust("500");
+			mempressure(&test);
+		}
+
+		igt_fork_helper(&gem_child) {
+			struct test test = { .flags = flags };
+
+			oom_adjust("500");
+
+			test.fd = drm_open_driver_render(DRIVER_INTEL);
+			igt_require_gem(test.fd);
+
+			mempressure(&test);
+
+			close(test.fd);
+		}
+
+		igt_fork_helper(&eb_child) {
+			struct test test = { .flags = flags };
+			const uint32_t bbe = MI_BATCH_BUFFER_END;
+			struct drm_i915_gem_exec_object2 obj = { };
+			struct drm_i915_gem_execbuffer2 execbuf = { };
+
+			oom_adjust("500");
+
+			execbuf.buffers_ptr = to_user_pointer(&obj);
+			execbuf.buffer_count = 1;
+
+			test.fd = drm_open_driver_render(DRIVER_INTEL);
+			igt_require_gem(test.fd);
+
+			for (;;) {
+				unsigned long eb = 0;
+				struct timespec ts = { };
+				unsigned long sec, last = 0;
+
+				igt_nsec_elapsed(&ts);
+
+				for (;;) {
+					obj.handle = gem_create(test.fd, 4096);
+					gem_write(test.fd, obj.handle, 0, &bbe,
+						sizeof(bbe));
+					gem_execbuf(test.fd, &execbuf);
+					eb++;
+					sec = igt_nsec_elapsed(&ts) / 1e9;
+					if (sec > last)
+						gem_sync(test.fd, obj.handle);
+					gem_close(test.fd, obj.handle);
+					if ((last - sec) > 1)
+						break;
+					last = sec;
+				}
+
+				igt_debug("%lu eb\n", eb);
+				usleep(500e3);
+			}
+
+			close(test.fd);
+		}
+
+		igt_fork_helper(&drop_child) {
+			int fd;
+
+			fd = drm_open_driver(DRIVER_INTEL);
+			igt_require_gem(fd);
+
+			for (;;) {
+				usleep(334e3);
+				igt_drop_caches_set(fd, DROP_ACTIVE);
+			}
+
+			close(fd);
+		}
+
+		for (unsigned long ram_mb = 0;
+		     (ram_mb = get_avail_ram_mb()) > 512;) {
+			igt_debug("[%u] %lu free mb\n", count, ram_mb);
+			sleep(1);
+		}
+
+		igt_debug("Triggering OOM\n");
+		trigger_oom();
+
+		sleep(1);
+
+		igt_try_stop_helper(&mem_child);
+		igt_try_stop_helper(&gem_child);
+		igt_try_stop_helper(&eb_child);
+		igt_try_stop_helper(&drop_child);
+	} while (count < 3);
+}
+
 igt_main
 {
 	const struct test {
@@ -432,6 +716,21 @@  igt_main
 	igt_subtest("reclaim")
 		reclaim(I915_EXEC_DEFAULT, 2);
 
+	igt_subtest_group {
+		igt_fixture {
+			igt_require(has_sysrq_trigger());
+		}
+
+		igt_subtest("two-reclaims-and-oom")
+			reclaim_oom(TEST_BO);
+
+		igt_subtest("two-reclaims-and-oom-userptr") {
+			igt_require(has_userptr());
+
+			reclaim_oom(TEST_USERPTR);
+		}
+	}
+
 	for(const struct test *t = tests; t->name; t++) {
 		for(const struct mode *m = modes; m->suffix; m++) {
 			igt_subtest_f("%s%s", t->name, m->suffix)

Comments

Chris Wilson Dec. 7, 2018, 2:06 p.m.
Quoting Tvrtko Ursulin (2018-12-07 14:04:05)
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> 
> ...
> 
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>  lib/igt_core.c          |  18 +++
>  lib/igt_core.h          |   1 +
>  tests/i915/gem_shrink.c | 299 ++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 318 insertions(+)
> 
> diff --git a/lib/igt_core.c b/lib/igt_core.c
> index 64883d6402af..d8fa0c83e279 100644
> --- a/lib/igt_core.c
> +++ b/lib/igt_core.c
> @@ -1680,6 +1680,24 @@ void igt_stop_helper(struct igt_helper_process *proc)
>         assert(helper_was_alive(proc, status));
>  }
>  
> +/**
> + * igt_try_stop_helper:
> + * @proc: #igt_helper_process structure
> + *
> + * Terminates a helper process if it is still running.
> + */
> +void igt_try_stop_helper(struct igt_helper_process *proc)

General thoughtless comment about try_func is that usually report a
bool.
-Chris
Tvrtko Ursulin Dec. 7, 2018, 2:13 p.m.
On 07/12/2018 14:06, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2018-12-07 14:04:05)
>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>
>> ...
>>
>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>> ---
>>   lib/igt_core.c          |  18 +++
>>   lib/igt_core.h          |   1 +
>>   tests/i915/gem_shrink.c | 299 ++++++++++++++++++++++++++++++++++++++++
>>   3 files changed, 318 insertions(+)
>>
>> diff --git a/lib/igt_core.c b/lib/igt_core.c
>> index 64883d6402af..d8fa0c83e279 100644
>> --- a/lib/igt_core.c
>> +++ b/lib/igt_core.c
>> @@ -1680,6 +1680,24 @@ void igt_stop_helper(struct igt_helper_process *proc)
>>          assert(helper_was_alive(proc, status));
>>   }
>>   
>> +/**
>> + * igt_try_stop_helper:
>> + * @proc: #igt_helper_process structure
>> + *
>> + * Terminates a helper process if it is still running.
>> + */
>> +void igt_try_stop_helper(struct igt_helper_process *proc)
> 
> General thoughtless comment about try_func is that usually report a
> bool.

Okay, another TODO item. First I wanted to call them __igt_stop_helper 
which would have avoided it. :)

But in general no need to pay too much attention for now, I am only 
using it for CI access.

Hitting the nested lock path is proving to be tricky, locally it only 
manages a handful of times per run. And since it is nested we cannot 
count on lockdep to stop things. Userptr half also regressed in so it's 
not triggering much shrinking any longer. :I

So perhaps strength in (CI) numbers shows something new. Or not..

Regards,

Tvrtko