[RFC,2/3] intel-gpu-top: New version using PMU

Submitted by Tvrtko Ursulin on Feb. 14, 2018, 6:52 p.m.

Details

Message ID 20180214185207.8111-3-tvrtko.ursulin@linux.intel.com
State New
Headers show
Series "New intel-gpu-top" ( rev: 1 ) in IGT (deprecated)

Not browsing as part of any series.

Commit Message

Tvrtko Ursulin Feb. 14, 2018, 6:52 p.m.
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 tools/.gitignore       |   1 +
 tools/Makefile.am      |   2 +
 tools/Makefile.sources |   1 +
 tools/intel_gpu_top.c  | 593 +++++++++++++++++++++++++++++++++++++++++++++++++
 tools/meson.build      |   1 +
 5 files changed, 598 insertions(+)
 create mode 100644 tools/intel_gpu_top.c

Patch hide | download patch | download mbox

diff --git a/tools/.gitignore b/tools/.gitignore
index 19a1f7cb8e50..6e3042810176 100644
--- a/tools/.gitignore
+++ b/tools/.gitignore
@@ -17,6 +17,7 @@  intel_framebuffer_dump
 intel_gem_info
 intel_gpu_frequency
 intel_gpu_time
+intel_gpu_top
 intel_legacy_top
 intel_gtt
 intel_guc_logger
diff --git a/tools/Makefile.am b/tools/Makefile.am
index dcf282eaff4e..8f6c15791a3b 100644
--- a/tools/Makefile.am
+++ b/tools/Makefile.am
@@ -30,6 +30,8 @@  intel_aubdump_la_LDFLAGS = -module -avoid-version -no-undefined
 intel_aubdump_la_SOURCES = aubdump.c
 intel_aubdump_la_LIBADD = $(top_builddir)/lib/libintel_tools.la -ldl
 
+intel_gpu_top_LDADD = $(top_builddir)/lib/libigt_perf.la -lm
+
 bin_SCRIPTS = intel_aubdump
 CLEANFILES = $(bin_SCRIPTS)
 
diff --git a/tools/Makefile.sources b/tools/Makefile.sources
index 9699b7d2f737..6f8668bd4d56 100644
--- a/tools/Makefile.sources
+++ b/tools/Makefile.sources
@@ -17,6 +17,7 @@  tools_prog_lists =		\
 	intel_gpu_frequency	\
 	intel_firmware_decode	\
 	intel_gpu_time		\
+	intel_gpu_top		\
 	intel_legacy_top	\
 	intel_gtt		\
 	intel_guc_logger        \
diff --git a/tools/intel_gpu_top.c b/tools/intel_gpu_top.c
new file mode 100644
index 000000000000..59a112240092
--- /dev/null
+++ b/tools/intel_gpu_top.c
@@ -0,0 +1,593 @@ 
+#include <stdio.h>
+#include <sys/types.h>
+#include <dirent.h>
+#include <stdint.h>
+#include <assert.h>
+#include <string.h>
+#include <ctype.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <sys/ioctl.h>
+#include <errno.h>
+#include <math.h>
+
+#include "igt_perf.h"
+
+/* Two consecutive readings of one value: the latest one and the one before. */
+struct pmu_pair {
+	uint64_t cur;	/* most recent reading */
+	uint64_t prev;	/* reading that was current before the last update */
+};
+
+/* One perf counter together with its last two sampled values. */
+struct pmu_counter {
+	uint64_t config;	/* event config value passed to the perf open helpers */
+	unsigned int idx;	/* slot in the group-read value array, assigned by __open_pmu() */
+	struct pmu_pair val;	/* current and previous raw counter values */
+};
+
+/* Number of load average windows tracked (one per load_period[] entry in main()). */
+#define NUM_LOADS (3)
+
+/* Per-engine state: identity, derived values and the raw counters behind them. */
+struct engine {
+	const char *name;		/* strdup()ed sysfs event name with the "-busy" suffix removed */
+	double qd[3];			/* latest scaled queued / runnable / running values, in that order */
+	double load_avg[NUM_LOADS];	/* decayed averages of qd[1] + qd[2], updated in main() */
+	struct pmu_counter busy;
+	struct pmu_counter wait;
+	struct pmu_counter sema;
+	struct pmu_counter queued;
+	struct pmu_counter runnable;
+	struct pmu_counter running;
+};
+
+/*
+ * Global sampling state plus the list of discovered engines.
+ *
+ * The object is allocated with extra space after it: "engine" is only the
+ * first element of an array that discover_engines() grows with realloc()
+ * and that is indexed through engine_ptr(). It must stay the last member.
+ */
+struct engines {
+	unsigned int num_engines;	/* number of valid entries in the trailing engine array */
+	unsigned int num_counters;	/* counters opened through __open_pmu() */
+	DIR *root;			/* open sysfs events directory, used for later config lookups */
+	int fd;				/* first fd opened by __open_pmu(); passed as group fd afterwards */
+	struct pmu_pair ts;		/* value returned by the group read; main() divides deltas by 1e9 */
+
+	int rapl_fd;			/* fd returned by igt_perf_open() for the energy-gpu event */
+	double rapl_scale;		/* scale read from energy-gpu.scale, adjusted in pmu_init() */
+
+	struct pmu_counter freq_req;
+	struct pmu_counter freq_act;
+	struct pmu_counter irq;
+	struct pmu_counter rc6;
+	struct pmu_counter rapl;
+
+	double qd_scale;		/* scale read from rcs0-queued.scale, applied to all qd values */
+
+	double load_exp[NUM_LOADS];	/* per-window decay factors, exp(-period / load_period[i]) */
+	double load_avg[NUM_LOADS];	/* decayed averages of the summed per-engine load */
+
+	struct engine engine;		/* first element of the engine array (see above) */
+};
+
+/*
+ * Look up the perf config value of the event file "<name>-<counter>" inside
+ * the directory referred to by dirfd.
+ *
+ * The file content is parsed from its first '0' character onwards with
+ * automatic base detection, which suits content such as "config=0x..."
+ * (NOTE(review): the exact sysfs format is assumed here - confirm).
+ *
+ * Returns the parsed value, or (uint64_t)-1 on any failure.
+ */
+static uint64_t
+get_pmu_config(int dirfd, const char *name, const char *counter)
+{
+	char buf[128], *p;
+	ssize_t len;
+	int fd, ret;
+
+	ret = snprintf(buf, sizeof(buf), "%s-%s", name, counter);
+	/* snprintf() returns the untruncated length, so >= means truncation. */
+	if (ret < 0 || (size_t)ret >= sizeof(buf))
+		return -1;
+
+	fd = openat(dirfd, buf, O_RDONLY);
+	if (fd < 0)
+		return -1;
+
+	/* read() does not terminate the data, so keep one byte for the NUL. */
+	len = read(fd, buf, sizeof(buf) - 1);
+	close(fd);
+	if (len <= 0)
+		return -1;
+	buf[len] = '\0';
+
+	p = strchr(buf, '0');
+	if (!p)
+		return -1;
+
+	/* strtoull() keeps all 64 bits even where unsigned long is 32-bit. */
+	return strtoull(p, NULL, 0);
+}
+
+/*
+ * Return a pointer to the n-th struct engine stored behind a struct engines.
+ * Element 0 is the embedded "engine" member; further elements live in the
+ * extra space allocated after the structure. Both arguments are
+ * parenthesized so that arbitrary expressions can be passed safely.
+ */
+#define engine_ptr(engines, n) \
+	((struct engine *)((unsigned char *)(&(engines)->engine) + (n) * sizeof(struct engine)))
+
+/*
+ * Build the engine list by scanning the i915 sysfs events directory for
+ * regular files named "<engine>-busy".
+ *
+ * On success the returned object owns the still open directory stream
+ * (engines->root, needed for later config lookups) and one strdup()ed name
+ * per engine. On any failure everything acquired so far is released and
+ * NULL is returned.
+ */
+static struct engines *discover_engines(void)
+{
+	const char *sysfs_root = "/sys/devices/i915/events";
+	struct engines *engines;
+	struct dirent *dent;
+	unsigned int i;
+	int ret = 0;
+	DIR *d;
+
+	/* Zeroed bookkeeping plus room for the first engine. */
+	engines = calloc(1, sizeof(*engines));
+	if (!engines)
+		return NULL;
+
+	d = opendir(sysfs_root);
+	if (!d) {
+		free(engines);
+		return NULL;
+	}
+
+	while ((dent = readdir(d)) != NULL) {
+		const char *endswith = "-busy";
+		const unsigned int endlen = strlen(endswith);
+		struct engine *engine =
+				engine_ptr(engines, engines->num_engines);
+		struct engines *grown;
+		char buf[256];
+
+		if (dent->d_type != DT_REG)
+			continue;
+
+		if (strlen(dent->d_name) >= sizeof(buf)) {
+			ret = -1;
+			break;
+		}
+
+		strcpy(buf, dent->d_name);
+
+		/* xxxN-busy */
+		if (strlen(buf) < (endlen + 4))
+			continue;
+		if (strcmp(&buf[strlen(buf) - endlen], endswith))
+			continue;
+
+		memset(engine, 0, sizeof(*engine));
+
+		/* Cut the suffix off to obtain the engine name. */
+		buf[strlen(buf) - endlen] = 0;
+		engine->name = strdup(buf);
+		if (!engine->name) {
+			ret = -1;
+			break;
+		}
+
+		engine->busy.config = get_pmu_config(dirfd(d),
+						     engine->name,
+						     "busy");
+		if (engine->busy.config == (uint64_t)-1) {
+			/* Not counted in num_engines yet, so free it here. */
+			free((char *)engine->name);
+			ret = -1;
+			break;
+		}
+
+		/*
+		 * Always keep one spare slot after the last valid engine.
+		 * Use a temporary so the old block is not lost on failure.
+		 */
+		engines->num_engines++;
+		grown = realloc(engines, sizeof(struct engines) +
+				engines->num_engines * sizeof(struct engine));
+		if (!grown) {
+			ret = -ENOMEM;
+			break;
+		}
+		engines = grown;
+	}
+
+	if (ret) {
+		for (i = 0; i < engines->num_engines; i++)
+			free((char *)engine_ptr(engines, i)->name);
+		closedir(d);
+		free(engines);
+		return NULL;
+	}
+
+	engines->root = d;
+
+	return engines;
+}
+
+/*
+ * Read the beginning of a file into buf as a NUL-terminated string.
+ * At most bufsize - 1 bytes are taken from a single read() call.
+ *
+ * Returns 0 on success, -1 if the file cannot be opened or yields no data.
+ */
+static int
+filename_to_buf(const char *filename, char *buf, unsigned int bufsize)
+{
+	const int fd = open(filename, O_RDONLY);
+	ssize_t nread;
+
+	if (fd < 0)
+		return -1;
+
+	nread = read(fd, buf, bufsize - 1);
+	close(fd);
+
+	if (nread >= 1) {
+		buf[nread] = '\0';
+		return 0;
+	}
+
+	return -1;
+}
+
+/*
+ * Parse an unsigned integer from the start of a file, using the given
+ * strtoull() base. Returns 0 when the file cannot be read.
+ */
+static uint64_t filename_to_u64(const char *filename, int base)
+{
+	char buf[64], *b;
+
+	if (filename_to_buf(filename, buf, sizeof(buf)))
+		return 0;
+
+	/*
+	 * Handle both single integer and key=value formats by skipping
+	 * leading non-digits.
+	 */
+	b = buf;
+	/* <ctype.h> functions need an unsigned char value, not a plain char. */
+	while (*b && !isdigit((unsigned char)*b))
+		b++;
+
+	return strtoull(b, NULL, base);
+}
+
+/*
+ * Parse /sys/devices/power/type as a decimal number. pmu_init() hands the
+ * result to igt_perf_open() as its first argument.
+ */
+static uint64_t rapl_type_id(void)
+{
+	static const char path[] = "/sys/devices/power/type";
+
+	return filename_to_u64(path, 10);
+}
+
+/*
+ * Parse the number found in the energy-gpu event file, letting strtoull()
+ * detect the base. pmu_init() uses it as the config of the rapl counter.
+ */
+static uint64_t rapl_gpu_power(void)
+{
+	static const char path[] = "/sys/devices/power/events/energy-gpu";
+
+	return filename_to_u64(path, 0);
+}
+
+/*
+ * Parse a floating point number from the start of a file with strtod().
+ * Returns 0 when the file cannot be read.
+ */
+static double filename_to_double(const char *filename)
+{
+	char text[64];
+	const int err = filename_to_buf(filename, text, sizeof(text));
+
+	return err ? 0 : strtod(text, NULL);
+}
+
+/* Read the scale factor published next to the energy-gpu event. */
+static double rapl_gpu_power_scale(void)
+{
+	static const char path[] = "/sys/devices/power/events/energy-gpu.scale";
+
+	return filename_to_double(path);
+}
+
+/*
+ * Read the scale factor of the rcs0 "queued" event. pmu_init() stores it
+ * as the single qd_scale that main() applies to every engine.
+ */
+static double i915_qd_scale(void)
+{
+	static const char path[] = "/sys/devices/i915/events/rcs0-queued.scale";
+
+	return filename_to_double(path);
+}
+
+/*
+ * Open the counter described by (pmu)->config via perf_i915_open_group(),
+ * passing (engines)->fd as the group argument.
+ *
+ * On success: the first fd obtained is remembered in (engines)->fd, the
+ * counter gets the current value of idx as its slot and idx is then
+ * incremented, and (engines)->num_counters is bumped.
+ *
+ * Evaluates to the fd returned by the open call (negative on failure).
+ * idx must be a modifiable lvalue; pmu and engines are evaluated more than
+ * once, so pass expressions without side effects.
+ */
+#define __open_pmu(engines, pmu, idx) \
+({ \
+	int fd__; \
+\
+	fd__ = perf_i915_open_group((pmu)->config, (engines)->fd); \
+	if (fd__ >= 0) { \
+		if ((engines)->fd == -1) \
+			(engines)->fd = fd__; \
+		(pmu)->idx = (idx)++; \
+		(engines)->num_counters++; \
+	} \
+\
+	fd__; \
+})
+
+/*
+ * Open every counter the tool samples.
+ *
+ * The four global i915 counters are opened first, followed by six counters
+ * per discovered engine, all through __open_pmu() so that they share one
+ * group and receive consecutive idx slots in opening order. The RAPL
+ * energy counter is opened separately with igt_perf_open().
+ *
+ * Returns 0 on success, -1 as soon as any counter fails to open. File
+ * descriptors opened before the failure are not closed here.
+ */
+static int pmu_init(struct engines *engines)
+{
+	struct {
+		struct pmu_counter *pmu;
+		uint64_t config;
+	} global[] = {
+		{ &engines->freq_req, I915_PMU_REQUESTED_FREQUENCY },
+		{ &engines->freq_act, I915_PMU_ACTUAL_FREQUENCY },
+		{ &engines->irq, I915_PMU_INTERRUPTS },
+		{ &engines->rc6, I915_PMU_RC6_RESIDENCY },
+	};
+	unsigned int idx = 0;
+	unsigned int i;
+	int fd;
+
+	engines->fd = -1;
+	engines->num_counters = 0;
+
+	/* Order matters: it decides the group leader and the idx slots. */
+	for (i = 0; i < sizeof(global) / sizeof(global[0]); i++) {
+		global[i].pmu->config = global[i].config;
+		fd = __open_pmu(engines, global[i].pmu, idx);
+		if (fd < 0)
+			return -1;
+	}
+
+	engines->qd_scale = i915_qd_scale();
+
+	for (i = 0; i < engines->num_engines; i++) {
+		struct engine *engine = engine_ptr(engines, i);
+		struct {
+			struct pmu_counter *pmu;
+			const char *counter;
+		} *cnt, counters[] = {
+			{ .pmu = &engine->busy, .counter = "busy" },
+			{ .pmu = &engine->wait, .counter = "wait" },
+			{ .pmu = &engine->sema, .counter = "sema" },
+			{ .pmu = &engine->queued, .counter = "queued" },
+			{ .pmu = &engine->runnable, .counter = "runnable" },
+			{ .pmu = &engine->running, .counter = "running" },
+			{ .pmu = NULL, .counter = NULL },
+		};
+
+		for (cnt = counters; cnt->pmu; cnt++) {
+			/* A non-zero config was already looked up earlier. */
+			if (!cnt->pmu->config)
+				cnt->pmu->config =
+					get_pmu_config(dirfd(engines->root),
+						       engine->name,
+						       cnt->counter);
+			fd = __open_pmu(engines, cnt->pmu, idx);
+			if (fd < 0)
+				return -1;
+		}
+	}
+
+	engines->rapl_scale = rapl_gpu_power_scale();
+	/* NaN never compares equal to anything, so it needs isnan(). */
+	if (!isnan(engines->rapl_scale))
+		engines->rapl_scale *= 1e3; /* from nano to micro */
+	engines->rapl.config = rapl_gpu_power();
+	engines->rapl_fd = igt_perf_open(rapl_type_id(), engines->rapl.config);
+	if (engines->rapl_fd < 0)
+		return -1;
+
+	return 0;
+}
+
+/*
+ * Read one group sample of num counters from fd.
+ *
+ * The read must deliver exactly 2 + num 64-bit words: word 0 is skipped,
+ * word 1 is returned to the caller (pmu_sample() stores it as the sample
+ * timestamp) and words 2.. are copied to val[0..num-1].
+ *
+ * A failed or short read aborts the program. The read is done outside of
+ * assert() so that it is not compiled out when NDEBUG is defined.
+ */
+static uint64_t pmu_read_multi(int fd, unsigned int num, uint64_t *val)
+{
+	uint64_t buf[2 + num];
+	unsigned int i;
+	ssize_t len;
+
+	len = read(fd, buf, sizeof(buf));
+	if (len < 0 || (size_t)len != sizeof(buf)) {
+		fprintf(stderr, "Failed to read PMU group sample!\n");
+		abort();
+	}
+
+	for (i = 0; i < num; i++)
+		val[i] = buf[2 + i];
+
+	return buf[1];
+}
+
+
+/*
+ * Turn the growth of a counter between two samples into a scaled rate:
+ * (cur - prev) / d / t * s. When s is exactly 100.0 the result is treated
+ * as a percentage and capped at 100.0.
+ */
+static double pmu_calc(struct pmu_pair *p, double d, double t, double s)
+{
+	const uint64_t delta = p->cur - p->prev;
+	double result = (double)delta;
+
+	result = result / d;
+	result = result / t;
+	result = result * s;
+
+	if (s == 100.0 && result > 100.0)
+		return 100.0;
+
+	return result;
+}
+
+/*
+ * Read one sample of two 64-bit words from fd. Word 0 is returned; word 1
+ * is stored in *ts when ts is not NULL.
+ *
+ * A failed or short read aborts the program. The read is done outside of
+ * assert() so that it is not compiled out when NDEBUG is defined.
+ */
+static uint64_t __pmu_read_single(int fd, uint64_t *ts)
+{
+	uint64_t data[2];
+	ssize_t len;
+
+	len = read(fd, data, sizeof(data));
+	if (len < 0 || (size_t)len != sizeof(data)) {
+		fprintf(stderr, "Failed to read PMU sample!\n");
+		abort();
+	}
+
+	if (ts)
+		*ts = data[1];
+
+	return data[0];
+}
+
+/* Read a single counter value from fd, discarding the second sample word. */
+static uint64_t pmu_read_single(int fd)
+{
+	const uint64_t value = __pmu_read_single(fd, NULL);
+
+	return value;
+}
+
+/* Shift the counter's current value into "prev" and store val as current. */
+static void __update_sample(struct pmu_counter *counter, uint64_t val)
+{
+	struct pmu_pair *pair = &counter->val;
+
+	pair->prev = pair->cur;
+	pair->cur = val;
+}
+
+/* Update a counter from its own slot (counter->idx) of a group read array. */
+static void update_sample(struct pmu_counter *counter, uint64_t *val)
+{
+	const uint64_t latest = val[counter->idx];
+
+	__update_sample(counter, latest);
+}
+
+/*
+ * Take one sample of everything: a single group read for all counters
+ * opened through __open_pmu(), a separate read for the RAPL counter, and
+ * then a cur/prev update of every counter and of the sample timestamp.
+ */
+static void pmu_sample(struct engines *engines)
+{
+	const int count = engines->num_counters;
+	uint64_t values[count];
+	uint64_t energy;
+	unsigned int n;
+
+	engines->ts.prev = engines->ts.cur;
+	engines->ts.cur = pmu_read_multi(engines->fd, count, values);
+
+	energy = pmu_read_single(engines->rapl_fd);
+	__update_sample(&engines->rapl, energy);
+
+	update_sample(&engines->freq_req, values);
+	update_sample(&engines->freq_act, values);
+	update_sample(&engines->irq, values);
+	update_sample(&engines->rc6, values);
+
+	for (n = 0; n < engines->num_engines; n++) {
+		struct engine *e = engine_ptr(engines, n);
+		struct pmu_counter *per_engine[] = {
+			&e->busy, &e->sema, &e->wait,
+			&e->queued, &e->runnable, &e->running,
+		};
+		unsigned int c;
+
+		for (c = 0; c < sizeof(per_engine) / sizeof(per_engine[0]); c++)
+			update_sample(per_engine[c], values);
+	}
+}
+
+/* Partial block glyphs: bars[k] fills k eighths of one character cell. */
+static const char * const bars[] = { " ", "▏", "▎", "▍", "▌", "▋", "▊", "▉", "█" };
+
+/*
+ * Print a bar of max_len characters to stdout: a '|' on each side and
+ * max_len - 2 cells in between, filled proportionally to percent with a
+ * resolution of one eighth of a cell.
+ *
+ * percent is clamped to [0, 100] (NaN counts as 0) and a max_len below 2
+ * prints just the two delimiters, so the glyph table is never indexed out
+ * of range.
+ */
+static void
+print_percentage_bar(double percent, int max_len)
+{
+	const int width = max_len > 2 ? max_len - 2 : 0;
+	int bar_len;
+	int i;
+
+	/* The negated test also catches NaN. */
+	if (!(percent >= 0.0))
+		percent = 0.0;
+	else if (percent > 100.0)
+		percent = 100.0;
+
+	bar_len = percent * (8 * width) / 100.0;
+
+	putchar('|');
+
+	/* Full cells first, then at most one partially filled cell. */
+	for (i = bar_len; i >= 8; i -= 8)
+		printf("%s", bars[8]);
+	if (i)
+		printf("%s", bars[i]);
+
+	/* Pad the remaining cells with spaces. */
+	for (i = 0; i < (width - (bar_len + 7) / 8); i++)
+		putchar(' ');
+
+	putchar('|');
+}
+
+/* Refresh period used when -s is not given, in milliseconds. */
+#define DEFAULT_PERIOD_MS (1000)
+
+/* Print the command line help to stdout; appname is shown in the usage line. */
+static void
+usage(const char *appname)
+{
+	printf("intel_gpu_top - Display a top-like summary of Intel GPU usage\n"
+		"\n"
+		"Usage: %s [parameters]\n"
+		"\n"
+		"\tThe following parameters are optional:\n"
+		"\t[-s <ms>]            refresh period in ms (default %ums)\n"
+		"\t[-h]                 show this help text\n"
+		"\n",
+		appname, (unsigned int)DEFAULT_PERIOD_MS);
+}
+
+/*
+ * One step of an exponentially decaying average: move the previous load
+ * towards val, keeping the fraction exp of the previous distance to it.
+ */
+static double update_load(double load, double exp, double val)
+{
+	const double distance = load - val;
+
+	return val + exp * distance;
+}
+
+/*
+ * Entry point: parse options, discover engines, open the counters and then
+ * redraw the summary once per refresh period until the process is killed.
+ *
+ * Options: -s <ms> sets the refresh period, -h prints the help text.
+ * Exits with status 1 on invalid options or when setup fails.
+ */
+int main(int argc, char **argv)
+{
+	unsigned int period_us = DEFAULT_PERIOD_MS * 1000;
+	/* Load average windows, in the same unit as "period" below (seconds). */
+	const double load_period[NUM_LOADS] = { 1.0, 30.0, 900.0 };
+	struct engines *engines;
+	/* Fallback geometry for when the terminal size cannot be queried. */
+	int con_w = 80, con_h = 24;
+	struct winsize ws;
+	unsigned int i;
+	double period;
+	int ret, ch;
+
+	/* Parse options */
+	while ((ch = getopt(argc, argv, "s:h")) != -1) {
+		switch (ch) {
+		case 's': {
+			unsigned long ms;
+			char *end;
+
+			/*
+			 * Reject garbage, zero and values whose microsecond
+			 * equivalent would not fit in period_us.
+			 */
+			errno = 0;
+			ms = strtoul(optarg, &end, 10);
+			if (errno || end == optarg || *end ||
+			    !ms || ms > ~0u / 1000) {
+				fprintf(stderr, "Invalid period '%s'!\n",
+					optarg);
+				usage(argv[0]);
+				exit(1);
+			}
+			period_us = ms * 1000;
+			break;
+		}
+		case 'h':
+			usage(argv[0]);
+			exit(0);
+		default:
+			fprintf(stderr, "Invalid option %c!\n", (char)optopt);
+			usage(argv[0]);
+			exit(1);
+		}
+	}
+
+	/* Get terminal size; keep the fallback if it is unknown or zero. */
+	if (ioctl(0, TIOCGWINSZ, &ws) != -1 && ws.ws_col && ws.ws_row) {
+		con_w = ws.ws_col;
+		con_h = ws.ws_row;
+	}
+
+	engines = discover_engines();
+	if (!engines) {
+		fprintf(stderr, "Failed to detect engines!\n");
+		return 1;
+	}
+
+	ret = pmu_init(engines);
+	if (ret) {
+		fprintf(stderr, "Failed to initialize PMU!\n");
+		return 1;
+	}
+
+	/* Load average setup. */
+	period = (double)period_us / 1e6;
+	for (i = 0; i < NUM_LOADS; i++)
+		engines->load_exp[i] = exp(-period / load_period[i]);
+
+	/* Prime the "previous" values so the first delta is meaningful. */
+	pmu_sample(engines);
+
+	for (;;) {
+		double t, freq[2], irq, rc6, power;
+		double qd = 0;
+		int lines = 0;
+		unsigned int j;
+
+		usleep(period_us);
+
+		pmu_sample(engines);
+		/* Time between the two samples, scaled down by 1e9. */
+		t = (double)(engines->ts.cur - engines->ts.prev) / 1e9;
+
+		/* Move the cursor home and clear the screen. */
+		printf("\033[H\033[J");
+
+		freq[0] = pmu_calc(&engines->freq_req.val, 1.0, t, 1);
+		freq[1] = pmu_calc(&engines->freq_act.val, 1.0, t, 1);
+		irq = pmu_calc(&engines->irq.val, 1.0, t, 1);
+		rc6 = pmu_calc(&engines->rc6.val, 1e9, t, 100);
+		power = pmu_calc(&engines->rapl.val, 1.0, t, engines->rapl_scale);
+
+		for (i = 0; i < engines->num_engines; i++) {
+			struct engine *engine = engine_ptr(engines, i);
+
+			engine->qd[0] = pmu_calc(&engine->queued.val, 1, t,
+						 engines->qd_scale);
+			engine->qd[1] = pmu_calc(&engine->runnable.val, 1, t,
+						 engines->qd_scale);
+			engine->qd[2] = pmu_calc(&engine->running.val, 1, t,
+						 engines->qd_scale);
+
+			/* Load counts runnable plus running, not queued. */
+			qd += engine->qd[1] + engine->qd[2];
+
+			for (j = 0; j < NUM_LOADS; j++) {
+				engine->load_avg[j] =
+					update_load(engine->load_avg[j],
+						    engines->load_exp[j],
+						    engine->qd[1] +
+						    engine->qd[2]);
+			}
+		}
+
+		for (j = 0; j < NUM_LOADS; j++) {
+			engines->load_avg[j] =
+				update_load(engines->load_avg[j],
+					    engines->load_exp[j],
+					    qd);
+		}
+
+		printf("intel-gpu-top - load avg %5.2f, %5.2f, %5.2f; %4.0f/%4.0f MHz;  %3.0f%% RC6; %6.0fmW; %8.0f irqs/s\n",
+		       engines->load_avg[0],
+		       engines->load_avg[1],
+		       engines->load_avg[2],
+		       freq[0], freq[1],
+		       rc6, power, irq);
+		lines++;
+
+		printf("\n");
+		lines++;
+
+		/* One row per engine, as many as fit on the screen. */
+		for (i = 0; i < engines->num_engines && lines < con_h; i++) {
+			struct engine *engine = engine_ptr(engines, i);
+			double val[2];
+			char buf[128];
+			int len, bar_w;
+
+			/* Text shown to the right of the bar. */
+			val[0] = pmu_calc(&engine->wait.val, 1e9, t, 100);
+			val[1] = pmu_calc(&engine->sema.val, 1e9, t, 100);
+			len = snprintf(buf, sizeof(buf),
+				       "%6.2f%% wait, %6.2f%% sema",
+				       val[0], val[1]);
+
+			val[0] = pmu_calc(&engine->busy.val, 1e9, t, 100);
+			len += printf("%8s %6.2f%% (%5.2f/%5.2f/%5.2f) ",
+				      engine->name,
+				      val[0],
+				      engine->qd[0],
+				      engine->qd[1],
+				      engine->qd[2]);
+
+			/*
+			 * The bar takes what is left of the line. Never go
+			 * below the two delimiter characters, otherwise the
+			 * width would turn negative on narrow terminals.
+			 */
+			bar_w = con_w - 1 - len;
+			if (bar_w < 2)
+				bar_w = 2;
+			print_percentage_bar(val[0], bar_w);
+
+			printf("%s\n", buf);
+
+			lines++;
+		}
+
+		printf("\n");
+	}
+
+	return 0;
+}
diff --git a/tools/meson.build b/tools/meson.build
index ebce4e305d00..36038f7a9d22 100644
--- a/tools/meson.build
+++ b/tools/meson.build
@@ -23,6 +23,7 @@  tools_progs = [
 	'intel_gpu_frequency',
 	'intel_firmware_decode',
 	'intel_gpu_time',
+	'intel_gpu_top',
 	'intel_legacy_top',
 	'intel_gtt',
 	'intel_guc_logger',