Add a test that measures primitive rate

Submitted by Marek Olšák on May 25, 2019, 12:51 a.m.

Details

Message ID 20190525005149.10397-1-maraeo@gmail.com
State New
Headers show
Series "Add a test that measures primitive rate" ( rev: 1 ) in Piglit

Not browsing as part of any series.

Commit Message

Marek Olšák May 25, 2019, 12:51 a.m.
From: Marek Olšák <marek.olsak@amd.com>

The output looks like this (taken from the initial version; the current version is slightly different):
  Measuring GPrims/second,             , Number of primitives
  Draw Call     ,  Cull Method         ,    2K,    4K,    8K,   16K,   32K,   64K,  256K
  --------------,----------------------,------,------,------,------,------,------,------
  glDrawElements, none                 ,  2.80,  2.69,  2.91,  2.89,  2.91,  2.92,  2.42
  glDrawElements, rasterizer discard   ,  4.80,  4.77,  4.95,  4.84,  4.91,  4.85,  4.93
  glDrawElements, 100% back faces      ,  3.27,  3.19,  3.29,  3.21,  3.26,  3.32,  3.33
  glDrawElements,  75% back faces      ,  3.27,  3.47,  3.29,  3.54,  3.53,  3.60,  3.49
  glDrawElements,  50% back faces      ,  3.92,  3.83,  3.34,  3.58,  3.68,  3.63,  2.76
  glDrawElements,  25% back faces      ,  3.66,  3.52,  3.12,  3.18,  3.00,  2.78,  3.45
  glDrawElements, 100% culled by view  ,  4.85,  4.75,  4.94,  4.68,  4.91,  4.80,  4.94
  glDrawElements,  75% culled by view  ,  4.82,  4.68,  4.77,  4.76,  4.80,  4.65,  3.20
  glDrawElements,  50% culled by view  ,  4.73,  4.65,  4.46,  3.40,  4.86,  4.04,  2.99
  glDrawElements,  25% culled by view  ,  3.67,  3.48,  3.26,  2.70,  2.76,  2.60,  2.46
  glDrawElements, 100% degenerate prims,  1.67,  1.66,  1.68,  1.66,  1.68,  1.68,  1.68
  glDrawElements,  75% degenerate prims,  1.65,  1.90,  1.67,  1.96,  1.86,  2.01,  1.83
  glDrawElements,  50% degenerate prims,  2.43,  2.37,  1.66,  2.44,  1.90,  2.24,  1.98
  glDrawElements,  25% degenerate prims,  2.49,  2.94,  1.67,  2.03,  2.76,  2.79,  2.15
  glDrawElements, 98 small prims/pixel ,  4.82,  4.65,  4.85,  4.80,  4.90,  4.77,  4.30
  glDrawElements, 32 small prims/pixel ,  4.86,  4.71,  4.80,  4.69,  4.81,  4.16,  4.95
  glDrawElements,  8 small prims/pixel ,  4.73,  4.67,  4.92,  4.85,  4.91,  4.86,  3.20
  glDrawArrays  , none                 ,  1.67,  1.66,  1.65,  1.47,  1.52,  1.24,  1.60
etc.
---
 tests/perf/CMakeLists.gl.txt |   1 +
 tests/perf/common.c          |   5 +-
 tests/perf/common.h          |   2 +-
 tests/perf/draw-prim-rate.c  | 519 +++++++++++++++++++++++++++++++++++
 tests/perf/drawoverhead.c    |   2 +-
 5 files changed, 524 insertions(+), 5 deletions(-)
 create mode 100644 tests/perf/draw-prim-rate.c

Patch hide | download patch | download mbox

diff --git a/tests/perf/CMakeLists.gl.txt b/tests/perf/CMakeLists.gl.txt
index f9d311525..6f9c2c565 100644
--- a/tests/perf/CMakeLists.gl.txt
+++ b/tests/perf/CMakeLists.gl.txt
@@ -3,12 +3,13 @@  include_directories(
 	${GLEXT_INCLUDE_DIR}
 	${OPENGL_INCLUDE_PATH}
 )
 
 link_libraries (
 	piglitutil_${piglit_target_api}
 	${OPENGL_gl_LIBRARY}
 )
 
 piglit_add_executable (drawoverhead drawoverhead.c common.c)
+piglit_add_executable (draw-prim-rate draw-prim-rate.c common.c)
 
 # vim: ft=cmake:
diff --git a/tests/perf/common.c b/tests/perf/common.c
index 791b7f943..d61dfdca3 100644
--- a/tests/perf/common.c
+++ b/tests/perf/common.c
@@ -31,42 +31,41 @@  static double
 perf_get_time(void)
 {
 	return piglit_time_get_nano() * 0.000000001;
 }
 
 /**
  * Run function 'f' for enough iterations to reach a steady state.
  * Return the rate (iterations/second).
  */
 double
-perf_measure_rate(perf_rate_func f)
+perf_measure_rate(perf_rate_func f, double minDuration)
 {
-	const double minDuration = 0.5;
 	double rate = 0.0, prevRate = 0.0;
 	unsigned subiters;
 
 	/* Compute initial number of iterations to try.
 	 * If the test function is pretty slow this helps to avoid
 	 * extraordinarily long run times.
 	 */
 	subiters = 2;
 	{
 		const double t0 = perf_get_time();
 		double t1;
 		do {
 			f(subiters); /* call the rendering function */
 			glFinish();
 			t1 = perf_get_time();
 			subiters *= 2;
 		} while (t1 - t0 < 0.1 * minDuration);
 	}
-	/*perf_printf("initial subIters = %u\n", subiters);*/
+	/*printf("initial subIters = %u\n", subiters);*/
 
 	while (1) {
 		const double t0 = perf_get_time();
 		unsigned iters = 0;
 		double t1;
 
 		do {
 			f(subiters); /* call the rendering function */
 			glFinish();
 			t1 = perf_get_time();
diff --git a/tests/perf/common.h b/tests/perf/common.h
index 0da3b7b5f..7b7856b4a 100644
--- a/tests/perf/common.h
+++ b/tests/perf/common.h
@@ -18,14 +18,14 @@ 
  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  */
 
 #ifndef COMMON_H
 #define COMMON_H
 
 typedef void (*perf_rate_func)(unsigned count);
 
 double
-perf_measure_rate(perf_rate_func f);
+perf_measure_rate(perf_rate_func f, double minDuration);
 
 #endif /* COMMON_H */
 
diff --git a/tests/perf/draw-prim-rate.c b/tests/perf/draw-prim-rate.c
new file mode 100644
index 000000000..85792b00e
--- /dev/null
+++ b/tests/perf/draw-prim-rate.c
@@ -0,0 +1,519 @@ 
+/*
+ * Copyright (C) 2018  Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * VMWARE BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * Measure primitive rate under various circumstances.
+ *
+ * Culling methods:
+ * - none
+ * - rasterizer discard
+ * - face culling
+ * - view culling
+ * - degenerate primitives
+ * - subpixel primitives
+ */
+
+#include "common.h"
+#include <stdbool.h>
+#undef NDEBUG
+#include <assert.h>
+#include "piglit-util-gl.h"
+
+/* Window width and height in pixels.  This must be a power of two to
+ * prevent precision issues: gen_triangle_tile() converts pixel sizes to
+ * clip-space sizes by multiplying with 2.0 / WINDOW_SIZE.
+ */
+#define WINDOW_SIZE 1024
+
+PIGLIT_GL_TEST_CONFIG_BEGIN
+	/* Compatibility-profile test in a square WINDOW_SIZE x WINDOW_SIZE
+	 * window.  Note that piglit_init() additionally calls
+	 * piglit_require_gl_version(32) at run time.
+	 */
+	config.supports_gl_compat_version = 10;
+	config.window_width = WINDOW_SIZE;
+	config.window_height = WINDOW_SIZE;
+	config.window_visual = PIGLIT_GL_VISUAL_RGBA | PIGLIT_GL_VISUAL_DOUBLE;
+
+PIGLIT_GL_TEST_CONFIG_END
+
+static unsigned gpu_freq_mhz;	/* parsed from "-freq=<MHz>"; if non-zero, results are printed as prims/clock */
+static GLint progs[3];		/* programs built in piglit_init(): 0, 4 and 8 vec4 varyings */
+
+/**
+ * One-time setup.
+ *
+ * Parses the optional "-freq=<MHz>" argument into gpu_freq_mhz, requires the
+ * GL version used by the shaders, and builds the three test programs:
+ * progs[0] passes no varyings, progs[1] passes 4 vec4 varyings and progs[2]
+ * passes 8 vec4 varyings from the vertex to the fragment shader.
+ *
+ * Finally enables the vertex array client state and face culling, which
+ * stay enabled for all tests.
+ */
+void
+piglit_init(int argc, char **argv)
+{
+	/* argc is an int, so use a signed index to avoid a signed/unsigned
+	 * comparison.
+	 */
+	for (int i = 1; i < argc; i++) {
+		if (strncmp(argv[i], "-freq=", 6) != 0)
+			continue;
+
+		/* A malformed number must not leave a half-parsed frequency
+		 * behind; fall back to reporting GPrims/second.
+		 */
+		if (sscanf(argv[i] + 6, "%u", &gpu_freq_mhz) != 1) {
+			fprintf(stderr, "ignoring invalid option: %s\n", argv[i]);
+			gpu_freq_mhz = 0;
+		}
+	}
+
+	piglit_require_gl_version(32);
+
+	progs[0] = piglit_build_simple_program(
+			  "#version 120 \n"
+			  "void main() { \n"
+			  "  gl_Position = gl_Vertex; \n"
+			  "}",
+
+			  "#version 120 \n"
+			  "void main() { \n"
+			  "  gl_FragColor = vec4(1.0); \n"
+			  "}");
+
+	progs[1] = piglit_build_simple_program(
+			  "#version 150 compatibility \n"
+			  "varying vec4 v[4]; \n"
+			  "attribute vec4 a[4]; \n"
+			  "void main() { \n"
+			  "  for (int i = 0; i < 4; i++) v[i] = a[i]; \n"
+			  "  gl_Position = gl_Vertex; \n"
+			  "}",
+
+			  "#version 150 compatibility \n"
+			  "varying vec4 v[4]; \n"
+			  "void main() { \n"
+			  "  gl_FragColor = vec4(dot(v[0] + v[1] + v[2] + v[3], vec4(1.0)) == 1.0 ? 0.0 : 1.0); \n"
+			  "}");
+
+	progs[2] = piglit_build_simple_program(
+			  "#version 150 compatibility \n"
+			  "varying vec4 v[8]; \n"
+			  "attribute vec4 a[8]; \n"
+			  "void main() { \n"
+			  "  for (int i = 0; i < 8; i++) v[i] = a[i]; \n"
+			  "  gl_Position = gl_Vertex; \n"
+			  "}",
+
+			  "#version 150 compatibility \n"
+			  "varying vec4 v[8]; \n"
+			  "void main() { \n"
+			  "  gl_FragColor = vec4(dot(v[0] + v[1] + v[2] + v[3] + v[4] + v[5] + v[6] + v[7], vec4(1.0)) == 1.0 ? 0.0 : 1.0); \n"
+			  "}");
+
+	glEnableClientState(GL_VERTEX_ARRAY);
+	glEnable(GL_CULL_FACE);
+}
+
+/**
+ * Generate a num_quads_per_dim x num_quads_per_dim grid of quads, two
+ * counter-clockwise triangles per quad, starting at clip-space (-1, -1).
+ *
+ * num_quads_per_dim:   number of quads in both X and Y
+ * prim_size_in_pixels: edge length of one quad in pixels; the whole grid
+ *                      must fit into the window (asserted)
+ * cull_percentage:     0, 25, 50, 75 or 100; selects which rows of quads
+ *                      are marked as culled (every 4th row, every 2nd row,
+ *                      3 of every 4 rows, or all rows)
+ * back_face_culling:   culled rows get the reversed winding order
+ * view_culling:        culled rows are shifted by 2 clip-space units in
+ *                      -X, +X, -Y or +Y
+ * degenerate_prims:    culled rows get triangles with a repeated vertex
+ * max_vertices:        capacity of "vertices", in vertices
+ * num_vertices:        in: must be 0; out: number of vertices written
+ * vertices:            output positions, 3 floats per vertex
+ * max_indices:         capacity of "indices"
+ * num_indices:         advanced by the number of indices written
+ * indices:             output index list; if NULL, non-indexed triangles
+ *                      are generated instead (6 vertices per quad)
+ *
+ * Exceeding a capacity or passing an unsupported cull_percentage trips an
+ * assertion.
+ */
+static void
+gen_triangle_tile(unsigned num_quads_per_dim, double prim_size_in_pixels,
+		  unsigned cull_percentage,
+		  bool back_face_culling, bool view_culling, bool degenerate_prims,
+		  unsigned max_vertices, unsigned *num_vertices, float *vertices,
+		  unsigned max_indices, unsigned *num_indices, unsigned *indices)
+{
+	/* clip space coordinates in both X and Y directions: */
+	const double first = -1;
+	const double max_length = 2;
+	const double d = prim_size_in_pixels * 2.0 / WINDOW_SIZE;
+	/* size of one position (x, y, z) in bytes */
+	const size_t vertex_bytes = 3 * sizeof(float);
+
+	assert(d * num_quads_per_dim <= max_length);
+	assert(*num_vertices == 0);
+
+	/* the vertex ordering is counter-clockwise */
+	for (unsigned ty = 0; ty < num_quads_per_dim; ty++) {
+		/* Whether this row of quads is made invisible. */
+		bool cull = false;
+
+		if (cull_percentage == 0)
+			cull = false;
+		else if (cull_percentage == 25)
+			cull = ty % 4 == 0;
+		else if (cull_percentage == 50)
+			cull = ty % 2 == 0;
+		else if (cull_percentage == 75)
+			cull = ty % 4 != 0;
+		else if (cull_percentage == 100)
+			cull = true;
+		else
+			assert(!"wrong cull_percentage");
+
+		for (unsigned tx = 0; tx < num_quads_per_dim; tx++) {
+			unsigned x = tx;
+			unsigned y = ty;
+
+			/* view culling in different directions */
+			double xoffset = 0, yoffset = 0, zoffset = 0;
+
+			if (cull && view_culling) {
+				unsigned side = (ty / 2) % 4;
+
+				if (side == 0)		xoffset = -2;
+				else if (side == 1)	xoffset =  2;
+				else if (side == 2)	yoffset = -2;
+				else if (side == 3)	yoffset =  2;
+			}
+
+			if (indices) {
+				unsigned elem = *num_vertices * 3;
+
+				/* generate horizontal stripes with maximum reuse */
+				if (x == 0) {
+					/* The first quad of a row also emits
+					 * its left edge.
+					 */
+					*num_vertices += 2;
+					assert(*num_vertices <= max_vertices);
+
+					vertices[elem++] = xoffset + first + d * x;
+					vertices[elem++] = yoffset + first + d * y;
+					vertices[elem++] = zoffset;
+
+					vertices[elem++] = xoffset + first + d * x;
+					vertices[elem++] = yoffset + first + d * (y + 1);
+					vertices[elem++] = zoffset;
+				}
+
+				/* Index of the bottom-right vertex; the left
+				 * edge is at base_index - 2 and base_index - 1.
+				 */
+				unsigned base_index = *num_vertices;
+
+				*num_vertices += 2;
+				assert(*num_vertices <= max_vertices);
+
+				vertices[elem++] = xoffset + first + d * (x + 1);
+				vertices[elem++] = yoffset + first + d * y;
+				vertices[elem++] = zoffset;
+
+				vertices[elem++] = xoffset + first + d * (x + 1);
+				vertices[elem++] = yoffset + first + d * (y + 1);
+				vertices[elem++] = zoffset;
+
+				/* generate indices */
+				unsigned idx = *num_indices;
+				*num_indices += 6;
+				assert(*num_indices <= max_indices);
+
+				indices[idx++] = base_index - 2;
+				indices[idx++] = base_index;
+				indices[idx++] = base_index - 1;
+
+				indices[idx++] = base_index - 1;
+				indices[idx++] = base_index;
+				indices[idx++] = base_index + 1;
+
+				if (cull && back_face_culling) {
+					/* switch the winding order */
+					unsigned tmp = indices[idx - 6];
+					indices[idx - 6] = indices[idx - 5];
+					indices[idx - 5] = tmp;
+
+					tmp = indices[idx - 3];
+					indices[idx - 3] = indices[idx - 2];
+					indices[idx - 2] = tmp;
+				}
+
+				if (cull && degenerate_prims) {
+					/* repeat one index in each triangle */
+					indices[idx - 5] = indices[idx - 4];
+					indices[idx - 2] = indices[idx - 1];
+				}
+			} else {
+				unsigned elem = *num_vertices * 3;
+				*num_vertices += 6;
+				assert(*num_vertices <= max_vertices);
+
+				vertices[elem++] = xoffset + first + d * x;
+				vertices[elem++] = yoffset + first + d * y;
+				vertices[elem++] = zoffset;
+
+				vertices[elem++] = xoffset + first + d * (x + 1);
+				vertices[elem++] = yoffset + first + d * y;
+				vertices[elem++] = zoffset;
+
+				vertices[elem++] = xoffset + first + d * x;
+				vertices[elem++] = yoffset + first + d * (y + 1);
+				vertices[elem++] = zoffset;
+
+				vertices[elem++] = xoffset + first + d * x;
+				vertices[elem++] = yoffset + first + d * (y + 1);
+				vertices[elem++] = zoffset;
+
+				vertices[elem++] = xoffset + first + d * (x + 1);
+				vertices[elem++] = yoffset + first + d * y;
+				vertices[elem++] = zoffset;
+
+				vertices[elem++] = xoffset + first + d * (x + 1);
+				vertices[elem++] = yoffset + first + d * (y + 1);
+				vertices[elem++] = zoffset;
+
+				if (cull && back_face_culling) {
+					/* switch the winding order by reversing
+					 * the 6 vertices of this quad
+					 */
+					float old[6*3];
+					memcpy(old, vertices + elem - 6*3, sizeof(old));
+
+					for (unsigned i = 0; i < 6; i++) {
+						vertices[elem - 6*3 + i*3 + 0] = old[(5 - i)*3 + 0];
+						vertices[elem - 6*3 + i*3 + 1] = old[(5 - i)*3 + 1];
+						vertices[elem - 6*3 + i*3 + 2] = old[(5 - i)*3 + 2];
+					}
+				}
+
+				if (cull && degenerate_prims) {
+					/* use any previously generated vertices */
+					unsigned v0 = rand() % *num_vertices;
+					unsigned v1 = rand() % *num_vertices;
+
+					/* The random vertex may be the one
+					 * being overwritten, so source and
+					 * destination can coincide: memmove
+					 * allows that, memcpy does not.
+					 */
+					memmove(&vertices[elem - 5*3], &vertices[v0*3], vertex_bytes);
+					memmove(&vertices[elem - 4*3], &vertices[v0*3], vertex_bytes);
+
+					memmove(&vertices[elem - 2*3], &vertices[v1*3], vertex_bytes);
+					memmove(&vertices[elem - 1*3], &vertices[v1*3], vertex_bytes);
+				}
+			}
+		}
+	}
+}
+
+static bool is_indexed;			/* run_draw() uses glDrawElements if true, glDrawArrays otherwise */
+static unsigned count;			/* index count (indexed) or vertex count (non-indexed) per draw */
+static unsigned num_duplicates;		/* number of copies of the geometry stored in the buffers */
+static unsigned duplicate_index;	/* copy used by the next draw; advanced by run_draw() */
+static unsigned vb_size, ib_size;	/* size in bytes of one copy of the vertex / index data */
+
+/**
+ * Issue "iterations" draw calls; this is the callback handed to
+ * perf_measure_rate().
+ *
+ * Every draw reads the next copy of the geometry (duplicate_index), wrapping
+ * around after num_duplicates copies.  The state is taken from the file-scope
+ * variables set up by run_test(): is_indexed, count, vb_size, ib_size,
+ * num_duplicates and duplicate_index.
+ */
+static void
+run_draw(unsigned iterations)
+{
+	for (unsigned i = 0; i < iterations; i++) {
+		/* Byte offsets into the bound buffers.  They are computed in
+		 * GLintptr because "long" is narrower than a pointer on LLP64
+		 * targets.
+		 */
+		const GLintptr vb_offset = (GLintptr)vb_size * duplicate_index;
+
+		glVertexPointer(3, GL_FLOAT, 0, (const void *)vb_offset);
+
+		if (is_indexed) {
+			const GLintptr ib_offset =
+				(GLintptr)ib_size * duplicate_index;
+
+			glDrawElements(GL_TRIANGLES, count,
+				       GL_UNSIGNED_INT,
+				       (const void *)ib_offset);
+		} else {
+			glDrawArrays(GL_TRIANGLES, 0, count);
+		}
+
+		duplicate_index = (duplicate_index + 1) % num_duplicates;
+	}
+}
+
+enum cull_method {	/* how primitives are made invisible; piglit_display() reports them in this order */
+	NONE,			/* nothing is culled */
+	BACK_FACE_CULLING,	/* culled rows use the reversed winding order */
+	VIEW_CULLING,		/* culled rows are shifted by 2 clip-space units in X or Y */
+	SUBPIXEL_PRIMS,		/* quads are smaller than one pixel */
+	RASTERIZER_DISCARD,	/* drawn with GL_RASTERIZER_DISCARD enabled */
+	DEGENERATE_PRIMS,	/* culled rows use triangles with a repeated vertex */
+	NUM_CULL_METHODS,	/* number of methods above; used as the loop bound */
+};
+
+/**
+ * Build the geometry for one configuration, upload it into buffer objects
+ * and measure how fast it can be drawn.
+ *
+ * debug_num_iterations: if non-zero, just draw this many times and skip the
+ *                       measurement
+ * indexed:              use an index buffer and glDrawElements
+ * cull_method:          how primitives are made invisible
+ * num_quads_per_dim:    number of quads in both X and Y
+ * quad_size_in_pixels:  requested quad edge length; halved until the whole
+ *                       grid is smaller than the window
+ * cull_percentage:      percentage of culled rows, see gen_triangle_tile()
+ *
+ * Returns the rate reported by perf_measure_rate(run_draw, 0.15), or 0 when
+ * debug_num_iterations is non-zero.  The caller multiplies the rate by the
+ * number of primitives per draw.
+ */
+static double
+run_test(unsigned debug_num_iterations, bool indexed, enum cull_method cull_method,
+	 unsigned num_quads_per_dim, double quad_size_in_pixels,
+	 unsigned cull_percentage)
+{
+	const unsigned max_indices = 8100000 * 3;
+	const unsigned max_vertices = max_indices;
+
+	while (num_quads_per_dim * quad_size_in_pixels >= WINDOW_SIZE)
+		quad_size_in_pixels *= 0.5;
+
+	/* Generate vertices.  These are worst-case sized allocations of
+	 * several hundred megabytes, so check that they succeeded.
+	 */
+	float *vertices = malloc((size_t)max_vertices * 3 * sizeof(*vertices));
+	unsigned *indices = NULL;
+
+	if (indexed)
+		indices = malloc((size_t)max_indices * sizeof(*indices));
+
+	if (!vertices || (indexed && !indices)) {
+		fprintf(stderr, "draw-prim-rate: out of memory\n");
+		exit(1);
+	}
+
+	unsigned num_vertices = 0, num_indices = 0;
+	gen_triangle_tile(num_quads_per_dim, quad_size_in_pixels,
+			  cull_percentage,
+			  cull_method == BACK_FACE_CULLING,
+			  cull_method == VIEW_CULLING,
+			  cull_method == DEGENERATE_PRIMS,
+			  max_vertices, &num_vertices, vertices,
+			  max_indices, &num_indices, indices);
+
+	vb_size = num_vertices * 3 * sizeof(*vertices);
+	ib_size = num_indices * sizeof(*indices);
+
+	/* Duplicate buffers and switch between them, so that no data is cached
+	 * between draws. 32 MB should be greater than any cache.
+	 * An empty tile would otherwise divide by zero.
+	 */
+	num_duplicates = vb_size ? MAX2(1, 32*1024*1024 / vb_size) : 1;
+
+	/* Create buffers. */
+	GLuint vb = 0, ib = 0;
+	glGenBuffers(1, &vb);
+	glBindBuffer(GL_ARRAY_BUFFER, vb);
+	glBufferData(GL_ARRAY_BUFFER,
+		     vb_size * num_duplicates, NULL, GL_STATIC_DRAW);
+	for (unsigned i = 0; i < num_duplicates; i++)
+		glBufferSubData(GL_ARRAY_BUFFER, vb_size * i, vb_size, vertices);
+	free(vertices);
+
+	if (indexed) {
+		glGenBuffers(1, &ib);
+		glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ib);
+		glBufferData(GL_ELEMENT_ARRAY_BUFFER,
+			     ib_size * num_duplicates, NULL,
+			     GL_STATIC_DRAW);
+		for (unsigned i = 0; i < num_duplicates; i++) {
+			glBufferSubData(GL_ELEMENT_ARRAY_BUFFER, ib_size * i,
+					ib_size, indices);
+		}
+		free(indices);
+	}
+	/* Make sure all uploads are finished. */
+	glFinish();
+
+	/* Test */
+	if (cull_method == RASTERIZER_DISCARD)
+		glEnable(GL_RASTERIZER_DISCARD);
+
+	glBindBuffer(GL_ARRAY_BUFFER, vb);
+	if (indexed)
+		glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ib);
+
+	/* Publish the state that run_draw() reads. */
+	is_indexed = indexed;
+	count = indexed ? num_indices : num_vertices;
+	duplicate_index = 0;
+
+	double rate = 0;
+
+	if (debug_num_iterations)
+		run_draw(debug_num_iterations);
+	else
+		rate = perf_measure_rate(run_draw, 0.15);
+
+	if (cull_method == RASTERIZER_DISCARD)
+		glDisable(GL_RASTERIZER_DISCARD);
+
+	/* Cleanup. */
+	glDeleteBuffers(1, &vb);
+	if (indexed)
+		glDeleteBuffers(1, &ib);
+	return rate;
+}
+
+/**
+ * Run every combination of draw call, cull method, shader program and
+ * primitive count, and print the results as a comma-separated table.
+ *
+ * If the environment variable ONE is set, a single fixed configuration is
+ * drawn 100 times and presented instead (for debugging), and PIGLIT_PASS is
+ * returned.
+ *
+ * Each value is printed in GPrims/second, or in prims/clock when a GPU
+ * frequency was given with "-freq=".  The full run ends with exit(0), so the
+ * final return statement is never reached.
+ */
+enum piglit_result
+piglit_display(void)
+{
+	double rate;
+
+	glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
+
+	/* for debugging */
+	if (getenv("ONE")) {
+		glUseProgram(progs[0]);
+		run_test(100, true, BACK_FACE_CULLING, ceil(sqrt(0.5 * 512000)), 2, 50);
+		piglit_swap_buffers();
+		return PIGLIT_PASS;
+	}
+
+	/* Deliberately not "static": calls to ceil() and sqrt() are not
+	 * constant expressions, so ISO C does not allow them in the
+	 * initializer of an object with static storage duration.
+	 */
+	const unsigned num_quads_per_dim[] = {
+		/* The second number is the approx. number of primitives. */
+		ceil(sqrt(0.5 * 1000)),
+		ceil(sqrt(0.5 * 2000)),
+		ceil(sqrt(0.5 * 4000)),
+		ceil(sqrt(0.5 * 6000)),
+		ceil(sqrt(0.5 * 8000)),
+		ceil(sqrt(0.5 * 16000)),
+		ceil(sqrt(0.5 * 32000)),
+		ceil(sqrt(0.5 * 128000)),
+		ceil(sqrt(0.5 * 512000)),
+		/* 512000 is the maximum number when everything fits into the window */
+		/* After that, the prim size decreases, so you'll get subpixel prims. */
+		ceil(sqrt(0.5 * 2000000)),
+		ceil(sqrt(0.5 * 8000000)),
+	};
+
+	/* Exact number of triangles per draw: two per quad. */
+	unsigned num_prims[ARRAY_SIZE(num_quads_per_dim)];
+	for (unsigned i = 0; i < ARRAY_SIZE(num_quads_per_dim); i++)
+		num_prims[i] = num_quads_per_dim[i] * num_quads_per_dim[i] * 2;
+
+	printf("  Measuring %-27s,    0 Varying                                                                       4 Varyings                                                                      8 Varyings\n",
+	       gpu_freq_mhz ? "Prims/clock," : "GPrims/second,");
+	printf("  Draw Call     ,  Cull Method         ");
+
+	for (unsigned prog = 0; prog < ARRAY_SIZE(progs); prog++) {
+		if (prog)
+			printf("   ");
+		for (unsigned i = 0; i < ARRAY_SIZE(num_prims); i++)
+			printf(", %4uK", num_prims[i] / 1000);
+	}
+	printf("\n");
+
+	/* glDrawElements first, then glDrawArrays. */
+	for (int indexed = 1; indexed >= 0; indexed--) {
+		for (int cull_method = 0; cull_method < NUM_CULL_METHODS; cull_method++) {
+			unsigned num_subtests = 1;
+			static const unsigned cull_percentages[] = {100, 75, 50, 25};
+			static const double quad_sizes_in_pixels[] = {1.0 / 7, 0.25, 0.5};
+
+			if (cull_method == BACK_FACE_CULLING ||
+			    cull_method == VIEW_CULLING ||
+			    cull_method == DEGENERATE_PRIMS) {
+				num_subtests = ARRAY_SIZE(cull_percentages);
+			} else if (cull_method == SUBPIXEL_PRIMS) {
+				num_subtests = ARRAY_SIZE(quad_sizes_in_pixels);
+			}
+
+			for (unsigned subtest = 0; subtest < num_subtests; subtest++) {
+				/* 2 is the maximum prim size when everything fits into the window */
+				double quad_size_in_pixels;
+				unsigned cull_percentage;
+
+				if (cull_method == SUBPIXEL_PRIMS) {
+					quad_size_in_pixels = quad_sizes_in_pixels[subtest];
+					cull_percentage = 0;
+				} else {
+					quad_size_in_pixels = 2;
+					/* For NONE and RASTERIZER_DISCARD this is
+					 * 100, but no geometry-based culling flag
+					 * is set for those methods.
+					 */
+					cull_percentage = cull_percentages[subtest];
+				}
+
+				printf("  %-14s, ", indexed ? "glDrawElements" : "glDrawArrays");
+
+				if (cull_method == NONE ||
+				    cull_method == RASTERIZER_DISCARD) {
+					printf("%-21s",
+					       cull_method == NONE ? "none" : "rasterizer discard");
+				} else if (cull_method == SUBPIXEL_PRIMS) {
+					/* two triangles per quad */
+					printf("%2u small prims/pixel ",
+					       (unsigned)((1.0 / quad_size_in_pixels) *
+							  (1.0 / quad_size_in_pixels) * 2));
+				} else {
+					printf("%3u%% %-16s", cull_percentage,
+					       cull_method == BACK_FACE_CULLING ? "back faces" :
+						cull_method == VIEW_CULLING ?	  "culled by view" :
+						cull_method == DEGENERATE_PRIMS ? "degenerate prims" :
+										  "(error)");
+				}
+				fflush(stdout);
+
+				for (unsigned prog = 0; prog < ARRAY_SIZE(progs); prog++) {
+					glUseProgram(progs[prog]);
+
+					if (prog)
+						printf("   ");
+
+					for (unsigned i = 0; i < ARRAY_SIZE(num_prims); i++) {
+						/* 0 = measure instead of a fixed debug iteration count */
+						rate = run_test(0, indexed, cull_method, num_quads_per_dim[i],
+								quad_size_in_pixels, cull_percentage);
+						/* draws/second -> primitives/second */
+						rate *= num_prims[i];
+
+						if (gpu_freq_mhz) {
+							rate /= gpu_freq_mhz * 1000000.0;
+							printf(",%6.2f", rate);
+						} else {
+							printf(",%6.2f", rate / 1000000000);
+						}
+						fflush(stdout);
+					}
+				}
+				printf("\n");
+			}
+		}
+	}
+
+	exit(0);
+	return PIGLIT_SKIP;
+}
diff --git a/tests/perf/drawoverhead.c b/tests/perf/drawoverhead.c
index b30d7d46a..d1fa8d3c1 100644
--- a/tests/perf/drawoverhead.c
+++ b/tests/perf/drawoverhead.c
@@ -634,21 +634,21 @@  draw_vertex_attrib_change(unsigned count)
 
 static double
 perf_run(const char *call, unsigned num_vbos, unsigned num_ubos,
 	 unsigned num_textures, unsigned num_tbos, unsigned num_images,
 	 unsigned num_imgbos,
 	 const char *change, perf_rate_func f, double base_rate)
 {
 	static unsigned test_index;
 	test_index++;
 
-	double rate = perf_measure_rate(f);
+	double rate = perf_measure_rate(f, 0.5);
 	double ratio = base_rate ? rate / base_rate : 1;
 
 	printf(" %3u, %s (%2u VBO| %u UBO| %2u %s) w/ %s change,%*s"
 	       COLOR_CYAN "%5u" COLOR_RESET ", %s%.1f%%" COLOR_RESET "\n",
 	       test_index, call, num_vbos, num_ubos,
 	       num_textures ? num_textures :
 	         num_tbos ? num_tbos :
 	         num_images ? num_images : num_imgbos,
 	       num_textures ? "Tex" :
 	         num_tbos ? "TBO" :