[25/26] ac: add LLVM code for triangle culling

Submitted by Marek Olšák on Feb. 13, 2019, 5:16 a.m.

Details

Message ID 20190213051621.6235-26-maraeo@gmail.com
State Accepted
Commit eda281e9772deee630275ddd7c23fbac841ecb38
Headers show
Series "RadeonSI: Primitive culling with async compute" ( rev: 1 ) in Mesa

Not browsing as part of any series.

Commit Message

Marek Olšák Feb. 13, 2019, 5:16 a.m.
From: Marek Olšák <marek.olsak@amd.com>

---
 src/amd/Makefile.sources      |   2 +
 src/amd/common/ac_llvm_cull.c | 275 ++++++++++++++++++++++++++++++++++
 src/amd/common/ac_llvm_cull.h |  59 ++++++++
 src/amd/common/meson.build    |   2 +
 4 files changed, 338 insertions(+)
 create mode 100644 src/amd/common/ac_llvm_cull.c
 create mode 100644 src/amd/common/ac_llvm_cull.h

Patch hide | download patch | download mbox

diff --git a/src/amd/Makefile.sources b/src/amd/Makefile.sources
index 58e0008ee62..e1557ff5365 100644
--- a/src/amd/Makefile.sources
+++ b/src/amd/Makefile.sources
@@ -39,6 +39,8 @@  AMD_COMPILER_FILES = \
 	common/ac_exp_param.h \
 	common/ac_llvm_build.c \
 	common/ac_llvm_build.h \
+	common/ac_llvm_cull.c \
+	common/ac_llvm_cull.h \
 	common/ac_llvm_helper.cpp \
 	common/ac_llvm_util.c \
 	common/ac_llvm_util.h \
diff --git a/src/amd/common/ac_llvm_cull.c b/src/amd/common/ac_llvm_cull.c
new file mode 100644
index 00000000000..1c2da3e0418
--- /dev/null
+++ b/src/amd/common/ac_llvm_cull.c
@@ -0,0 +1,275 @@ 
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ */
+
+#include "ac_llvm_cull.h"
+#include <llvm-c/Core.h>
+
+struct ac_position_w_info { /* i1 values derived from the W of the 3 vertices */
+	/* If a primitive intersects the W=0 plane, it causes a reflection
+	 * of the determinant used for face culling. Every vertex behind
+	 * the W=0 plane negates the determinant, so having 2 vertices behind
+	 * the plane has no effect. This is i1 true if the determinant should be
+	 * negated.
+	 */
+	LLVMValueRef w_reflection;
+
+	/* If we simplify the "-w <= p <= w" view culling equation, we get
+	 * "-w <= w", which can't be satisfied when w is negative.
+	 * In perspective projection, a negative W means that the primitive
+	 * is behind the viewer, but the equation is independent of the type
+	 * of projection.
+	 *
+	 * w_accepted is false when all W are negative and therefore
+	 * the primitive is invisible.
+	 */
+	LLVMValueRef w_accepted;
+
+	LLVMValueRef all_w_positive; /* i1: true if no vertex has W < 0 */
+	LLVMValueRef any_w_negative; /* i1: true if at least one vertex has W < 0 */
+};
+
+/* Classify the W components of the 3 vertices and fill *w with the i1
+ * values that the W, face and bounding-box culling code consumes. */
+static void ac_analyze_position_w(struct ac_llvm_context *ctx,
+				  LLVMValueRef pos[3][4],
+				  struct ac_position_w_info *w)
+{
+	LLVMValueRef odd_neg = ctx->i1false; /* toggled by every negative W */
+	LLVMValueRef any_neg = ctx->i1false;
+	LLVMValueRef all_neg = ctx->i1true;
+
+	for (unsigned vtx = 0; vtx < 3; vtx++) {
+		LLVMValueRef is_neg = LLVMBuildFCmp(ctx->builder, LLVMRealOLT,
+						    pos[vtx][3], ctx->f32_0, "");
+		odd_neg = LLVMBuildXor(ctx->builder, odd_neg, is_neg, "");
+		any_neg = LLVMBuildOr(ctx->builder, any_neg, is_neg, "");
+		all_neg = LLVMBuildAnd(ctx->builder, all_neg, is_neg, "");
+	}
+	w->w_reflection = odd_neg;
+	w->any_w_negative = any_neg;
+	w->all_w_positive = LLVMBuildNot(ctx->builder, any_neg, "");
+	w->w_accepted = LLVMBuildNot(ctx->builder, all_neg, "");
+}
+
+/* Emit front/back face (and zero-area) culling for one triangle.
+ * Returns an i1 that is true when the triangle is accepted. */
+static LLVMValueRef ac_cull_face(struct ac_llvm_context *ctx,
+				 LLVMValueRef pos[3][4],
+				 struct ac_position_w_info *w,
+				 bool cull_front,
+				 bool cull_back,
+				 bool cull_zero_area)
+{
+	/* Both faces are culled: nothing can be accepted. */
+	if (cull_front && cull_back)
+		return ctx->i1false;
+
+	/* No test was requested: everything is accepted. */
+	if (!(cull_front || cull_back || cull_zero_area))
+		return ctx->i1true;
+
+	LLVMBuilderRef b = ctx->builder;
+
+	/* det = (x2 - x0) * (y1 - y0) - (x0 - x1) * (y0 - y2).
+	 * Its sign gives the facing; det == 0 means the area is 0.
+	 */
+	LLVMValueRef dx20 = LLVMBuildFSub(b, pos[2][0], pos[0][0], "");
+	LLVMValueRef dy10 = LLVMBuildFSub(b, pos[1][1], pos[0][1], "");
+	LLVMValueRef dx01 = LLVMBuildFSub(b, pos[0][0], pos[1][0], "");
+	LLVMValueRef dy02 = LLVMBuildFSub(b, pos[0][1], pos[2][1], "");
+	LLVMValueRef lhs = LLVMBuildFMul(b, dx20, dy10, "");
+	LLVMValueRef rhs = LLVMBuildFMul(b, dx01, dy02, "");
+	LLVMValueRef det = LLVMBuildFSub(b, lhs, rhs, "");
+
+	/* An odd number of negative Ws flips the sign of the determinant. */
+	LLVMValueRef flipped = LLVMBuildFNeg(b, det, "");
+	det = LLVMBuildSelect(b, w->w_reflection, flipped, det, "");
+
+	/* The strict comparisons also reject det == 0 (zero-area triangles). */
+	LLVMRealPredicate pred;
+	if (cull_front)
+		pred = cull_zero_area ? LLVMRealOGT : LLVMRealOGE;
+	else if (cull_back)
+		pred = cull_zero_area ? LLVMRealOLT : LLVMRealOLE;
+	else
+		pred = LLVMRealONE; /* only cull_zero_area is set here */
+	return LLVMBuildFCmp(b, pred, det, ctx->f32_0, "");
+}
+
+/* Perform view culling and small primitive elimination and return true
+ * if the primitive is accepted and initially_accepted == true. */
+static LLVMValueRef cull_bbox(struct ac_llvm_context *ctx,
+			      LLVMValueRef pos[3][4],
+			      LLVMValueRef initially_accepted,
+			      struct ac_position_w_info *w,
+			      LLVMValueRef vp_scale[2],
+			      LLVMValueRef vp_translate[2],
+			      LLVMValueRef small_prim_precision,
+			      bool cull_view_xy,
+			      bool cull_view_near_z,
+			      bool cull_view_far_z,
+			      bool cull_small_prims,
+			      bool use_halfz_clip_space)
+{
+	LLVMBuilderRef builder = ctx->builder;
+
+	/* Nothing to test: pass the caller's verdict through unchanged. */
+	if (!cull_view_xy && !cull_view_near_z && !cull_view_far_z && !cull_small_prims)
+		return initially_accepted;
+
+	/* Skip the culling if the primitive has already been rejected or
+	 * if any W is negative. The bounding box culling doesn't work when
+	 * W is negative. In that case the result is initially_accepted. */
+	LLVMValueRef cond = LLVMBuildAnd(builder, initially_accepted,
+					 w->all_w_positive, "");
+	LLVMValueRef accepted_var = ac_build_alloca_undef(ctx, ctx->i1, "");
+	LLVMBuildStore(builder, initially_accepted, accepted_var);
+
+	ac_build_ifcc(ctx, cond, 10000000 /* does this matter? */);
+	{
+		LLVMValueRef bbox_min[3], bbox_max[3];
+		LLVMValueRef accepted = initially_accepted;
+
+		/* Compute the primitive bounding box for easy culling. Z is only
+		 * read when Z culling is on, so callers may leave it unset. */
+		unsigned num_chans = cull_view_near_z || cull_view_far_z ? 3 : 2;
+		for (unsigned chan = 0; chan < num_chans; chan++) {
+			bbox_min[chan] = ac_build_fmin(ctx, pos[0][chan], pos[1][chan]);
+			bbox_min[chan] = ac_build_fmin(ctx, bbox_min[chan], pos[2][chan]);
+
+			bbox_max[chan] = ac_build_fmax(ctx, pos[0][chan], pos[1][chan]);
+			bbox_max[chan] = ac_build_fmax(ctx, bbox_max[chan], pos[2][chan]);
+		}
+
+		/* View culling. */
+		if (cull_view_xy || cull_view_near_z || cull_view_far_z) {
+			for (unsigned chan = 0; chan < 3; chan++) {
+				LLVMValueRef visible;
+
+				if ((cull_view_xy && chan <= 1) ||
+				    (cull_view_near_z && chan == 2)) {
+					float t = chan == 2 && use_halfz_clip_space ? 0 : -1;
+					visible = LLVMBuildFCmp(builder, LLVMRealOGE, bbox_max[chan],
+								LLVMConstReal(ctx->f32, t), "");
+					accepted = LLVMBuildAnd(builder, accepted, visible, "");
+				}
+
+				if ((cull_view_xy && chan <= 1) ||
+				    (cull_view_far_z && chan == 2)) {
+					visible = LLVMBuildFCmp(builder, LLVMRealOLE, bbox_min[chan],
+								ctx->f32_1, "");
+					accepted = LLVMBuildAnd(builder, accepted, visible, "");
+				}
+			}
+		}
+
+		/* Small primitive elimination. */
+		if (cull_small_prims) {
+			/* Assuming a sample position at (0.5, 0.5), if we round
+			 * the bounding box min/max extents and the results of
+			 * the rounding are equal in either the X or Y direction,
+			 * the bounding box does not intersect the sample.
+			 * See these GDC slides for pictures:
+			 * https://frostbite-wp-prd.s3.amazonaws.com/wp-content/uploads/2016/03/29204330/GDC_2016_Compute.pdf
+			 */
+			LLVMValueRef min, max, not_equal[2], visible;
+
+			for (unsigned chan = 0; chan < 2; chan++) {
+				/* Convert the position to screen-space coordinates. */
+				min = ac_build_fmad(ctx, bbox_min[chan],
+						    vp_scale[chan], vp_translate[chan]);
+				max = ac_build_fmad(ctx, bbox_max[chan],
+						    vp_scale[chan], vp_translate[chan]);
+				/* Scale the bounding box according to the precision of
+				 * the rasterizer and the number of MSAA samples. */
+				min = LLVMBuildFSub(builder, min, small_prim_precision, "");
+				max = LLVMBuildFAdd(builder, max, small_prim_precision, "");
+
+				/* Determine if the bbox intersects the sample point.
+				 * It also works for MSAA, but vp_scale, vp_translate,
+				 * and small_prim_precision are computed differently. */
+				min = ac_build_round(ctx, min);
+				max = ac_build_round(ctx, max);
+				not_equal[chan] = LLVMBuildFCmp(builder, LLVMRealONE, min, max, "");
+			}
+			visible = LLVMBuildAnd(builder, not_equal[0], not_equal[1], "");
+			accepted = LLVMBuildAnd(builder, accepted, visible, "");
+		}
+
+		LLVMBuildStore(builder, accepted, accepted_var);
+	}
+	ac_build_endif(ctx, 10000000);
+
+	return LLVMBuildLoad(builder, accepted_var, "");
+}
+
+/**
+ * Emit the enabled culling tests; yields i1 true if the primitive is accepted.
+ *
+ * \param pos                   Positions of the 3 vertices, one vec4 each.
+ * \param initially_accepted    AND'ed with the result. Some computations can be
+ *                              skipped if this is false.
+ * \param vp_scale              XY viewport scale.
+ *                              Multiply it by the number of samples for MSAA.
+ * \param vp_translate          XY viewport translation.
+ *                              Multiply it by the number of samples for MSAA.
+ * \param small_prim_precision  Precision of small primitive culling. It should
+ *                              be greater than or equal to the precision of the
+ *                              rasterizer. Use num_samples / 2^subpixel_bits;
+ *                              the quantization mode defines subpixel_bits.
+ * \param options               Selects the tests, see ac_cull_options.
+ */
+LLVMValueRef ac_cull_triangle(struct ac_llvm_context *ctx,
+			      LLVMValueRef pos[3][4],
+			      LLVMValueRef initially_accepted,
+			      LLVMValueRef vp_scale[2],
+			      LLVMValueRef vp_translate[2],
+			      LLVMValueRef small_prim_precision,
+			      struct ac_cull_options *options)
+{
+	struct ac_position_w_info w_info;
+	ac_analyze_position_w(ctx, pos, &w_info);
+
+	/* W culling (optional), combined with the caller's verdict. */
+	LLVMValueRef result = ctx->i1true;
+	if (options->cull_w)
+		result = w_info.w_accepted;
+	result = LLVMBuildAnd(ctx->builder, result, initially_accepted, "");
+
+	/* Face culling. */
+	LLVMValueRef face_ok = ac_cull_face(ctx, pos, &w_info,
+					    options->cull_front, options->cull_back,
+					    options->cull_zero_area);
+	result = LLVMBuildAnd(ctx->builder, result, face_ok, "");
+
+	/* View culling and small primitive elimination. */
+	return cull_bbox(ctx, pos, result, &w_info, vp_scale, vp_translate,
+			 small_prim_precision,
+			 options->cull_view_xy,
+			 options->cull_view_near_z,
+			 options->cull_view_far_z,
+			 options->cull_small_prims,
+			 options->use_halfz_clip_space);
+}
diff --git a/src/amd/common/ac_llvm_cull.h b/src/amd/common/ac_llvm_cull.h
new file mode 100644
index 00000000000..0aa6c902a68
--- /dev/null
+++ b/src/amd/common/ac_llvm_cull.h
@@ -0,0 +1,59 @@ 
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ */
+
+#ifndef AC_LLVM_CULL_H
+#define AC_LLVM_CULL_H
+
+#include "ac_llvm_build.h"
+
+struct ac_cull_options {
+	/* In general, I recommend setting all to true except view Z culling,
+	 * which isn't so effective because W culling is cheaper and partially
+	 * replaces near Z culling, and you don't need to set Position.z
+	 * if Z culling is disabled.
+	 *
+	 * If something doesn't work, turn some of these off to find out what.
+	 */
+	bool cull_front;
+	bool cull_back;
+	bool cull_view_xy; /* test the bounding box against -1..1 in X and Y */
+	bool cull_view_near_z; /* test the bounding box max Z against the near bound */
+	bool cull_view_far_z; /* test the bounding box min Z against 1 */
+	bool cull_small_prims; /* cull if the bounding box covers no sample point */
+	bool cull_zero_area; /* also cull when the face determinant is 0 */
+	bool cull_w; /* cull primitives with all W < 0 */
+
+	bool use_halfz_clip_space; /* near Z bound is 0 instead of -1 */
+};
+
+LLVMValueRef ac_cull_triangle(struct ac_llvm_context *ctx,
+			      LLVMValueRef pos[3][4],
+			      LLVMValueRef initially_accepted,
+			      LLVMValueRef vp_scale[2],
+			      LLVMValueRef vp_translate[2],
+			      LLVMValueRef small_prim_precision,
+			      struct ac_cull_options *options);
+
+#endif
diff --git a/src/amd/common/meson.build b/src/amd/common/meson.build
index 6827a020947..c03433929d8 100644
--- a/src/amd/common/meson.build
+++ b/src/amd/common/meson.build
@@ -32,6 +32,8 @@  amd_common_files = files(
   'ac_exp_param.h',
   'ac_llvm_build.c',
   'ac_llvm_build.h',
+  'ac_llvm_cull.c',
+  'ac_llvm_cull.h',
   'ac_llvm_helper.cpp',
   'ac_llvm_util.c',
   'ac_llvm_util.h',