[Mesa-dev] radeonsi: fix GLSL textureGrad(samplerCube*) functions

Submitted by Marek Olšák on July 23, 2015, 11:11 p.m.

Details

Message ID 1437693092-9053-1-git-send-email-maraeo@gmail.com
State New
Headers show

Not browsing as part of any series.

Commit Message

Marek Olšák July 23, 2015, 11:11 p.m.
From: Marek Olšák <marek.olsak@amd.com>

+4 piglits
---
 src/gallium/drivers/radeon/radeon_llvm.h           |  3 +-
 .../drivers/radeon/radeon_setup_tgsi_llvm.c        | 84 ++++++++++++++++------
 src/gallium/drivers/radeonsi/si_shader.c           | 35 ++++++---
 3 files changed, 89 insertions(+), 33 deletions(-)

Patch hide | download patch | download mbox

diff --git a/src/gallium/drivers/radeon/radeon_llvm.h b/src/gallium/drivers/radeon/radeon_llvm.h
index ef09ead..7390658 100644
--- a/src/gallium/drivers/radeon/radeon_llvm.h
+++ b/src/gallium/drivers/radeon/radeon_llvm.h
@@ -174,7 +174,8 @@  static inline LLVMValueRef bitcast(
 
 void radeon_llvm_emit_prepare_cube_coords(struct lp_build_tgsi_context * bld_base,
                                           struct lp_build_emit_data * emit_data,
-                                          LLVMValueRef *coords_arg);
+                                          LLVMValueRef *coords_arg,
+					  LLVMValueRef *derivs_arg);
 
 void radeon_llvm_context_init(struct radeon_llvm_context * ctx);
 
diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
index 39bbdb6..8a4de05 100644
--- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
+++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
@@ -748,32 +748,24 @@  static void kil_emit(
 	}
 }
 
-void radeon_llvm_emit_prepare_cube_coords(
-		struct lp_build_tgsi_context * bld_base,
-		struct lp_build_emit_data * emit_data,
-		LLVMValueRef *coords_arg)
+static void radeon_llvm_cube_to_2d_coords(struct lp_build_tgsi_context *bld_base,
+					  LLVMValueRef *in, LLVMValueRef *out)
 {
-
-	unsigned target = emit_data->inst->Texture.Texture;
-	unsigned opcode = emit_data->inst->Instruction.Opcode;
 	struct gallivm_state * gallivm = bld_base->base.gallivm;
 	LLVMBuilderRef builder = gallivm->builder;
 	LLVMTypeRef type = bld_base->base.elem_type;
 	LLVMValueRef coords[4];
 	LLVMValueRef mad_args[3];
-	LLVMValueRef idx;
-	struct LLVMOpaqueValue *cube_vec;
-	LLVMValueRef v;
+	LLVMValueRef v, cube_vec;
 	unsigned i;
 
-	cube_vec = lp_build_gather_values(bld_base->base.gallivm, coords_arg, 4);
+	cube_vec = lp_build_gather_values(bld_base->base.gallivm, in, 4);
 	v = build_intrinsic(builder, "llvm.AMDGPU.cube", LLVMVectorType(type, 4),
                             &cube_vec, 1, LLVMReadNoneAttribute);
 
-	for (i = 0; i < 4; ++i) {
-		idx = lp_build_const_int32(gallivm, i);
-		coords[i] = LLVMBuildExtractElement(builder, v, idx, "");
-	}
+	for (i = 0; i < 4; ++i)
+		coords[i] = LLVMBuildExtractElement(builder, v,
+						    lp_build_const_int32(gallivm, i), "");
 
 	coords[2] = build_intrinsic(builder, "fabs",
 			type, &coords[2], 1, LLVMReadNoneAttribute);
@@ -791,10 +783,60 @@  void radeon_llvm_emit_prepare_cube_coords(
 			mad_args[0], mad_args[1], mad_args[2]);
 
 	/* apply xyz = yxw swizzle to cooords */
-	coords[2] = coords[3];
-	coords[3] = coords[1];
-	coords[1] = coords[0];
-	coords[0] = coords[3];
+	out[0] = coords[1];
+	out[1] = coords[0];
+	out[2] = coords[3];
+}
+
+void radeon_llvm_emit_prepare_cube_coords(
+		struct lp_build_tgsi_context * bld_base,
+		struct lp_build_emit_data * emit_data,
+		LLVMValueRef *coords_arg,
+		LLVMValueRef *derivs_arg)
+{
+
+	unsigned target = emit_data->inst->Texture.Texture;
+	unsigned opcode = emit_data->inst->Instruction.Opcode;
+	struct gallivm_state * gallivm = bld_base->base.gallivm;
+	LLVMBuilderRef builder = gallivm->builder;
+	LLVMValueRef coords[4];
+	unsigned i;
+
+	radeon_llvm_cube_to_2d_coords(bld_base, coords_arg, coords);
+
+	if (opcode == TGSI_OPCODE_TXD && derivs_arg) {
+		LLVMValueRef derivs[4];
+		int axis;
+
+		/* Convert cube derivatives to 2D derivatives. */
+		for (axis = 0; axis < 2; axis++) {
+			LLVMValueRef shifted_cube_coords[4], shifted_coords[4];
+
+			/* Shift the cube coordinates by the derivatives to get
+			 * the cube coordinates of the "neighboring pixel".
+			 */
+			for (i = 0; i < 3; i++)
+				shifted_cube_coords[i] =
+					LLVMBuildFAdd(builder, coords_arg[i],
+						      derivs_arg[axis*3+i], "");
+			shifted_cube_coords[3] = LLVMGetUndef(bld_base->base.elem_type);
+
+			/* Project the shifted cube coordinates onto the face. */
+			radeon_llvm_cube_to_2d_coords(bld_base, shifted_cube_coords,
+						      shifted_coords);
+
+			/* Subtract both sets of 2D coordinates to get 2D derivatives.
+			 * This won't work if the shifted coordinates ended up
+			 * in a different face.
+			 */
+			for (i = 0; i < 2; i++)
+				derivs[axis*2+i] =
+					LLVMBuildFSub(builder, shifted_coords[i],
+						      coords[i], "");
+		}
+
+		memcpy(derivs_arg, derivs, sizeof(derivs));
+	}
 
 	if (target == TGSI_TEXTURE_CUBE_ARRAY ||
 	    target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
@@ -864,7 +906,7 @@  static void txp_fetch_args(
 	     inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) &&
 	    inst->Instruction.Opcode != TGSI_OPCODE_TXQ &&
 	    inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ) {
-		radeon_llvm_emit_prepare_cube_coords(bld_base, emit_data, coords);
+		radeon_llvm_emit_prepare_cube_coords(bld_base, emit_data, coords, NULL);
 	}
 
 	emit_data->args[0] = lp_build_gather_values(bld_base->base.gallivm,
@@ -909,7 +951,7 @@  static void tex_fetch_args(
 	     inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) &&
 	    inst->Instruction.Opcode != TGSI_OPCODE_TXQ &&
 	    inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ) {
-		radeon_llvm_emit_prepare_cube_coords(bld_base, emit_data, coords);
+		radeon_llvm_emit_prepare_cube_coords(bld_base, emit_data, coords, NULL);
 	}
 
 	emit_data->arg_count = 1;
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index cddd9a0..4693b82 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -2226,12 +2226,13 @@  static void tex_fetch_args(
 	const struct tgsi_full_instruction * inst = emit_data->inst;
 	unsigned opcode = inst->Instruction.Opcode;
 	unsigned target = inst->Texture.Texture;
-	LLVMValueRef coords[5];
+	LLVMValueRef coords[5], derivs[6];
 	LLVMValueRef address[16];
 	int ref_pos;
 	unsigned num_coords = tgsi_util_get_texture_coord_dim(target, &ref_pos);
 	unsigned count = 0;
 	unsigned chan;
+	unsigned num_deriv_channels = 0;
 	unsigned sampler_src = emit_data->inst->Instruction.NumSrcRegs - 1;
 	unsigned sampler_index = emit_data->inst->Src[sampler_src].Register.Index;
 	bool has_offset = HAVE_LLVM >= 0x0305 ? inst->Texture.NumOffsets > 0 : false;
@@ -2312,18 +2313,13 @@  static void tex_fetch_args(
 		}
 	}
 
-	if (target == TGSI_TEXTURE_CUBE ||
-	    target == TGSI_TEXTURE_CUBE_ARRAY ||
-	    target == TGSI_TEXTURE_SHADOWCUBE ||
-	    target == TGSI_TEXTURE_SHADOWCUBE_ARRAY)
-		radeon_llvm_emit_prepare_cube_coords(bld_base, emit_data, coords);
-
 	/* Pack user derivatives */
 	if (opcode == TGSI_OPCODE_TXD) {
-		int num_deriv_channels, param;
+		int param, num_src_deriv_channels;
 
 		switch (target) {
 		case TGSI_TEXTURE_3D:
+			num_src_deriv_channels = 3;
 			num_deriv_channels = 3;
 			break;
 		case TGSI_TEXTURE_2D:
@@ -2332,27 +2328,44 @@  static void tex_fetch_args(
 		case TGSI_TEXTURE_SHADOWRECT:
 		case TGSI_TEXTURE_2D_ARRAY:
 		case TGSI_TEXTURE_SHADOW2D_ARRAY:
+			num_src_deriv_channels = 2;
+			num_deriv_channels = 2;
+			break;
 		case TGSI_TEXTURE_CUBE:
 		case TGSI_TEXTURE_SHADOWCUBE:
 		case TGSI_TEXTURE_CUBE_ARRAY:
 		case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
+			/* Cube derivatives will be converted to 2D. */
+			num_src_deriv_channels = 3;
 			num_deriv_channels = 2;
 			break;
 		case TGSI_TEXTURE_1D:
 		case TGSI_TEXTURE_SHADOW1D:
 		case TGSI_TEXTURE_1D_ARRAY:
 		case TGSI_TEXTURE_SHADOW1D_ARRAY:
+			num_src_deriv_channels = 1;
 			num_deriv_channels = 1;
 			break;
 		default:
 			assert(0); /* no other targets are valid here */
 		}
 
-		for (param = 1; param <= 2; param++)
-			for (chan = 0; chan < num_deriv_channels; chan++)
-				address[count++] = lp_build_emit_fetch(bld_base, inst, param, chan);
+		for (param = 0; param < 2; param++)
+			for (chan = 0; chan < num_src_deriv_channels; chan++)
+				derivs[param * num_src_deriv_channels + chan] =
+					lp_build_emit_fetch(bld_base, inst, param+1, chan);
 	}
 
+	if (target == TGSI_TEXTURE_CUBE ||
+	    target == TGSI_TEXTURE_CUBE_ARRAY ||
+	    target == TGSI_TEXTURE_SHADOWCUBE ||
+	    target == TGSI_TEXTURE_SHADOWCUBE_ARRAY)
+		radeon_llvm_emit_prepare_cube_coords(bld_base, emit_data, coords, derivs);
+
+	if (opcode == TGSI_OPCODE_TXD)
+		for (int i = 0; i < num_deriv_channels * 2; i++)
+			address[count++] = derivs[i];
+
 	/* Pack texture coordinates */
 	address[count++] = coords[0];
 	if (num_coords > 1)

Comments

On 24.07.2015 08:11, Marek Olšák wrote:
> From: Marek Olšák <marek.olsak@amd.com>
> 
> +4 piglits

Awesome! I love piglits. :)

Just some cosmetic issues:


> @@ -174,7 +174,8 @@ static inline LLVMValueRef bitcast(
>  
>  void radeon_llvm_emit_prepare_cube_coords(struct lp_build_tgsi_context * bld_base,
>                                            struct lp_build_emit_data * emit_data,
> -                                          LLVMValueRef *coords_arg);
> +                                          LLVMValueRef *coords_arg,
> +					  LLVMValueRef *derivs_arg);

Indentation is inconsistent between these two lines.


> +				derivs[axis*2+i] =
> +					LLVMBuildFSub(builder, shifted_coords[i],
> +						      coords[i], "");

I think the arithmetic is more readable with spaces:

				derivs[axis * 2 + i] =

But I'm fine with this either way.


With the inconsistent indentation fixed,

Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>