[v4,18/40] intel/compiler: fix ddx and ddy for 16-bit float

Submitted by Iago Toral Quiroga on Feb. 12, 2019, 11:55 a.m.

Details

Message ID 20190212115607.21467-19-itoral@igalia.com
State New
Headers show
Series "intel: VK_KHR_shader_float16_int8 implementation" ( rev: 16 15 14 13 12 11 10 9 8 7 6 ) in Mesa

Not browsing as part of any series.

Commit Message

Iago Toral Quiroga Feb. 12, 2019, 11:55 a.m.
We were assuming 32-bit elements. Also, in SIMD8 we pack 2 vector components
in a single SIMD register, so, for example, component Y of a 16-bit vec2
starts at byte offset 16B. This means that when we compute the offset of
the elements to be differentiated we should not stomp whatever base offset we
have, but instead add to it.

v2
 - Use byte_offset() helper (Jason)
 - Merge the fix for SIMD8: using byte_offset() fixes that too.

Reviewed-by: Jason Ekstrand <jason@jlekstrand.net> (v1)
Reviewed-by: Matt Turner <mattst88@gmail.com>
---
 src/intel/compiler/brw_fs_generator.cpp | 37 ++++++++++++-------------
 1 file changed, 18 insertions(+), 19 deletions(-)

Patch hide | download patch | download mbox

diff --git a/src/intel/compiler/brw_fs_generator.cpp b/src/intel/compiler/brw_fs_generator.cpp
index e3b68fa3165..996eafd4af1 100644
--- a/src/intel/compiler/brw_fs_generator.cpp
+++ b/src/intel/compiler/brw_fs_generator.cpp
@@ -1248,10 +1248,9 @@  fs_generator::generate_ddx(const fs_inst *inst,
       width = BRW_WIDTH_4;
    }
 
-   struct brw_reg src0 = src;
+   struct brw_reg src0 = byte_offset(src, type_sz(src.type));;
    struct brw_reg src1 = src;
 
-   src0.subnr   = sizeof(float);
    src0.vstride = vstride;
    src0.width   = width;
    src0.hstride = BRW_HORIZONTAL_STRIDE_0;
@@ -1270,23 +1269,25 @@  void
 fs_generator::generate_ddy(const fs_inst *inst,
                            struct brw_reg dst, struct brw_reg src)
 {
+   const uint32_t type_size = type_sz(src.type);
+
    if (inst->opcode == FS_OPCODE_DDY_FINE) {
       /* produce accurate derivatives */
       if (devinfo->gen >= 11) {
          src = stride(src, 0, 2, 1);
-         struct brw_reg src_0  = byte_offset(src,  0 * sizeof(float));
-         struct brw_reg src_2  = byte_offset(src,  2 * sizeof(float));
-         struct brw_reg src_4  = byte_offset(src,  4 * sizeof(float));
-         struct brw_reg src_6  = byte_offset(src,  6 * sizeof(float));
-         struct brw_reg src_8  = byte_offset(src,  8 * sizeof(float));
-         struct brw_reg src_10 = byte_offset(src, 10 * sizeof(float));
-         struct brw_reg src_12 = byte_offset(src, 12 * sizeof(float));
-         struct brw_reg src_14 = byte_offset(src, 14 * sizeof(float));
-
-         struct brw_reg dst_0  = byte_offset(dst,  0 * sizeof(float));
-         struct brw_reg dst_4  = byte_offset(dst,  4 * sizeof(float));
-         struct brw_reg dst_8  = byte_offset(dst,  8 * sizeof(float));
-         struct brw_reg dst_12 = byte_offset(dst, 12 * sizeof(float));
+         struct brw_reg src_0  = byte_offset(src,  0 * type_size);
+         struct brw_reg src_2  = byte_offset(src,  2 * type_size);
+         struct brw_reg src_4  = byte_offset(src,  4 * type_size);
+         struct brw_reg src_6  = byte_offset(src,  6 * type_size);
+         struct brw_reg src_8  = byte_offset(src,  8 * type_size);
+         struct brw_reg src_10 = byte_offset(src, 10 * type_size);
+         struct brw_reg src_12 = byte_offset(src, 12 * type_size);
+         struct brw_reg src_14 = byte_offset(src, 14 * type_size);
+
+         struct brw_reg dst_0  = byte_offset(dst,  0 * type_size);
+         struct brw_reg dst_4  = byte_offset(dst,  4 * type_size);
+         struct brw_reg dst_8  = byte_offset(dst,  8 * type_size);
+         struct brw_reg dst_12 = byte_offset(dst, 12 * type_size);
 
          brw_push_insn_state(p);
          brw_set_default_exec_size(p, BRW_EXECUTE_4);
@@ -1313,10 +1314,8 @@  fs_generator::generate_ddy(const fs_inst *inst,
       }
    } else {
       /* replicate the derivative at the top-left pixel to other pixels */
-      struct brw_reg src0 = stride(src, 4, 4, 0);
-      struct brw_reg src1 = stride(src, 4, 4, 0);
-      src0.subnr = 0 * sizeof(float);
-      src1.subnr = 2 * sizeof(float);
+      struct brw_reg src0 = byte_offset(stride(src, 4, 4, 0), 0 * type_size);
+      struct brw_reg src1 = byte_offset(stride(src, 4, 4, 0), 2 * type_size);
 
       brw_ADD(p, dst, negate(src0), src1);
    }