[v3] nv110/exa: update sched codes

Submitted by Aaryaman Vasishta on June 10, 2017, 7:10 a.m.

Details

Message ID 20170610071057.13520-1-jem456.vasishta@gmail.com
State New
Headers show
Series "nv110/exa: update sched codes" ( rev: 3 ) in Nouveau

Not browsing as part of any series.

Commit Message

Aaryaman Vasishta June 10, 2017, 7:10 a.m.
This patch adds proper scheduling delays (sched codes) to the Maxwell EXA
shaders. rendercheck results appear consistent with and without this patch,
although I haven't tested them extensively.

Trello:
https://trello.com/c/6LPB2EIS/174-update-maxwell-shaders-with-proper-delays

Signed-off-by: Aaryaman Vasishta <jem456.vasishta@gmail.com>
---
 src/shader/exac8nv110.fp  | 10 +++++-----
 src/shader/exac8nv110.fpc | 18 +++++++++---------
 src/shader/exacanv110.fp  | 10 +++++-----
 src/shader/exacanv110.fpc | 18 +++++++++---------
 src/shader/exacmnv110.fp  | 10 +++++-----
 src/shader/exacmnv110.fpc | 18 +++++++++---------
 src/shader/exas8nv110.fp  |  6 +++---
 src/shader/exas8nv110.fpc | 12 ++++++------
 src/shader/exasanv110.fp  | 10 +++++-----
 src/shader/exasanv110.fpc | 18 +++++++++---------
 src/shader/exascnv110.fp  |  6 +++---
 src/shader/exascnv110.fpc | 10 +++++-----
 src/shader/videonv110.fp  | 14 +++++++-------
 src/shader/videonv110.fpc | 26 +++++++++++++-------------
 14 files changed, 93 insertions(+), 93 deletions(-)

Patch hide | download patch | download mbox

diff --git a/src/shader/exac8nv110.fp b/src/shader/exac8nv110.fp
index ce78036..101b67f 100644
--- a/src/shader/exac8nv110.fp
+++ b/src/shader/exac8nv110.fp
@@ -25,23 +25,23 @@  NV110FP_Composite_A8[] = {
 };
 #else
 
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1)
 ipa pass $r0 a[0x7c] 0x0 0x0 0x1
 mufu rcp $r0 $r0
 ipa $r3 a[0x94] $r0 0x0 0x1
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0xf wr 0x1) (st 0xf wr 0x0 rd 0x1 wt 0x3) (st 0xf wr 0x1 wt 0x2)
 ipa $r2 a[0x90] $r0 0x0 0x1
 tex nodep $r1 $r2 0x0 0x1 t2d 0x8
 ipa $r3 a[0x84] $r0 0x0 0x1
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0xf wr 0x2) (st 0xf wr 0x1 wt 0x6) (st 0xf)
 ipa $r2 a[0x80] $r0 0x0 0x1
 tex nodep $r0 $r2 0x0 0x0 t2d 0x8
 depbar le 0x5 0x0 0x0
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0x6 wt 0x3) (st 0x1) (st 0x1)
 fmul ftz $r3 $r0 $r1
 mov $r2 $r3 0xf
 mov $r1 $r3 0xf
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0x1) (st 0xf) (st 0x0)
 mov $r0 $r3 0xf
 exit
 #endif
diff --git a/src/shader/exac8nv110.fpc b/src/shader/exac8nv110.fpc
index 4aa1368..1f7d649 100644
--- a/src/shader/exac8nv110.fpc
+++ b/src/shader/exac8nv110.fpc
@@ -1,36 +1,36 @@ 
-0xfc0007e0,
-0x001f8000,
+0xe1a0070f,
+0x003c3c01,
 0xcff7ff00,
 0xe003ff87,
 0x00470000,
 0x50800000,
 0x4007ff03,
 0xe043ff89,
-0xfc0007e0,
-0x001f8000,
+0x21e0072f,
+0x005cbc03,
 0x0007ff02,
 0xe043ff89,
 0x2ff70201,
 0xc03a0014,
 0x4007ff03,
 0xe043ff88,
-0xfc0007e0,
-0x001f8000,
+0xe5e0074f,
+0x001fbc06,
 0x0007ff02,
 0xe043ff88,
 0x2ff70200,
 0xc03a0004,
 0x34070000,
 0xf0f00000,
-0xfc0007e0,
-0x001f8000,
+0xfc201fe6,
+0x001f8400,
 0x00170003,
 0x5c681000,
 0x00370002,
 0x5c980780,
 0x00370001,
 0x5c980780,
-0xfc0007e0,
+0xfde007e1,
 0x001f8000,
 0x00370000,
 0x5c980780,
diff --git a/src/shader/exacanv110.fp b/src/shader/exacanv110.fp
index a70d5c5..8a9bd43 100644
--- a/src/shader/exacanv110.fp
+++ b/src/shader/exacanv110.fp
@@ -25,23 +25,23 @@  NV110FP_CAComposite[] = {
 };
 #else
 
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1)
 ipa pass $r0 a[0x7c] 0x0 0x0 0x1
 mufu rcp $r0 $r0
 ipa $r3 a[0x94] $r0 0x0 0x1
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0xf wr 0x1) (st 0xf wr 0x0 rd 0x1 wt 0x3) (st 0xf wr 0x2)
 ipa $r2 a[0x90] $r0 0x0 0x1
 tex nodep $r4 $r2 0x0 0x1 t2d 0xf
 ipa $r1 a[0x84] $r0 0x0 0x1
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0xf wr 0x2 wt 0x4) (st 0xf wr 0x2 wt 0x4) (st 0xf)
 ipa $r0 a[0x80] $r0 0x0 0x1
 tex nodep $r0 $r0 0x0 0x0 t2d 0xf
 depbar le 0x5 0x0 0x0
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0x1 wt 0x4) (st 0x1) (st 0x1)
 fmul ftz $r3 $r3 $r7
 fmul ftz $r2 $r2 $r6
 fmul ftz $r1 $r1 $r5
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0x1 wt 0x1) (st 0xf) (st 0x0)
 fmul ftz $r0 $r0 $r4
 exit
 #endif
diff --git a/src/shader/exacanv110.fpc b/src/shader/exacanv110.fpc
index 7c0ca5e..08a633c 100644
--- a/src/shader/exacanv110.fpc
+++ b/src/shader/exacanv110.fpc
@@ -1,36 +1,36 @@ 
-0xfc0007e0,
-0x001f8000,
+0xe1a0070f,
+0x003c3c01,
 0xcff7ff00,
 0xe003ff87,
 0x00470000,
 0x50800000,
 0x4007ff03,
 0xe043ff89,
-0xfc0007e0,
-0x001f8000,
+0x21e0072f,
+0x001d3c03,
 0x0007ff02,
 0xe043ff89,
 0xaff70204,
 0xc03a0017,
 0x4007ff01,
 0xe043ff88,
-0xfc0007e0,
-0x001f8000,
+0xe9e0274f,
+0x001fbc04,
 0x0007ff00,
 0xe043ff88,
 0xaff70000,
 0xc03a0007,
 0x34070000,
 0xf0f00000,
-0xfc0007e0,
-0x001f8000,
+0xfc2027e1,
+0x001f8400,
 0x00770303,
 0x5c681000,
 0x00670202,
 0x5c681000,
 0x00570101,
 0x5c681000,
-0xfc0007e0,
+0xfde00fe1,
 0x001f8000,
 0x00470000,
 0x5c681000,
diff --git a/src/shader/exacmnv110.fp b/src/shader/exacmnv110.fp
index fe5c294..39c49de 100644
--- a/src/shader/exacmnv110.fp
+++ b/src/shader/exacmnv110.fp
@@ -25,23 +25,23 @@  NV110FP_Composite[] = {
 };
 #else
 
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1)
 ipa pass $r0 a[0x7c] 0x0 0x0 0x1
 mufu rcp $r0 $r0
 ipa $r3 a[0x94] $r0 0x0 0x1
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0xf wr 0x1) (st 0xf wr 0x0 wt 0x3) (st 0xf wr 0x1 rd 0x2)
 ipa $r2 a[0x90] $r0 0x0 0x1
 tex nodep $r4 $r2 0x0 0x1 t2d 0x8
 ipa $r1 a[0x84] $r0 0x0 0x1
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0xf wr 0x2 wt 0x4) (st 0xf wr 0x1 wt 0x6) (st 0xf)
 ipa $r0 a[0x80] $r0 0x0 0x1
 tex nodep $r0 $r0 0x0 0x0 t2d 0xf
 depbar le 0x5 0x0 0x0
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0x1 wt 0x3) (st 0x1) (st 0x1)
 fmul ftz $r3 $r3 $r4
 fmul ftz $r2 $r2 $r4
 fmul ftz $r1 $r1 $r4
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0x1) (st 0xf) (st 0x0)
 fmul ftz $r0 $r0 $r4
 exit
 #endif
diff --git a/src/shader/exacmnv110.fpc b/src/shader/exacmnv110.fpc
index 9d62c1a..f5f06e2 100644
--- a/src/shader/exacmnv110.fpc
+++ b/src/shader/exacmnv110.fpc
@@ -1,36 +1,36 @@ 
-0xfc0007e0,
-0x001f8000,
+0xe1a0070f,
+0x003c3c01,
 0xcff7ff00,
 0xe003ff87,
 0x00470000,
 0x50800000,
 0x4007ff03,
 0xe043ff89,
-0xfc0007e0,
-0x001f8000,
+0xe1e0072f,
+0x0008bc03,
 0x0007ff02,
 0xe043ff89,
 0x2ff70204,
 0xc03a0014,
 0x4007ff01,
 0xe043ff88,
-0xfc0007e0,
-0x001f8000,
+0xe5e0274f,
+0x001fbc06,
 0x0007ff00,
 0xe043ff88,
 0xaff70000,
 0xc03a0007,
 0x34070000,
 0xf0f00000,
-0xfc0007e0,
-0x001f8000,
+0xfc201fe1,
+0x001f8400,
 0x00470303,
 0x5c681000,
 0x00470202,
 0x5c681000,
 0x00470101,
 0x5c681000,
-0xfc0007e0,
+0xfde007e1,
 0x001f8000,
 0x00470000,
 0x5c681000,
diff --git a/src/shader/exas8nv110.fp b/src/shader/exas8nv110.fp
index 4fe2e19..a555beb 100644
--- a/src/shader/exas8nv110.fp
+++ b/src/shader/exas8nv110.fp
@@ -25,15 +25,15 @@  NV110FP_Source_A8[] = {
 };
 #else
 
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1)
 ipa pass $r0 a[0x7c] 0x0 0x0 0x1
 mufu rcp $r0 $r0
 ipa $r1 a[0x84] $r0 0x0 0x1
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0xf wr 0x1) (st 0xf wr 0x0 wt 0x3) (st 0xf)
 ipa $r0 a[0x80] $r0 0x0 0x1
 tex nodep $r0 $r0 0x0 0x0 t2d 0x8
 depbar le 0x5 0x0 0x0
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0x1 wt 0x1) (st 0x1) (st 0x1)
 mov $r3 $r0 0xf
 mov $r2 $r0 0xf
 mov $r1 $r0 0xf
diff --git a/src/shader/exas8nv110.fpc b/src/shader/exas8nv110.fpc
index 1181c41..e58d168 100644
--- a/src/shader/exas8nv110.fpc
+++ b/src/shader/exas8nv110.fpc
@@ -1,21 +1,21 @@ 
-0xfc0007e0,
-0x001f8000,
+0xe1a0070f,
+0x003c3c01,
 0xcff7ff00,
 0xe003ff87,
 0x00470000,
 0x50800000,
 0x4007ff01,
 0xe043ff88,
-0xfc0007e0,
-0x001f8000,
+0xe1e0072f,
+0x001fbc03,
 0x0007ff00,
 0xe043ff88,
 0x2ff70000,
 0xc03a0004,
 0x34070000,
 0xf0f00000,
-0xfc0007e0,
-0x001f8000,
+0xfc200fe1,
+0x001f8400,
 0x00070003,
 0x5c980780,
 0x00070002,
diff --git a/src/shader/exasanv110.fp b/src/shader/exasanv110.fp
index 61374a6..9f8742a 100644
--- a/src/shader/exasanv110.fp
+++ b/src/shader/exasanv110.fp
@@ -25,23 +25,23 @@  NV110FP_CACompositeSrcAlpha[] = {
 };
 #else
 
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1)
 ipa pass $r0 a[0x7c] 0x0 0x0 0x1
 mufu rcp $r0 $r0
 ipa $r3 a[0x84] $r0 0x0 0x1
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0xf wr 0x1) (st 0xf wr 0x0 wt 0x3) (st 0xf wr 0x1 rd 0x2)
 ipa $r2 a[0x80] $r0 0x0 0x1
 tex nodep $r4 $r2 0x0 0x0 t2d 0x8
 ipa $r1 a[0x94] $r0 0x0 0x1
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0xf wr 0x2 wt 0x4) (st 0xf wr 0x1 wt 0x6) (st 0xf)
 ipa $r0 a[0x90] $r0 0x0 0x1
 tex nodep $r0 $r0 0x0 0x1 t2d 0xf
 depbar le 0x5 0x0 0x0
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0x1 wt 0x3) (st 0x1) (st 0x1)
 fmul ftz $r3 $r3 $r4
 fmul ftz $r2 $r2 $r4
 fmul ftz $r1 $r1 $r4
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0x1) (st 0xf) (st 0x0)
 fmul ftz $r0 $r0 $r4
 exit
 #endif
diff --git a/src/shader/exasanv110.fpc b/src/shader/exasanv110.fpc
index 5516a03..c291298 100644
--- a/src/shader/exasanv110.fpc
+++ b/src/shader/exasanv110.fpc
@@ -1,36 +1,36 @@ 
-0xfc0007e0,
-0x001f8000,
+0xe1a0070f,
+0x003c3c01,
 0xcff7ff00,
 0xe003ff87,
 0x00470000,
 0x50800000,
 0x4007ff03,
 0xe043ff88,
-0xfc0007e0,
-0x001f8000,
+0xe1e0072f,
+0x0008bc03,
 0x0007ff02,
 0xe043ff88,
 0x2ff70204,
 0xc03a0004,
 0x4007ff01,
 0xe043ff89,
-0xfc0007e0,
-0x001f8000,
+0xe5e0274f,
+0x001fbc06,
 0x0007ff00,
 0xe043ff89,
 0xaff70000,
 0xc03a0017,
 0x34070000,
 0xf0f00000,
-0xfc0007e0,
-0x001f8000,
+0xfc201fe1,
+0x001f8400,
 0x00470303,
 0x5c681000,
 0x00470202,
 0x5c681000,
 0x00470101,
 0x5c681000,
-0xfc0007e0,
+0xfde007e1,
 0x001f8000,
 0x00470000,
 0x5c681000,
diff --git a/src/shader/exascnv110.fp b/src/shader/exascnv110.fp
index 90bbb55..86e14e8 100644
--- a/src/shader/exascnv110.fp
+++ b/src/shader/exascnv110.fp
@@ -25,14 +25,14 @@  NV110FP_Source[] = {
 };
 #else
 
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1)
 ipa pass $r0 a[0x7c] 0x0 0x0 0x1
 mufu rcp $r0 $r0
 ipa $r1 a[0x84] $r0 0x0 0x1
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0xf wr 0x1) (st 0xf wt 0x3) (st 0xf)
 ipa $r0 a[0x80] $r0 0x0 0x1
 tex nodep $r0 $r0 0x0 0x0 t2d 0xf
 depbar le 0x5 0x0 0x0
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0xf) (st 0x0) (st 0x0)
 exit
 #endif
diff --git a/src/shader/exascnv110.fpc b/src/shader/exascnv110.fpc
index 2dba15d..1fef5d2 100644
--- a/src/shader/exascnv110.fpc
+++ b/src/shader/exascnv110.fpc
@@ -1,20 +1,20 @@ 
-0xfc0007e0,
-0x001f8000,
+0xe1a0070f,
+0x003c3c01,
 0xcff7ff00,
 0xe003ff87,
 0x00470000,
 0x50800000,
 0x4007ff01,
 0xe043ff88,
-0xfc0007e0,
-0x001f8000,
+0xfde0072f,
+0x001fbc03,
 0x0007ff00,
 0xe043ff88,
 0xaff70000,
 0xc03a0007,
 0x34070000,
 0xf0f00000,
-0xfc0007e0,
+0xfc0007ef,
 0x001f8000,
 0x0007000f,
 0xe3000000,
diff --git a/src/shader/videonv110.fp b/src/shader/videonv110.fp
index 2728311..dd3816c 100644
--- a/src/shader/videonv110.fp
+++ b/src/shader/videonv110.fp
@@ -25,30 +25,30 @@  NV110FP_NV12[] = {
 };
 #else
 
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1)
 ipa pass $r2 a[0x7c] 0x0 0x0 0x1
 mufu rcp $r2 $r2
 ipa $r0 a[0x80] $r2 0x0 0x1
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0xf wr 0x1) (st 0xf wr 0x0 wt 0x3) (st 0xf wr 0x1)
 ipa $r1 a[0x84] $r2 0x0 0x1
 tex nodep $r4 $r0 0x0 0x0 t2d 0x8
 tex nodep $r0 $r0 0x0 0x1 t2d 0xc
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0xf) (st 0x6 wt 0x1) (st 0x6)
 depbar le 0x5 0x1 0x1
 fmul ftz $r5 $r4 c0[0x0]
 fadd ftz $r3 $r5 c0[0x4]
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0x6) (st 0x6) (st 0xf)
 fadd ftz $r4 $r5 c0[0x8]
 fadd ftz $r5 $r5 c0[0xc]
 depbar le 0x5 0x0 0x0
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0x6 wt 0x2) (st 0x1) (st 0x1)
 ffma ftz $r3 $r0 c0[0x10] $r3
 ffma ftz $r4 $r0 c0[0x14] $r4
 ffma ftz $r5 $r0 c0[0x18] $r5
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0x1) (st 0x1) (st 0x6)
 ffma ftz $r0 $r1 c0[0x1c] $r3
 ffma ftz $r2 $r1 c0[0x24] $r5
 ffma ftz $r1 $r1 c0[0x20] $r4
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0xf) (st 0x0) (st 0x0)
 exit
 #endif
diff --git a/src/shader/videonv110.fpc b/src/shader/videonv110.fpc
index 31d745a..8fbc246 100644
--- a/src/shader/videonv110.fpc
+++ b/src/shader/videonv110.fpc
@@ -1,52 +1,52 @@ 
-0xfc0007e0,
-0x001f8000,
+0xe1a0070f,
+0x003c3c01,
 0xcff7ff02,
 0xe003ff87,
 0x00470202,
 0x50800000,
 0x0027ff00,
 0xe043ff88,
-0xfc0007e0,
-0x001f8000,
+0xe1e0072f,
+0x001cbc03,
 0x4027ff01,
 0xe043ff88,
 0x2ff70004,
 0xc03a0004,
 0x2ff70000,
 0xc03a0016,
-0xfc0007e0,
-0x001f8000,
+0xfcc007ef,
+0x001f9801,
 0x34170001,
 0xf0f00000,
 0x00070405,
 0x4c681000,
 0x00170503,
 0x4c581000,
-0xfc0007e0,
-0x001f8000,
+0xfcc007e6,
+0x001fbc00,
 0x00270504,
 0x4c581000,
 0x00370505,
 0x4c581000,
 0x34070000,
 0xf0f00000,
-0xfc0007e0,
-0x001f8000,
+0xfc2017e6,
+0x001f8400,
 0x00470003,
 0x49a00180,
 0x00570004,
 0x49a00200,
 0x00670005,
 0x49a00280,
-0xfc0007e0,
-0x001f8000,
+0xfc2007e1,
+0x001f9800,
 0x00770100,
 0x49a00180,
 0x00970102,
 0x49a00280,
 0x00870101,
 0x49a00200,
-0xfc0007e0,
+0xfc0007ef,
 0x001f8000,
 0x0007000f,
 0xe3000000,

Comments

See the 'wt' in the sched entry for the first fmul in exacanv110.fp,
exacmnv110.fp and exasanv110.fp. Any idea what could be causing the first
fmul to require $r0 and/or $r1?

Cheers,
Aaryaman

On Sat, Jun 10, 2017 at 4:10 PM, Aaryaman Vasishta <
jem456.vasishta@gmail.com> wrote:

> This patch adds proper delays to maxwell exa shaders. rendercheck tests
> seem consistent with/without this patch. I haven't extensively tested
> them though.
>
> Trello:
> https://trello.com/c/6LPB2EIS/174-update-maxwell-shaders-
> with-proper-delays
>
> Signed-off-by: Aaryaman Vasishta <jem456.vasishta@gmail.com>
> ---
>  src/shader/exac8nv110.fp  | 10 +++++-----
>  src/shader/exac8nv110.fpc | 18 +++++++++---------
>  src/shader/exacanv110.fp  | 10 +++++-----
>  src/shader/exacanv110.fpc | 18 +++++++++---------
>  src/shader/exacmnv110.fp  | 10 +++++-----
>  src/shader/exacmnv110.fpc | 18 +++++++++---------
>  src/shader/exas8nv110.fp  |  6 +++---
>  src/shader/exas8nv110.fpc | 12 ++++++------
>  src/shader/exasanv110.fp  | 10 +++++-----
>  src/shader/exasanv110.fpc | 18 +++++++++---------
>  src/shader/exascnv110.fp  |  6 +++---
>  src/shader/exascnv110.fpc | 10 +++++-----
>  src/shader/videonv110.fp  | 14 +++++++-------
>  src/shader/videonv110.fpc | 26 +++++++++++++-------------
>  14 files changed, 93 insertions(+), 93 deletions(-)
>
> diff --git a/src/shader/exac8nv110.fp b/src/shader/exac8nv110.fp
> index ce78036..101b67f 100644
> --- a/src/shader/exac8nv110.fp
> +++ b/src/shader/exac8nv110.fp
> @@ -25,23 +25,23 @@ NV110FP_Composite_A8[] = {
>  };
>  #else
>
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1)
>  ipa pass $r0 a[0x7c] 0x0 0x0 0x1
>  mufu rcp $r0 $r0
>  ipa $r3 a[0x94] $r0 0x0 0x1
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0xf wr 0x1) (st 0xf wr 0x0 rd 0x1 wt 0x3) (st 0xf wr 0x1 wt 0x2)
>  ipa $r2 a[0x90] $r0 0x0 0x1
>  tex nodep $r1 $r2 0x0 0x1 t2d 0x8
>  ipa $r3 a[0x84] $r0 0x0 0x1
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0xf wr 0x2) (st 0xf wr 0x1 wt 0x6) (st 0xf)
>  ipa $r2 a[0x80] $r0 0x0 0x1
>  tex nodep $r0 $r2 0x0 0x0 t2d 0x8
>  depbar le 0x5 0x0 0x0
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0x6 wt 0x3) (st 0x1) (st 0x1)
>  fmul ftz $r3 $r0 $r1
>  mov $r2 $r3 0xf
>  mov $r1 $r3 0xf
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0x1) (st 0xf) (st 0x0)
>  mov $r0 $r3 0xf
>  exit
>  #endif
> diff --git a/src/shader/exac8nv110.fpc b/src/shader/exac8nv110.fpc
> index 4aa1368..1f7d649 100644
> --- a/src/shader/exac8nv110.fpc
> +++ b/src/shader/exac8nv110.fpc
> @@ -1,36 +1,36 @@
> -0xfc0007e0,
> -0x001f8000,
> +0xe1a0070f,
> +0x003c3c01,
>  0xcff7ff00,
>  0xe003ff87,
>  0x00470000,
>  0x50800000,
>  0x4007ff03,
>  0xe043ff89,
> -0xfc0007e0,
> -0x001f8000,
> +0x21e0072f,
> +0x005cbc03,
>  0x0007ff02,
>  0xe043ff89,
>  0x2ff70201,
>  0xc03a0014,
>  0x4007ff03,
>  0xe043ff88,
> -0xfc0007e0,
> -0x001f8000,
> +0xe5e0074f,
> +0x001fbc06,
>  0x0007ff02,
>  0xe043ff88,
>  0x2ff70200,
>  0xc03a0004,
>  0x34070000,
>  0xf0f00000,
> -0xfc0007e0,
> -0x001f8000,
> +0xfc201fe6,
> +0x001f8400,
>  0x00170003,
>  0x5c681000,
>  0x00370002,
>  0x5c980780,
>  0x00370001,
>  0x5c980780,
> -0xfc0007e0,
> +0xfde007e1,
>  0x001f8000,
>  0x00370000,
>  0x5c980780,
> diff --git a/src/shader/exacanv110.fp b/src/shader/exacanv110.fp
> index a70d5c5..8a9bd43 100644
> --- a/src/shader/exacanv110.fp
> +++ b/src/shader/exacanv110.fp
> @@ -25,23 +25,23 @@ NV110FP_CAComposite[] = {
>  };
>  #else
>
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1)
>  ipa pass $r0 a[0x7c] 0x0 0x0 0x1
>  mufu rcp $r0 $r0
>  ipa $r3 a[0x94] $r0 0x0 0x1
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0xf wr 0x1) (st 0xf wr 0x0 rd 0x1 wt 0x3) (st 0xf wr 0x2)
>  ipa $r2 a[0x90] $r0 0x0 0x1
>  tex nodep $r4 $r2 0x0 0x1 t2d 0xf
>  ipa $r1 a[0x84] $r0 0x0 0x1
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0xf wr 0x2 wt 0x4) (st 0xf wr 0x2 wt 0x4) (st 0xf)
>  ipa $r0 a[0x80] $r0 0x0 0x1
>  tex nodep $r0 $r0 0x0 0x0 t2d 0xf
>  depbar le 0x5 0x0 0x0
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0x1 wt 0x4) (st 0x1) (st 0x1)
>  fmul ftz $r3 $r3 $r7
>  fmul ftz $r2 $r2 $r6
>  fmul ftz $r1 $r1 $r5
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0x1 wt 0x1) (st 0xf) (st 0x0)
>  fmul ftz $r0 $r0 $r4
>  exit
>  #endif
> diff --git a/src/shader/exacanv110.fpc b/src/shader/exacanv110.fpc
> index 7c0ca5e..08a633c 100644
> --- a/src/shader/exacanv110.fpc
> +++ b/src/shader/exacanv110.fpc
> @@ -1,36 +1,36 @@
> -0xfc0007e0,
> -0x001f8000,
> +0xe1a0070f,
> +0x003c3c01,
>  0xcff7ff00,
>  0xe003ff87,
>  0x00470000,
>  0x50800000,
>  0x4007ff03,
>  0xe043ff89,
> -0xfc0007e0,
> -0x001f8000,
> +0x21e0072f,
> +0x001d3c03,
>  0x0007ff02,
>  0xe043ff89,
>  0xaff70204,
>  0xc03a0017,
>  0x4007ff01,
>  0xe043ff88,
> -0xfc0007e0,
> -0x001f8000,
> +0xe9e0274f,
> +0x001fbc04,
>  0x0007ff00,
>  0xe043ff88,
>  0xaff70000,
>  0xc03a0007,
>  0x34070000,
>  0xf0f00000,
> -0xfc0007e0,
> -0x001f8000,
> +0xfc2027e1,
> +0x001f8400,
>  0x00770303,
>  0x5c681000,
>  0x00670202,
>  0x5c681000,
>  0x00570101,
>  0x5c681000,
> -0xfc0007e0,
> +0xfde00fe1,
>  0x001f8000,
>  0x00470000,
>  0x5c681000,
> diff --git a/src/shader/exacmnv110.fp b/src/shader/exacmnv110.fp
> index fe5c294..39c49de 100644
> --- a/src/shader/exacmnv110.fp
> +++ b/src/shader/exacmnv110.fp
> @@ -25,23 +25,23 @@ NV110FP_Composite[] = {
>  };
>  #else
>
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1)
>  ipa pass $r0 a[0x7c] 0x0 0x0 0x1
>  mufu rcp $r0 $r0
>  ipa $r3 a[0x94] $r0 0x0 0x1
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0xf wr 0x1) (st 0xf wr 0x0 wt 0x3) (st 0xf wr 0x1 rd 0x2)
>  ipa $r2 a[0x90] $r0 0x0 0x1
>  tex nodep $r4 $r2 0x0 0x1 t2d 0x8
>  ipa $r1 a[0x84] $r0 0x0 0x1
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0xf wr 0x2 wt 0x4) (st 0xf wr 0x1 wt 0x6) (st 0xf)
>  ipa $r0 a[0x80] $r0 0x0 0x1
>  tex nodep $r0 $r0 0x0 0x0 t2d 0xf
>  depbar le 0x5 0x0 0x0
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0x1 wt 0x3) (st 0x1) (st 0x1)
>  fmul ftz $r3 $r3 $r4
>  fmul ftz $r2 $r2 $r4
>  fmul ftz $r1 $r1 $r4
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0x1) (st 0xf) (st 0x0)
>  fmul ftz $r0 $r0 $r4
>  exit
>  #endif
> diff --git a/src/shader/exacmnv110.fpc b/src/shader/exacmnv110.fpc
> index 9d62c1a..f5f06e2 100644
> --- a/src/shader/exacmnv110.fpc
> +++ b/src/shader/exacmnv110.fpc
> @@ -1,36 +1,36 @@
> -0xfc0007e0,
> -0x001f8000,
> +0xe1a0070f,
> +0x003c3c01,
>  0xcff7ff00,
>  0xe003ff87,
>  0x00470000,
>  0x50800000,
>  0x4007ff03,
>  0xe043ff89,
> -0xfc0007e0,
> -0x001f8000,
> +0xe1e0072f,
> +0x0008bc03,
>  0x0007ff02,
>  0xe043ff89,
>  0x2ff70204,
>  0xc03a0014,
>  0x4007ff01,
>  0xe043ff88,
> -0xfc0007e0,
> -0x001f8000,
> +0xe5e0274f,
> +0x001fbc06,
>  0x0007ff00,
>  0xe043ff88,
>  0xaff70000,
>  0xc03a0007,
>  0x34070000,
>  0xf0f00000,
> -0xfc0007e0,
> -0x001f8000,
> +0xfc201fe1,
> +0x001f8400,
>  0x00470303,
>  0x5c681000,
>  0x00470202,
>  0x5c681000,
>  0x00470101,
>  0x5c681000,
> -0xfc0007e0,
> +0xfde007e1,
>  0x001f8000,
>  0x00470000,
>  0x5c681000,
> diff --git a/src/shader/exas8nv110.fp b/src/shader/exas8nv110.fp
> index 4fe2e19..a555beb 100644
> --- a/src/shader/exas8nv110.fp
> +++ b/src/shader/exas8nv110.fp
> @@ -25,15 +25,15 @@ NV110FP_Source_A8[] = {
>  };
>  #else
>
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1)
>  ipa pass $r0 a[0x7c] 0x0 0x0 0x1
>  mufu rcp $r0 $r0
>  ipa $r1 a[0x84] $r0 0x0 0x1
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0xf wr 0x1) (st 0xf wr 0x0 wt 0x3) (st 0xf)
>  ipa $r0 a[0x80] $r0 0x0 0x1
>  tex nodep $r0 $r0 0x0 0x0 t2d 0x8
>  depbar le 0x5 0x0 0x0
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0x1 wt 0x1) (st 0x1) (st 0x1)
>  mov $r3 $r0 0xf
>  mov $r2 $r0 0xf
>  mov $r1 $r0 0xf
> diff --git a/src/shader/exas8nv110.fpc b/src/shader/exas8nv110.fpc
> index 1181c41..e58d168 100644
> --- a/src/shader/exas8nv110.fpc
> +++ b/src/shader/exas8nv110.fpc
> @@ -1,21 +1,21 @@
> -0xfc0007e0,
> -0x001f8000,
> +0xe1a0070f,
> +0x003c3c01,
>  0xcff7ff00,
>  0xe003ff87,
>  0x00470000,
>  0x50800000,
>  0x4007ff01,
>  0xe043ff88,
> -0xfc0007e0,
> -0x001f8000,
> +0xe1e0072f,
> +0x001fbc03,
>  0x0007ff00,
>  0xe043ff88,
>  0x2ff70000,
>  0xc03a0004,
>  0x34070000,
>  0xf0f00000,
> -0xfc0007e0,
> -0x001f8000,
> +0xfc200fe1,
> +0x001f8400,
>  0x00070003,
>  0x5c980780,
>  0x00070002,
> diff --git a/src/shader/exasanv110.fp b/src/shader/exasanv110.fp
> index 61374a6..9f8742a 100644
> --- a/src/shader/exasanv110.fp
> +++ b/src/shader/exasanv110.fp
> @@ -25,23 +25,23 @@ NV110FP_CACompositeSrcAlpha[] = {
>  };
>  #else
>
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1)
>  ipa pass $r0 a[0x7c] 0x0 0x0 0x1
>  mufu rcp $r0 $r0
>  ipa $r3 a[0x84] $r0 0x0 0x1
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0xf wr 0x1) (st 0xf wr 0x0 wt 0x3) (st 0xf wr 0x1 rd 0x2)
>  ipa $r2 a[0x80] $r0 0x0 0x1
>  tex nodep $r4 $r2 0x0 0x0 t2d 0x8
>  ipa $r1 a[0x94] $r0 0x0 0x1
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0xf wr 0x2 wt 0x4) (st 0xf wr 0x1 wt 0x6) (st 0xf)
>  ipa $r0 a[0x90] $r0 0x0 0x1
>  tex nodep $r0 $r0 0x0 0x1 t2d 0xf
>  depbar le 0x5 0x0 0x0
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0x1 wt 0x3) (st 0x1) (st 0x1)
>  fmul ftz $r3 $r3 $r4
>  fmul ftz $r2 $r2 $r4
>  fmul ftz $r1 $r1 $r4
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0x1) (st 0xf) (st 0x0)
>  fmul ftz $r0 $r0 $r4
>  exit
>  #endif
> diff --git a/src/shader/exasanv110.fpc b/src/shader/exasanv110.fpc
> index 5516a03..c291298 100644
> --- a/src/shader/exasanv110.fpc
> +++ b/src/shader/exasanv110.fpc
> @@ -1,36 +1,36 @@
> -0xfc0007e0,
> -0x001f8000,
> +0xe1a0070f,
> +0x003c3c01,
>  0xcff7ff00,
>  0xe003ff87,
>  0x00470000,
>  0x50800000,
>  0x4007ff03,
>  0xe043ff88,
> -0xfc0007e0,
> -0x001f8000,
> +0xe1e0072f,
> +0x0008bc03,
>  0x0007ff02,
>  0xe043ff88,
>  0x2ff70204,
>  0xc03a0004,
>  0x4007ff01,
>  0xe043ff89,
> -0xfc0007e0,
> -0x001f8000,
> +0xe5e0274f,
> +0x001fbc06,
>  0x0007ff00,
>  0xe043ff89,
>  0xaff70000,
>  0xc03a0017,
>  0x34070000,
>  0xf0f00000,
> -0xfc0007e0,
> -0x001f8000,
> +0xfc201fe1,
> +0x001f8400,
>  0x00470303,
>  0x5c681000,
>  0x00470202,
>  0x5c681000,
>  0x00470101,
>  0x5c681000,
> -0xfc0007e0,
> +0xfde007e1,
>  0x001f8000,
>  0x00470000,
>  0x5c681000,
> diff --git a/src/shader/exascnv110.fp b/src/shader/exascnv110.fp
> index 90bbb55..86e14e8 100644
> --- a/src/shader/exascnv110.fp
> +++ b/src/shader/exascnv110.fp
> @@ -25,14 +25,14 @@ NV110FP_Source[] = {
>  };
>  #else
>
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1)
>  ipa pass $r0 a[0x7c] 0x0 0x0 0x1
>  mufu rcp $r0 $r0
>  ipa $r1 a[0x84] $r0 0x0 0x1
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0xf wr 0x1) (st 0xf wt 0x3) (st 0xf)
>  ipa $r0 a[0x80] $r0 0x0 0x1
>  tex nodep $r0 $r0 0x0 0x0 t2d 0xf
>  depbar le 0x5 0x0 0x0
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0xf) (st 0x0) (st 0x0)
>  exit
>  #endif
> diff --git a/src/shader/exascnv110.fpc b/src/shader/exascnv110.fpc
> index 2dba15d..1fef5d2 100644
> --- a/src/shader/exascnv110.fpc
> +++ b/src/shader/exascnv110.fpc
> @@ -1,20 +1,20 @@
> -0xfc0007e0,
> -0x001f8000,
> +0xe1a0070f,
> +0x003c3c01,
>  0xcff7ff00,
>  0xe003ff87,
>  0x00470000,
>  0x50800000,
>  0x4007ff01,
>  0xe043ff88,
> -0xfc0007e0,
> -0x001f8000,
> +0xfde0072f,
> +0x001fbc03,
>  0x0007ff00,
>  0xe043ff88,
>  0xaff70000,
>  0xc03a0007,
>  0x34070000,
>  0xf0f00000,
> -0xfc0007e0,
> +0xfc0007ef,
>  0x001f8000,
>  0x0007000f,
>  0xe3000000,
> diff --git a/src/shader/videonv110.fp b/src/shader/videonv110.fp
> index 2728311..dd3816c 100644
> --- a/src/shader/videonv110.fp
> +++ b/src/shader/videonv110.fp
> @@ -25,30 +25,30 @@ NV110FP_NV12[] = {
>  };
>  #else
>
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1)
>  ipa pass $r2 a[0x7c] 0x0 0x0 0x1
>  mufu rcp $r2 $r2
>  ipa $r0 a[0x80] $r2 0x0 0x1
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0xf wr 0x1) (st 0xf wr 0x0 wt 0x3) (st 0xf wr 0x1)
>  ipa $r1 a[0x84] $r2 0x0 0x1
>  tex nodep $r4 $r0 0x0 0x0 t2d 0x8
>  tex nodep $r0 $r0 0x0 0x1 t2d 0xc
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0xf) (st 0x6 wt 0x1) (st 0x6)
>  depbar le 0x5 0x1 0x1
>  fmul ftz $r5 $r4 c0[0x0]
>  fadd ftz $r3 $r5 c0[0x4]
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0x6) (st 0x6) (st 0xf)
>  fadd ftz $r4 $r5 c0[0x8]
>  fadd ftz $r5 $r5 c0[0xc]
>  depbar le 0x5 0x0 0x0
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0x6 wt 0x2) (st 0x1) (st 0x1)
>  ffma ftz $r3 $r0 c0[0x10] $r3
>  ffma ftz $r4 $r0 c0[0x14] $r4
>  ffma ftz $r5 $r0 c0[0x18] $r5
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0x1) (st 0x1) (st 0x6)
>  ffma ftz $r0 $r1 c0[0x1c] $r3
>  ffma ftz $r2 $r1 c0[0x24] $r5
>  ffma ftz $r1 $r1 c0[0x20] $r4
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0xf) (st 0x0) (st 0x0)
>  exit
>  #endif
> diff --git a/src/shader/videonv110.fpc b/src/shader/videonv110.fpc
> index 31d745a..8fbc246 100644
> --- a/src/shader/videonv110.fpc
> +++ b/src/shader/videonv110.fpc
> @@ -1,52 +1,52 @@
> -0xfc0007e0,
> -0x001f8000,
> +0xe1a0070f,
> +0x003c3c01,
>  0xcff7ff02,
>  0xe003ff87,
>  0x00470202,
>  0x50800000,
>  0x0027ff00,
>  0xe043ff88,
> -0xfc0007e0,
> -0x001f8000,
> +0xe1e0072f,
> +0x001cbc03,
>  0x4027ff01,
>  0xe043ff88,
>  0x2ff70004,
>  0xc03a0004,
>  0x2ff70000,
>  0xc03a0016,
> -0xfc0007e0,
> -0x001f8000,
> +0xfcc007ef,
> +0x001f9801,
>  0x34170001,
>  0xf0f00000,
>  0x00070405,
>  0x4c681000,
>  0x00170503,
>  0x4c581000,
> -0xfc0007e0,
> -0x001f8000,
> +0xfcc007e6,
> +0x001fbc00,
>  0x00270504,
>  0x4c581000,
>  0x00370505,
>  0x4c581000,
>  0x34070000,
>  0xf0f00000,
> -0xfc0007e0,
> -0x001f8000,
> +0xfc2017e6,
> +0x001f8400,
>  0x00470003,
>  0x49a00180,
>  0x00570004,
>  0x49a00200,
>  0x00670005,
>  0x49a00280,
> -0xfc0007e0,
> -0x001f8000,
> +0xfc2007e1,
> +0x001f9800,
>  0x00770100,
>  0x49a00180,
>  0x00970102,
>  0x49a00280,
>  0x00870101,
>  0x49a00200,
> -0xfc0007e0,
> +0xfc0007ef,
>  0x001f8000,
>  0x0007000f,
>  0xe3000000,
> --
> 2.11.0
>
>
On 06/10/2017 09:14 AM, Aaryaman Vasishta wrote:
> See the 'wt' on the first fmul in exacanv110.fp, exacmnv110.fp and 
> exasanv110.fp. Any ideas on what could be causing the first fmul to 
> require $r0 and/or $r1?

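'tex nodep $r4 $r2 0x0 0x1 t2d 0xf'

is actually:

'tex nodep $r4:$r7 $r2 0x0 0x1 t2d 0xf'

That is, with the 0xf component mask the tex writes four consecutive
registers starting at $r4, so the fmuls that read $r5..$r7 depend on it as
well. Very confusing, I know.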

> 
> Cheers,
> Aaryaman
> 
> On Sat, Jun 10, 2017 at 4:10 PM, Aaryaman Vasishta 
> <jem456.vasishta@gmail.com <mailto:jem456.vasishta@gmail.com>> wrote:
> 
>     This patch adds proper delays to maxwell exa shaders. rendercheck tests
>     seem consistent with/without this patch. I haven't extensively tested
>     them though.
> 
>     Trello:
>     https://trello.com/c/6LPB2EIS/174-update-maxwell-shaders-with-proper-delays
>     <https://trello.com/c/6LPB2EIS/174-update-maxwell-shaders-with-proper-delays>
> 
>     Signed-off-by: Aaryaman Vasishta <jem456.vasishta@gmail.com
>     <mailto:jem456.vasishta@gmail.com>>
>     ---
>       src/shader/exac8nv110.fp  | 10 +++++-----
>       src/shader/exac8nv110.fpc | 18 +++++++++---------
>       src/shader/exacanv110.fp  | 10 +++++-----
>       src/shader/exacanv110.fpc | 18 +++++++++---------
>       src/shader/exacmnv110.fp  | 10 +++++-----
>       src/shader/exacmnv110.fpc | 18 +++++++++---------
>       src/shader/exas8nv110.fp  |  6 +++---
>       src/shader/exas8nv110.fpc | 12 ++++++------
>       src/shader/exasanv110.fp  | 10 +++++-----
>       src/shader/exasanv110.fpc | 18 +++++++++---------
>       src/shader/exascnv110.fp  |  6 +++---
>       src/shader/exascnv110.fpc | 10 +++++-----
>       src/shader/videonv110.fp  | 14 +++++++-------
>       src/shader/videonv110.fpc | 26 +++++++++++++-------------
>       14 files changed, 93 insertions(+), 93 deletions(-)
> 
>     diff --git a/src/shader/exac8nv110.fp b/src/shader/exac8nv110.fp
>     index ce78036..101b67f 100644
>     --- a/src/shader/exac8nv110.fp
>     +++ b/src/shader/exac8nv110.fp
>     @@ -25,23 +25,23 @@ NV110FP_Composite_A8[] = {
>       };
>       #else
> 
>     -sched (st 0x0) (st 0x0) (st 0x0)
>     +sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1)
>       ipa pass $r0 a[0x7c] 0x0 0x0 0x1
>       mufu rcp $r0 $r0
>       ipa $r3 a[0x94] $r0 0x0 0x1
>     -sched (st 0x0) (st 0x0) (st 0x0)
>     +sched (st 0xf wr 0x1) (st 0xf wr 0x0 rd 0x1 wt 0x3) (st 0xf wr 0x1
>     wt 0x2)
>       ipa $r2 a[0x90] $r0 0x0 0x1
>       tex nodep $r1 $r2 0x0 0x1 t2d 0x8
>       ipa $r3 a[0x84] $r0 0x0 0x1
>     -sched (st 0x0) (st 0x0) (st 0x0)
>     +sched (st 0xf wr 0x2) (st 0xf wr 0x1 wt 0x6) (st 0xf)
>       ipa $r2 a[0x80] $r0 0x0 0x1
>       tex nodep $r0 $r2 0x0 0x0 t2d 0x8
>       depbar le 0x5 0x0 0x0
>     -sched (st 0x0) (st 0x0) (st 0x0)
>     +sched (st 0x6 wt 0x3) (st 0x1) (st 0x1)
>       fmul ftz $r3 $r0 $r1
>       mov $r2 $r3 0xf
>       mov $r1 $r3 0xf
>     -sched (st 0x0) (st 0x0) (st 0x0)
>     +sched (st 0x1) (st 0xf) (st 0x0)
>       mov $r0 $r3 0xf
>       exit
>       #endif
>     diff --git a/src/shader/exac8nv110.fpc b/src/shader/exac8nv110.fpc
>     index 4aa1368..1f7d649 100644
>     --- a/src/shader/exac8nv110.fpc
>     +++ b/src/shader/exac8nv110.fpc
>     @@ -1,36 +1,36 @@
>     -0xfc0007e0,
>     -0x001f8000,
>     +0xe1a0070f,
>     +0x003c3c01,
>       0xcff7ff00,
>       0xe003ff87,
>       0x00470000,
>       0x50800000,
>       0x4007ff03,
>       0xe043ff89,
>     -0xfc0007e0,
>     -0x001f8000,
>     +0x21e0072f,
>     +0x005cbc03,
>       0x0007ff02,
>       0xe043ff89,
>       0x2ff70201,
>       0xc03a0014,
>       0x4007ff03,
>       0xe043ff88,
>     -0xfc0007e0,
>     -0x001f8000,
>     +0xe5e0074f,
>     +0x001fbc06,
>       0x0007ff02,
>       0xe043ff88,
>       0x2ff70200,
>       0xc03a0004,
>       0x34070000,
>       0xf0f00000,
>     -0xfc0007e0,
>     -0x001f8000,
>     +0xfc201fe6,
>     +0x001f8400,
>       0x00170003,
>       0x5c681000,
>       0x00370002,
>       0x5c980780,
>       0x00370001,
>       0x5c980780,
>     -0xfc0007e0,
>     +0xfde007e1,
>       0x001f8000,
>       0x00370000,
>       0x5c980780,
>     diff --git a/src/shader/exacanv110.fp b/src/shader/exacanv110.fp
>     index a70d5c5..8a9bd43 100644
>     --- a/src/shader/exacanv110.fp
>     +++ b/src/shader/exacanv110.fp
>     @@ -25,23 +25,23 @@ NV110FP_CAComposite[] = {
>       };
>       #else
> 
>     -sched (st 0x0) (st 0x0) (st 0x0)
>     +sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1)
>       ipa pass $r0 a[0x7c] 0x0 0x0 0x1
>       mufu rcp $r0 $r0
>       ipa $r3 a[0x94] $r0 0x0 0x1
>     -sched (st 0x0) (st 0x0) (st 0x0)
>     +sched (st 0xf wr 0x1) (st 0xf wr 0x0 rd 0x1 wt 0x3) (st 0xf wr 0x2)
>       ipa $r2 a[0x90] $r0 0x0 0x1
>       tex nodep $r4 $r2 0x0 0x1 t2d 0xf
>       ipa $r1 a[0x84] $r0 0x0 0x1
>     -sched (st 0x0) (st 0x0) (st 0x0)
>     +sched (st 0xf wr 0x2 wt 0x4) (st 0xf wr 0x2 wt 0x4) (st 0xf)
>       ipa $r0 a[0x80] $r0 0x0 0x1
>       tex nodep $r0 $r0 0x0 0x0 t2d 0xf
>       depbar le 0x5 0x0 0x0
>     -sched (st 0x0) (st 0x0) (st 0x0)
>     +sched (st 0x1 wt 0x4) (st 0x1) (st 0x1)
>       fmul ftz $r3 $r3 $r7
>       fmul ftz $r2 $r2 $r6
>       fmul ftz $r1 $r1 $r5
>     -sched (st 0x0) (st 0x0) (st 0x0)
>     +sched (st 0x1 wt 0x1) (st 0xf) (st 0x0)
>       fmul ftz $r0 $r0 $r4
>       exit
>       #endif
>     diff --git a/src/shader/exacanv110.fpc b/src/shader/exacanv110.fpc
>     index 7c0ca5e..08a633c 100644
>     --- a/src/shader/exacanv110.fpc
>     +++ b/src/shader/exacanv110.fpc
>     @@ -1,36 +1,36 @@
>     -0xfc0007e0,
>     -0x001f8000,
>     +0xe1a0070f,
>     +0x003c3c01,
>       0xcff7ff00,
>       0xe003ff87,
>       0x00470000,
>       0x50800000,
>       0x4007ff03,
>       0xe043ff89,
>     -0xfc0007e0,
>     -0x001f8000,
>     +0x21e0072f,
>     +0x001d3c03,
>       0x0007ff02,
>       0xe043ff89,
>       0xaff70204,
>       0xc03a0017,
>       0x4007ff01,
>       0xe043ff88,
>     -0xfc0007e0,
>     -0x001f8000,
>     +0xe9e0274f,
>     +0x001fbc04,
>       0x0007ff00,
>       0xe043ff88,
>       0xaff70000,
>       0xc03a0007,
>       0x34070000,
>       0xf0f00000,
>     -0xfc0007e0,
>     -0x001f8000,
>     +0xfc2027e1,
>     +0x001f8400,
>       0x00770303,
>       0x5c681000,
>       0x00670202,
>       0x5c681000,
>       0x00570101,
>       0x5c681000,
>     -0xfc0007e0,
>     +0xfde00fe1,
>       0x001f8000,
>       0x00470000,
>       0x5c681000,
>     diff --git a/src/shader/exacmnv110.fp b/src/shader/exacmnv110.fp
>     index fe5c294..39c49de 100644
>     --- a/src/shader/exacmnv110.fp
>     +++ b/src/shader/exacmnv110.fp
>     @@ -25,23 +25,23 @@ NV110FP_Composite[] = {
>       };
>       #else
> 
>     -sched (st 0x0) (st 0x0) (st 0x0)
>     +sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1)
>       ipa pass $r0 a[0x7c] 0x0 0x0 0x1
>       mufu rcp $r0 $r0
>       ipa $r3 a[0x94] $r0 0x0 0x1
>     -sched (st 0x0) (st 0x0) (st 0x0)
>     +sched (st 0xf wr 0x1) (st 0xf wr 0x0 wt 0x3) (st 0xf wr 0x1 rd 0x2)
>       ipa $r2 a[0x90] $r0 0x0 0x1
>       tex nodep $r4 $r2 0x0 0x1 t2d 0x8
>       ipa $r1 a[0x84] $r0 0x0 0x1
>     -sched (st 0x0) (st 0x0) (st 0x0)
>     +sched (st 0xf wr 0x2 wt 0x4) (st 0xf wr 0x1 wt 0x6) (st 0xf)
>       ipa $r0 a[0x80] $r0 0x0 0x1
>       tex nodep $r0 $r0 0x0 0x0 t2d 0xf
>       depbar le 0x5 0x0 0x0
>     -sched (st 0x0) (st 0x0) (st 0x0)
>     +sched (st 0x1 wt 0x3) (st 0x1) (st 0x1)
>       fmul ftz $r3 $r3 $r4
>       fmul ftz $r2 $r2 $r4
>       fmul ftz $r1 $r1 $r4
>     -sched (st 0x0) (st 0x0) (st 0x0)
>     +sched (st 0x1) (st 0xf) (st 0x0)
>       fmul ftz $r0 $r0 $r4
>       exit
>       #endif
>     diff --git a/src/shader/exacmnv110.fpc b/src/shader/exacmnv110.fpc
>     index 9d62c1a..f5f06e2 100644
>     --- a/src/shader/exacmnv110.fpc
>     +++ b/src/shader/exacmnv110.fpc
>     @@ -1,36 +1,36 @@
>     -0xfc0007e0,
>     -0x001f8000,
>     +0xe1a0070f,
>     +0x003c3c01,
>       0xcff7ff00,
>       0xe003ff87,
>       0x00470000,
>       0x50800000,
>       0x4007ff03,
>       0xe043ff89,
>     -0xfc0007e0,
>     -0x001f8000,
>     +0xe1e0072f,
>     +0x0008bc03,
>       0x0007ff02,
>       0xe043ff89,
>       0x2ff70204,
>       0xc03a0014,
>       0x4007ff01,
>       0xe043ff88,
>     -0xfc0007e0,
>     -0x001f8000,
>     +0xe5e0274f,
>     +0x001fbc06,
>       0x0007ff00,
>       0xe043ff88,
>       0xaff70000,
>       0xc03a0007,
>       0x34070000,
>       0xf0f00000,
>     -0xfc0007e0,
>     -0x001f8000,
>     +0xfc201fe1,
>     +0x001f8400,
>       0x00470303,
>       0x5c681000,
>       0x00470202,
>       0x5c681000,
>       0x00470101,
>       0x5c681000,
>     -0xfc0007e0,
>     +0xfde007e1,
>       0x001f8000,
>       0x00470000,
>       0x5c681000,
>     diff --git a/src/shader/exas8nv110.fp b/src/shader/exas8nv110.fp
>     index 4fe2e19..a555beb 100644
>     --- a/src/shader/exas8nv110.fp
>     +++ b/src/shader/exas8nv110.fp
>     @@ -25,15 +25,15 @@ NV110FP_Source_A8[] = {
>       };
>       #else
> 
>     -sched (st 0x0) (st 0x0) (st 0x0)
>     +sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1)
>       ipa pass $r0 a[0x7c] 0x0 0x0 0x1
>       mufu rcp $r0 $r0
>       ipa $r1 a[0x84] $r0 0x0 0x1
>     -sched (st 0x0) (st 0x0) (st 0x0)
>     +sched (st 0xf wr 0x1) (st 0xf wr 0x0 wt 0x3) (st 0xf)
>       ipa $r0 a[0x80] $r0 0x0 0x1
>       tex nodep $r0 $r0 0x0 0x0 t2d 0x8
>       depbar le 0x5 0x0 0x0
>     -sched (st 0x0) (st 0x0) (st 0x0)
>     +sched (st 0x1 wt 0x1) (st 0x1) (st 0x1)
>       mov $r3 $r0 0xf
>       mov $r2 $r0 0xf
>       mov $r1 $r0 0xf
>     diff --git a/src/shader/exas8nv110.fpc b/src/shader/exas8nv110.fpc
>     index 1181c41..e58d168 100644
>     --- a/src/shader/exas8nv110.fpc
>     +++ b/src/shader/exas8nv110.fpc
>     @@ -1,21 +1,21 @@
>     -0xfc0007e0,
>     -0x001f8000,
>     +0xe1a0070f,
>     +0x003c3c01,
>       0xcff7ff00,
>       0xe003ff87,
>       0x00470000,
>       0x50800000,
>       0x4007ff01,
>       0xe043ff88,
>     -0xfc0007e0,
>     -0x001f8000,
>     +0xe1e0072f,
>     +0x001fbc03,
>       0x0007ff00,
>       0xe043ff88,
>       0x2ff70000,
>       0xc03a0004,
>       0x34070000,
>       0xf0f00000,
>     -0xfc0007e0,
>     -0x001f8000,
>     +0xfc200fe1,
>     +0x001f8400,
>       0x00070003,
>       0x5c980780,
>       0x00070002,
>     diff --git a/src/shader/exasanv110.fp b/src/shader/exasanv110.fp
>     index 61374a6..9f8742a 100644
>     --- a/src/shader/exasanv110.fp
>     +++ b/src/shader/exasanv110.fp
>     @@ -25,23 +25,23 @@ NV110FP_CACompositeSrcAlpha[] = {
>       };
>       #else
> 
>     -sched (st 0x0) (st 0x0) (st 0x0)
>     +sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1)
>       ipa pass $r0 a[0x7c] 0x0 0x0 0x1
>       mufu rcp $r0 $r0
>       ipa $r3 a[0x84] $r0 0x0 0x1
>     -sched (st 0x0) (st 0x0) (st 0x0)
>     +sched (st 0xf wr 0x1) (st 0xf wr 0x0 wt 0x3) (st 0xf wr 0x1 rd 0x2)
>       ipa $r2 a[0x80] $r0 0x0 0x1
>       tex nodep $r4 $r2 0x0 0x0 t2d 0x8
>       ipa $r1 a[0x94] $r0 0x0 0x1
>     -sched (st 0x0) (st 0x0) (st 0x0)
>     +sched (st 0xf wr 0x2 wt 0x4) (st 0xf wr 0x1 wt 0x6) (st 0xf)
>       ipa $r0 a[0x90] $r0 0x0 0x1
>       tex nodep $r0 $r0 0x0 0x1 t2d 0xf
>       depbar le 0x5 0x0 0x0
>     -sched (st 0x0) (st 0x0) (st 0x0)
>     +sched (st 0x1 wt 0x3) (st 0x1) (st 0x1)
>       fmul ftz $r3 $r3 $r4
>       fmul ftz $r2 $r2 $r4
>       fmul ftz $r1 $r1 $r4
>     -sched (st 0x0) (st 0x0) (st 0x0)
>     +sched (st 0x1) (st 0xf) (st 0x0)
>       fmul ftz $r0 $r0 $r4
>       exit
>       #endif
>     diff --git a/src/shader/exasanv110.fpc b/src/shader/exasanv110.fpc
>     index 5516a03..c291298 100644
>     --- a/src/shader/exasanv110.fpc
>     +++ b/src/shader/exasanv110.fpc
>     @@ -1,36 +1,36 @@
>     -0xfc0007e0,
>     -0x001f8000,
>     +0xe1a0070f,
>     +0x003c3c01,
>       0xcff7ff00,
>       0xe003ff87,
>       0x00470000,
>       0x50800000,
>       0x4007ff03,
>       0xe043ff88,
>     -0xfc0007e0,
>     -0x001f8000,
>     +0xe1e0072f,
>     +0x0008bc03,
>       0x0007ff02,
>       0xe043ff88,
>       0x2ff70204,
>       0xc03a0004,
>       0x4007ff01,
>       0xe043ff89,
>     -0xfc0007e0,
>     -0x001f8000,
>     +0xe5e0274f,
>     +0x001fbc06,
>       0x0007ff00,
>       0xe043ff89,
>       0xaff70000,
>       0xc03a0017,
>       0x34070000,
>       0xf0f00000,
>     -0xfc0007e0,
>     -0x001f8000,
>     +0xfc201fe1,
>     +0x001f8400,
>       0x00470303,
>       0x5c681000,
>       0x00470202,
>       0x5c681000,
>       0x00470101,
>       0x5c681000,
>     -0xfc0007e0,
>     +0xfde007e1,
>       0x001f8000,
>       0x00470000,
>       0x5c681000,
>     diff --git a/src/shader/exascnv110.fp b/src/shader/exascnv110.fp
>     index 90bbb55..86e14e8 100644
>     --- a/src/shader/exascnv110.fp
>     +++ b/src/shader/exascnv110.fp
>     @@ -25,14 +25,14 @@ NV110FP_Source[] = {
>       };
>       #else
> 
>     -sched (st 0x0) (st 0x0) (st 0x0)
>     +sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1)
>       ipa pass $r0 a[0x7c] 0x0 0x0 0x1
>       mufu rcp $r0 $r0
>       ipa $r1 a[0x84] $r0 0x0 0x1
>     -sched (st 0x0) (st 0x0) (st 0x0)
>     +sched (st 0xf wr 0x1) (st 0xf wt 0x3) (st 0xf)
>       ipa $r0 a[0x80] $r0 0x0 0x1
>       tex nodep $r0 $r0 0x0 0x0 t2d 0xf
>       depbar le 0x5 0x0 0x0
>     -sched (st 0x0) (st 0x0) (st 0x0)
>     +sched (st 0xf) (st 0x0) (st 0x0)
>       exit
>       #endif
>     diff --git a/src/shader/exascnv110.fpc b/src/shader/exascnv110.fpc
>     index 2dba15d..1fef5d2 100644
>     --- a/src/shader/exascnv110.fpc
>     +++ b/src/shader/exascnv110.fpc
>     @@ -1,20 +1,20 @@
>     -0xfc0007e0,
>     -0x001f8000,
>     +0xe1a0070f,
>     +0x003c3c01,
>       0xcff7ff00,
>       0xe003ff87,
>       0x00470000,
>       0x50800000,
>       0x4007ff01,
>       0xe043ff88,
>     -0xfc0007e0,
>     -0x001f8000,
>     +0xfde0072f,
>     +0x001fbc03,
>       0x0007ff00,
>       0xe043ff88,
>       0xaff70000,
>       0xc03a0007,
>       0x34070000,
>       0xf0f00000,
>     -0xfc0007e0,
>     +0xfc0007ef,
>       0x001f8000,
>       0x0007000f,
>       0xe3000000,
>     diff --git a/src/shader/videonv110.fp b/src/shader/videonv110.fp
>     index 2728311..dd3816c 100644
>     --- a/src/shader/videonv110.fp
>     +++ b/src/shader/videonv110.fp
>     @@ -25,30 +25,30 @@ NV110FP_NV12[] = {
>       };
>       #else
> 
>     -sched (st 0x0) (st 0x0) (st 0x0)
>     +sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1)
>       ipa pass $r2 a[0x7c] 0x0 0x0 0x1
>       mufu rcp $r2 $r2
>       ipa $r0 a[0x80] $r2 0x0 0x1
>     -sched (st 0x0) (st 0x0) (st 0x0)
>     +sched (st 0xf wr 0x1) (st 0xf wr 0x0 wt 0x3) (st 0xf wr 0x1)
>       ipa $r1 a[0x84] $r2 0x0 0x1
>       tex nodep $r4 $r0 0x0 0x0 t2d 0x8
>       tex nodep $r0 $r0 0x0 0x1 t2d 0xc
>     -sched (st 0x0) (st 0x0) (st 0x0)
>     +sched (st 0xf) (st 0x6 wt 0x1) (st 0x6)
>       depbar le 0x5 0x1 0x1
>       fmul ftz $r5 $r4 c0[0x0]
>       fadd ftz $r3 $r5 c0[0x4]
>     -sched (st 0x0) (st 0x0) (st 0x0)
>     +sched (st 0x6) (st 0x6) (st 0xf)
>       fadd ftz $r4 $r5 c0[0x8]
>       fadd ftz $r5 $r5 c0[0xc]
>       depbar le 0x5 0x0 0x0
>     -sched (st 0x0) (st 0x0) (st 0x0)
>     +sched (st 0x6 wt 0x2) (st 0x1) (st 0x1)
>       ffma ftz $r3 $r0 c0[0x10] $r3
>       ffma ftz $r4 $r0 c0[0x14] $r4
>       ffma ftz $r5 $r0 c0[0x18] $r5
>     -sched (st 0x0) (st 0x0) (st 0x0)
>     +sched (st 0x1) (st 0x1) (st 0x6)
>       ffma ftz $r0 $r1 c0[0x1c] $r3
>       ffma ftz $r2 $r1 c0[0x24] $r5
>       ffma ftz $r1 $r1 c0[0x20] $r4
>     -sched (st 0x0) (st 0x0) (st 0x0)
>     +sched (st 0xf) (st 0x0) (st 0x0)
>       exit
>       #endif
>     diff --git a/src/shader/videonv110.fpc b/src/shader/videonv110.fpc
>     index 31d745a..8fbc246 100644
>     --- a/src/shader/videonv110.fpc
>     +++ b/src/shader/videonv110.fpc
>     @@ -1,52 +1,52 @@
>     -0xfc0007e0,
>     -0x001f8000,
>     +0xe1a0070f,
>     +0x003c3c01,
>       0xcff7ff02,
>       0xe003ff87,
>       0x00470202,
>       0x50800000,
>       0x0027ff00,
>       0xe043ff88,
>     -0xfc0007e0,
>     -0x001f8000,
>     +0xe1e0072f,
>     +0x001cbc03,
>       0x4027ff01,
>       0xe043ff88,
>       0x2ff70004,
>       0xc03a0004,
>       0x2ff70000,
>       0xc03a0016,
>     -0xfc0007e0,
>     -0x001f8000,
>     +0xfcc007ef,
>     +0x001f9801,
>       0x34170001,
>       0xf0f00000,
>       0x00070405,
>       0x4c681000,
>       0x00170503,
>       0x4c581000,
>     -0xfc0007e0,
>     -0x001f8000,
>     +0xfcc007e6,
>     +0x001fbc00,
>       0x00270504,
>       0x4c581000,
>       0x00370505,
>       0x4c581000,
>       0x34070000,
>       0xf0f00000,
>     -0xfc0007e0,
>     -0x001f8000,
>     +0xfc2017e6,
>     +0x001f8400,
>       0x00470003,
>       0x49a00180,
>       0x00570004,
>       0x49a00200,
>       0x00670005,
>       0x49a00280,
>     -0xfc0007e0,
>     -0x001f8000,
>     +0xfc2007e1,
>     +0x001f9800,
>       0x00770100,
>       0x49a00180,
>       0x00970102,
>       0x49a00280,
>       0x00870101,
>       0x49a00200,
>     -0xfc0007e0,
>     +0xfc0007ef,
>       0x001f8000,
>       0x0007000f,
>       0xe3000000,
>     --
>     2.11.0
> 
>
On Mon, Jun 12, 2017 at 5:46 PM, Samuel Pitoiset
<samuel.pitoiset@gmail.com> wrote:
>
>
> On 06/10/2017 09:14 AM, Aaryaman Vasishta wrote:
>>
>> See the 'wt' on the first fmul in exacanv110.fp, exacmnv110.fp and
>> exasanv110.fp. Any ideas on what could be causing the first fmul to require
>> $r0 and/or $r1?
>
>
> 'tex nodep $r4 $r2 0x0 0x1 t2d 0xf'
>
> is actually:
>
> 'tex nodep $r4:$r7 $r2 0x0 0x1 t2d 0xf'

Actually more like:

tex nodep $r4:$r5:$r6:$r7 $r2:$r3 ...

Sorry for the late response, been busy with some personal stuff + work...

On Tue, Jun 13, 2017 at 6:52 AM, Ilia Mirkin <imirkin@alum.mit.edu> wrote:

> On Mon, Jun 12, 2017 at 5:46 PM, Samuel Pitoiset
> <samuel.pitoiset@gmail.com> wrote:
> >
> >
> > On 06/10/2017 09:14 AM, Aaryaman Vasishta wrote:
> >>
> >> See the 'wt' on the first fmul in exacanv110.fp, exacmnv110.fp and
> >> exasanv110.fp. Any ideas on what could be causing the first fmul to
> require
> >> $r0 and/or $r1?
> >
> >
> > 'tex nodep $r4 $r2 0x0 0x1 t2d 0xf'
> >
> > is actually:
> >
> > 'tex nodep $r4:$r7 $r2 0x0 0x1 t2d 0xf'
>
That's definitely confusing, but quite interesting.

>
> Actually more like:
>
> tex nodep $r4:$r5:$r6:$r7 $r2:$r3 ...
>
Any idea why it is this way? The only way I could figure it out so far is by
trial and error, but is there a better way to detect such dependencies?

In the meantime, I'll update the shaders again and start doing some tests.
Any tips on how to go about that would really be appreciated ^_^

Cheers,
Aaryaman

On Mon, Jun 19, 2017 at 11:22 AM, Aaryaman Vasishta
<jem456.vasishta@gmail.com> wrote:
> Sorry for the late response, been busy with some personal stuff + work...
>
> On Tue, Jun 13, 2017 at 6:52 AM, Ilia Mirkin <imirkin@alum.mit.edu> wrote:
>>
>> On Mon, Jun 12, 2017 at 5:46 PM, Samuel Pitoiset
>> <samuel.pitoiset@gmail.com> wrote:
>> >
>> >
>> > On 06/10/2017 09:14 AM, Aaryaman Vasishta wrote:
>> >>
>> >> See the 'wt' on the first fmul in exacanv110.fp, exacmnv110.fp and
>> >> exasanv110.fp. Any ideas on what could be causing the first fmul to
>> >> require
>> >> $r0 and/or $r1?
>> >
>> >
>> > 'tex nodep $r4 $r2 0x0 0x1 t2d 0xf'
>> >
>> > is actually:
>> >
>> > 'tex nodep $r4:$r7 $r2 0x0 0x1 t2d 0xf'
>
> That's definitely confusing, but quite interesting.
>>
>>
>> Actually more like:
>>
>> tex nodep $r4:$r5:$r6:$r7 $r2:$r3 ...
>
> Any idea why is it this way? The only way I could figure out so far is by
> trial and error, but is there any better way to detect such dependencies?

The t2d means "2 args for source" (with various additional modifiers
which could add arguments). 0xf means return all 4 components (it's a
component mask... e.g. 0x8 would return only the alpha).

  -ilia

On Tue, Jun 20, 2017 at 1:25 AM, Ilia Mirkin <imirkin@alum.mit.edu> wrote:

> On Mon, Jun 19, 2017 at 11:22 AM, Aaryaman Vasishta
> <jem456.vasishta@gmail.com> wrote:
> > Sorry for the late response, been busy with some personal stuff + work...
> >
> > On Tue, Jun 13, 2017 at 6:52 AM, Ilia Mirkin <imirkin@alum.mit.edu>
> wrote:
> >>
> >> On Mon, Jun 12, 2017 at 5:46 PM, Samuel Pitoiset
> >> <samuel.pitoiset@gmail.com> wrote:
> >> >
> >> >
> >> > On 06/10/2017 09:14 AM, Aaryaman Vasishta wrote:
> >> >>
> >> >> See the 'wt' on the first fmul in exacanv110.fp, exacmnv110.fp and
> >> >> exasanv110.fp. Any ideas on what could be causing the first fmul to
> >> >> require
> >> >> $r0 and/or $r1?
> >> >
> >> >
> >> > 'tex nodep $r4 $r2 0x0 0x1 t2d 0xf'
> >> >
> >> > is actually:
> >> >
> >> > 'tex nodep $r4:$r7 $r2 0x0 0x1 t2d 0xf'
> >
> > That's definitely confusing, but quite interesting.
> >>
> >>
> >> Actually more like:
> >>
> >> tex nodep $r4:$r5:$r6:$r7 $r2:$r3 ...
> >
> > Any idea why is it this way? The only way I could figure out so far is by
> > trial and error, but is there any better way to detect such dependencies?
>
> The t2d means "2 args for source" (with various additional modifiers
> which could add arguments). 0xf means return all 4 components (it's a
> component mask... e.g. 0x8 would be return only the alpha).

Ah, I see, makes sense. Thanks for the explanation!

Cheers,
Aaryaman

>
>   -ilia
>