[Mesa-dev,21/32] i965/vec4: Fix the scheduler to take into account reads and writes of multiple registers.

Submitted by Francisco Jerez on Feb. 6, 2015, 2:43 p.m.

Details

Message ID 1423233792-11767-21-git-send-email-currojerez@riseup.net
State New
Headers show

Not browsing as part of any series.

Commit Message

Francisco Jerez Feb. 6, 2015, 2:43 p.m.
---
 src/mesa/drivers/dri/i965/brw_ir_vec4.h                 |  1 +
 src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp | 15 ++++++++++-----
 src/mesa/drivers/dri/i965/brw_vec4.cpp                  | 17 +++++++++++++++++
 3 files changed, 28 insertions(+), 5 deletions(-)

Patch hide | download patch | download mbox

diff --git a/src/mesa/drivers/dri/i965/brw_ir_vec4.h b/src/mesa/drivers/dri/i965/brw_ir_vec4.h
index f11a2d2..941086f 100644
--- a/src/mesa/drivers/dri/i965/brw_ir_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_ir_vec4.h
@@ -171,6 +171,7 @@  public:
    unsigned sol_vertex; /**< gen6: used for setting dst index in SVB header */
 
    bool is_send_from_grf();
+   unsigned regs_read(unsigned arg) const;
    bool can_reswizzle(int dst_writemask, int swizzle, int swizzle_mask);
    void reswizzle(int dst_writemask, int swizzle);
    bool can_do_source_mods(struct brw_context *brw);
diff --git a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
index 88feeca..2b22b2c 100644
--- a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
+++ b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
@@ -1063,7 +1063,8 @@  vec4_instruction_scheduler::calculate_deps()
       /* read-after-write deps. */
       for (int i = 0; i < 3; i++) {
          if (inst->src[i].file == GRF) {
-            add_dep(last_grf_write[inst->src[i].reg], n);
+            for (unsigned j = 0; j < inst->regs_read(i); ++j)
+               add_dep(last_grf_write[inst->src[i].reg + j], n);
          } else if (inst->src[i].file == HW_REG &&
                     (inst->src[i].fixed_hw_reg.file ==
                      BRW_GENERAL_REGISTER_FILE)) {
@@ -1103,8 +1104,10 @@  vec4_instruction_scheduler::calculate_deps()
 
       /* write-after-write deps. */
       if (inst->dst.file == GRF) {
-         add_dep(last_grf_write[inst->dst.reg], n);
-         last_grf_write[inst->dst.reg] = n;
+         for (unsigned j = 0; j < inst->regs_written; ++j) {
+            add_dep(last_grf_write[inst->dst.reg + j], n);
+            last_grf_write[inst->dst.reg + j] = n;
+         }
       } else if (inst->dst.file == MRF) {
          add_dep(last_mrf_write[inst->dst.reg], n);
          last_mrf_write[inst->dst.reg] = n;
@@ -1156,7 +1159,8 @@  vec4_instruction_scheduler::calculate_deps()
       /* write-after-read deps. */
       for (int i = 0; i < 3; i++) {
          if (inst->src[i].file == GRF) {
-            add_dep(n, last_grf_write[inst->src[i].reg]);
+            for (unsigned j = 0; j < inst->regs_read(i); ++j)
+               add_dep(n, last_grf_write[inst->src[i].reg + j]);
          } else if (inst->src[i].file == HW_REG &&
                     (inst->src[i].fixed_hw_reg.file ==
                      BRW_GENERAL_REGISTER_FILE)) {
@@ -1194,7 +1198,8 @@  vec4_instruction_scheduler::calculate_deps()
        * can mark this as WAR dependency.
        */
       if (inst->dst.file == GRF) {
-         last_grf_write[inst->dst.reg] = n;
+         for (unsigned j = 0; j < inst->regs_written; ++j)
+            last_grf_write[inst->dst.reg + j] = n;
       } else if (inst->dst.file == MRF) {
          last_mrf_write[inst->dst.reg] = n;
       } else if (inst->dst.file == HW_REG &&
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index a4fd136..b2f3151 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -262,6 +262,23 @@  vec4_instruction::is_send_from_grf()
    }
 }
 
+unsigned
+vec4_instruction::regs_read(unsigned arg) const
+{
+   switch (opcode) {
+   case SHADER_OPCODE_SHADER_TIME_ADD:
+      return (arg == 0 ? mlen :
+              src[arg].file != BAD_FILE ? 1 : 0);
+
+   case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7:
+      return (arg == 1 ? mlen :
+              src[arg].file != BAD_FILE ? 1 : 0);
+
+   default:
+      return (src[arg].file != BAD_FILE ? 1 : 0);
+   }
+}
+
 bool
 vec4_instruction::can_do_source_mods(struct brw_context *brw)
 {

Comments

On Fri, Feb 6, 2015 at 6:43 AM, Francisco Jerez <currojerez@riseup.net> wrote:
> ---
>  src/mesa/drivers/dri/i965/brw_ir_vec4.h                 |  1 +
>  src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp | 15 ++++++++++-----
>  src/mesa/drivers/dri/i965/brw_vec4.cpp                  | 17 +++++++++++++++++
>  3 files changed, 28 insertions(+), 5 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_ir_vec4.h b/src/mesa/drivers/dri/i965/brw_ir_vec4.h
> index f11a2d2..941086f 100644
> --- a/src/mesa/drivers/dri/i965/brw_ir_vec4.h
> +++ b/src/mesa/drivers/dri/i965/brw_ir_vec4.h
> @@ -171,6 +171,7 @@ public:
>     unsigned sol_vertex; /**< gen6: used for setting dst index in SVB header */
>
>     bool is_send_from_grf();
> +   unsigned regs_read(unsigned arg) const;
>     bool can_reswizzle(int dst_writemask, int swizzle, int swizzle_mask);
>     void reswizzle(int dst_writemask, int swizzle);
>     bool can_do_source_mods(struct brw_context *brw);
> diff --git a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
> index 88feeca..2b22b2c 100644
> --- a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
> @@ -1063,7 +1063,8 @@ vec4_instruction_scheduler::calculate_deps()
>        /* read-after-write deps. */
>        for (int i = 0; i < 3; i++) {
>           if (inst->src[i].file == GRF) {
> -            add_dep(last_grf_write[inst->src[i].reg], n);
> +            for (unsigned j = 0; j < inst->regs_read(i); ++j)
> +               add_dep(last_grf_write[inst->src[i].reg + j], n);
>           } else if (inst->src[i].file == HW_REG &&
>                      (inst->src[i].fixed_hw_reg.file ==
>                       BRW_GENERAL_REGISTER_FILE)) {
> @@ -1103,8 +1104,10 @@ vec4_instruction_scheduler::calculate_deps()
>
>        /* write-after-write deps. */
>        if (inst->dst.file == GRF) {
> -         add_dep(last_grf_write[inst->dst.reg], n);
> -         last_grf_write[inst->dst.reg] = n;
> +         for (unsigned j = 0; j < inst->regs_written; ++j) {
> +            add_dep(last_grf_write[inst->dst.reg + j], n);
> +            last_grf_write[inst->dst.reg + j] = n;
> +         }
>        } else if (inst->dst.file == MRF) {
>           add_dep(last_mrf_write[inst->dst.reg], n);
>           last_mrf_write[inst->dst.reg] = n;
> @@ -1156,7 +1159,8 @@ vec4_instruction_scheduler::calculate_deps()
>        /* write-after-read deps. */
>        for (int i = 0; i < 3; i++) {
>           if (inst->src[i].file == GRF) {
> -            add_dep(n, last_grf_write[inst->src[i].reg]);
> +            for (unsigned j = 0; j < inst->regs_read(i); ++j)
> +               add_dep(n, last_grf_write[inst->src[i].reg + j]);
>           } else if (inst->src[i].file == HW_REG &&
>                      (inst->src[i].fixed_hw_reg.file ==
>                       BRW_GENERAL_REGISTER_FILE)) {
> @@ -1194,7 +1198,8 @@ vec4_instruction_scheduler::calculate_deps()
>         * can mark this as WAR dependency.
>         */
>        if (inst->dst.file == GRF) {
> -         last_grf_write[inst->dst.reg] = n;
> +         for (unsigned j = 0; j < inst->regs_written; ++j)
> +            last_grf_write[inst->dst.reg + j] = n;
>        } else if (inst->dst.file == MRF) {
>           last_mrf_write[inst->dst.reg] = n;
>        } else if (inst->dst.file == HW_REG &&
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
> index a4fd136..b2f3151 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
> @@ -262,6 +262,23 @@ vec4_instruction::is_send_from_grf()
>     }
>  }
>
> +unsigned
> +vec4_instruction::regs_read(unsigned arg) const
> +{
> +   switch (opcode) {
> +   case SHADER_OPCODE_SHADER_TIME_ADD:
> +      return (arg == 0 ? mlen :
> +              src[arg].file != BAD_FILE ? 1 : 0);
> +
> +   case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7:
> +      return (arg == 1 ? mlen :
> +              src[arg].file != BAD_FILE ? 1 : 0);
> +
> +   default:
> +      return (src[arg].file != BAD_FILE ? 1 : 0);
> +   }
> +}

Better to just do

   if (src[arg].file == BAD_FILE)
      return 0;

before the switch statement to avoid the nested ternary, or do you
actually need to test arg == 0/1 before checking .file in those two
cases?
Matt Turner <mattst88@gmail.com> writes:

> On Fri, Feb 6, 2015 at 6:43 AM, Francisco Jerez <currojerez@riseup.net> wrote:
>> ---
>>  src/mesa/drivers/dri/i965/brw_ir_vec4.h                 |  1 +
>>  src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp | 15 ++++++++++-----
>>  src/mesa/drivers/dri/i965/brw_vec4.cpp                  | 17 +++++++++++++++++
>>  3 files changed, 28 insertions(+), 5 deletions(-)
>>
>> diff --git a/src/mesa/drivers/dri/i965/brw_ir_vec4.h b/src/mesa/drivers/dri/i965/brw_ir_vec4.h
>> index f11a2d2..941086f 100644
>> --- a/src/mesa/drivers/dri/i965/brw_ir_vec4.h
>> +++ b/src/mesa/drivers/dri/i965/brw_ir_vec4.h
>> @@ -171,6 +171,7 @@ public:
>>     unsigned sol_vertex; /**< gen6: used for setting dst index in SVB header */
>>
>>     bool is_send_from_grf();
>> +   unsigned regs_read(unsigned arg) const;
>>     bool can_reswizzle(int dst_writemask, int swizzle, int swizzle_mask);
>>     void reswizzle(int dst_writemask, int swizzle);
>>     bool can_do_source_mods(struct brw_context *brw);
>> diff --git a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
>> index 88feeca..2b22b2c 100644
>> --- a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
>> +++ b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
>> @@ -1063,7 +1063,8 @@ vec4_instruction_scheduler::calculate_deps()
>>        /* read-after-write deps. */
>>        for (int i = 0; i < 3; i++) {
>>           if (inst->src[i].file == GRF) {
>> -            add_dep(last_grf_write[inst->src[i].reg], n);
>> +            for (unsigned j = 0; j < inst->regs_read(i); ++j)
>> +               add_dep(last_grf_write[inst->src[i].reg + j], n);
>>           } else if (inst->src[i].file == HW_REG &&
>>                      (inst->src[i].fixed_hw_reg.file ==
>>                       BRW_GENERAL_REGISTER_FILE)) {
>> @@ -1103,8 +1104,10 @@ vec4_instruction_scheduler::calculate_deps()
>>
>>        /* write-after-write deps. */
>>        if (inst->dst.file == GRF) {
>> -         add_dep(last_grf_write[inst->dst.reg], n);
>> -         last_grf_write[inst->dst.reg] = n;
>> +         for (unsigned j = 0; j < inst->regs_written; ++j) {
>> +            add_dep(last_grf_write[inst->dst.reg + j], n);
>> +            last_grf_write[inst->dst.reg + j] = n;
>> +         }
>>        } else if (inst->dst.file == MRF) {
>>           add_dep(last_mrf_write[inst->dst.reg], n);
>>           last_mrf_write[inst->dst.reg] = n;
>> @@ -1156,7 +1159,8 @@ vec4_instruction_scheduler::calculate_deps()
>>        /* write-after-read deps. */
>>        for (int i = 0; i < 3; i++) {
>>           if (inst->src[i].file == GRF) {
>> -            add_dep(n, last_grf_write[inst->src[i].reg]);
>> +            for (unsigned j = 0; j < inst->regs_read(i); ++j)
>> +               add_dep(n, last_grf_write[inst->src[i].reg + j]);
>>           } else if (inst->src[i].file == HW_REG &&
>>                      (inst->src[i].fixed_hw_reg.file ==
>>                       BRW_GENERAL_REGISTER_FILE)) {
>> @@ -1194,7 +1198,8 @@ vec4_instruction_scheduler::calculate_deps()
>>         * can mark this as WAR dependency.
>>         */
>>        if (inst->dst.file == GRF) {
>> -         last_grf_write[inst->dst.reg] = n;
>> +         for (unsigned j = 0; j < inst->regs_written; ++j)
>> +            last_grf_write[inst->dst.reg + j] = n;
>>        } else if (inst->dst.file == MRF) {
>>           last_mrf_write[inst->dst.reg] = n;
>>        } else if (inst->dst.file == HW_REG &&
>> diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
>> index a4fd136..b2f3151 100644
>> --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
>> +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
>> @@ -262,6 +262,23 @@ vec4_instruction::is_send_from_grf()
>>     }
>>  }
>>
>> +unsigned
>> +vec4_instruction::regs_read(unsigned arg) const
>> +{
>> +   switch (opcode) {
>> +   case SHADER_OPCODE_SHADER_TIME_ADD:
>> +      return (arg == 0 ? mlen :
>> +              src[arg].file != BAD_FILE ? 1 : 0);
>> +
>> +   case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7:
>> +      return (arg == 1 ? mlen :
>> +              src[arg].file != BAD_FILE ? 1 : 0);
>> +
>> +   default:
>> +      return (src[arg].file != BAD_FILE ? 1 : 0);
>> +   }
>> +}
>
> Better to just do
>
>    if (src[arg].file == BAD_FILE)
>       return 0;
>
> before the switch statement to avoid the nested ternary, or do you
> actually need to test arg == 0/1 before checking .file in those two
> cases?

Yeah, I've fixed that locally, thanks.
On Fri, Feb 6, 2015 at 6:43 AM, Francisco Jerez <currojerez@riseup.net> wrote:
> ---

We don't have any operations today that return more than a single
register in the vec4 backend, do we? Presumably this is partly
preparation for image_load_store?

Reviewed-by: Matt Turner <mattst88@gmail.com>
Matt Turner <mattst88@gmail.com> writes:

> On Fri, Feb 6, 2015 at 6:43 AM, Francisco Jerez <currojerez@riseup.net> wrote:
>> ---
>
> We don't have any operations today that return more than a single
> register in the vec4 backend, do we? Presumably this is partly
> preparation for image_load_store?
>
Yeah, of course :).

> Reviewed-by: Matt Turner <mattst88@gmail.com>

Thanks!