[2/6] nir/lower_tex: Add a way to lower TXS(non-0-LOD) instructions

Submitted by Boris Brezillon on June 17, 2019, 10:49 a.m.

Details

Message ID 20190617104928.24007-3-boris.brezillon@collabora.com
State New
Headers show
Series "panfrost: Add support for TXS instructions" ( rev: 2 1 ) in Mesa

Not browsing as part of any series.

Commit Message

Boris Brezillon June 17, 2019, 10:49 a.m.
The V3D driver has an open-coded solution for this, and we need the
same thing for Panfrost, so let's add a generic way to lower TXS(LOD)
into max(TXS(0) >> LOD, 1).

Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
---
 src/compiler/nir/nir.h           |  6 ++++
 src/compiler/nir/nir_lower_tex.c | 49 ++++++++++++++++++++++++++++++++
 2 files changed, 55 insertions(+)

Patch hide | download patch | download mbox

diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 4270df565111..8972b4af7480 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -3426,6 +3426,12 @@  typedef struct nir_lower_tex_options {
     */
    bool lower_txd_clamp_if_sampler_index_not_lt_16;
 
+   /**
+    * If true, lower nir_texop_txs with a non-0-lod into nir_texop_txs with
+    * 0-lod followed by a nir_ishr and a nir_imax clamp.
+    */
+   bool lower_txs_lod;
+
    /**
     * If true, apply a .bagr swizzle on tg4 results to handle Broadcom's
     * mixed-up tg4 locations.
diff --git a/src/compiler/nir/nir_lower_tex.c b/src/compiler/nir/nir_lower_tex.c
index 53719017a87f..c29581d9b048 100644
--- a/src/compiler/nir/nir_lower_tex.c
+++ b/src/compiler/nir/nir_lower_tex.c
@@ -978,6 +978,50 @@  lower_tg4_offsets(nir_builder *b, nir_tex_instr *tex)
    return true;
 }
 
+/* Lower TXS(LOD) to max(TXS(0) >> LOD, 1); returns true if tex was changed. */
+static bool
+nir_lower_txs_lod(nir_builder *b, nir_tex_instr *tex)
+{
+   int lod_idx = nir_tex_instr_src_index(tex, nir_tex_src_lod);
+   if (lod_idx < 0 ||
+       (nir_src_is_const(tex->src[lod_idx].src) &&
+        nir_src_as_int(tex->src[lod_idx].src) == 0))
+      return false;
+
+   /* nir_ssa_for_src() may emit instructions: set the cursor before it. */
+   b->cursor = nir_after_instr(&tex->instr);
+   nir_ssa_def *lod = nir_ssa_for_src(b, tex->src[lod_idx].src, 1);
+   unsigned dest_size = nir_tex_instr_dest_size(tex);
+   nir_ssa_def *shift, *min, *result;
+
+   /* Array textures: last component is not minified (shift 0, min 0). */
+   switch (dest_size) {
+   case 3:
+      shift = nir_vec3(b, lod, lod, tex->is_array ? nir_imm_int(b, 0) : lod);
+      min = nir_imm_ivec3(b, 1, 1, tex->is_array ? 0 : 1);
+      break;
+   case 2:
+      shift = nir_vec2(b, lod, tex->is_array ? nir_imm_int(b, 0) : lod);
+      min = nir_imm_ivec2(b, 1, tex->is_array ? 0 : 1);
+      break;
+   case 1:
+      shift = lod;
+      min = nir_imm_int(b, 1);
+      break;
+   default:
+      unreachable("Invalid nir_tex_instr_dest_size()\n");
+   }
+
+   result = nir_imax(b, nir_ishr(b, &tex->dest.ssa, shift), min);
+   nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, nir_src_for_ssa(result),
+                                  result->parent_instr);
+   /* Replace the non-0-LOD in the initial TXS operation by a 0-LOD. */
+   b->cursor = nir_before_instr(&tex->instr);
+   nir_instr_rewrite_src(&tex->instr, &tex->src[lod_idx].src,
+                         nir_src_for_ssa(nir_imm_int(b, 0)));
+   return true;
+}
+
 static bool
 nir_lower_tex_block(nir_block *block, nir_builder *b,
                     const nir_lower_tex_options *options)
@@ -1132,6 +1176,11 @@  nir_lower_tex_block(nir_block *block, nir_builder *b,
          continue;
       }
 
+      if (options->lower_txs_lod && tex->op == nir_texop_txs) {
+         progress |= nir_lower_txs_lod(b, tex);
+         continue;
+      }
+
       /* has to happen after all the other lowerings as the original tg4 gets
        * replaced by 4 tg4 instructions.
        */

Comments



On Mon, 17 Jun 2019 10:53:47 -0500
Jason Ekstrand <jason@jlekstrand.net> wrote:

> On Mon, Jun 17, 2019 at 5:49 AM Boris Brezillon <
> boris.brezillon@collabora.com> wrote:  
> 
> > The V3D driver has an open-coded solution for this, and we need the
> > same thing for Panfrost, so let's add a generic way to lower TXS(LOD)
> > into max(TXS(0) >> LOD, 1).
> >
> > Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
> > ---
> >  src/compiler/nir/nir.h           |  6 ++++
> >  src/compiler/nir/nir_lower_tex.c | 49 ++++++++++++++++++++++++++++++++
> >  2 files changed, 55 insertions(+)
> >
> > diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
> > index 4270df565111..8972b4af7480 100644
> > --- a/src/compiler/nir/nir.h
> > +++ b/src/compiler/nir/nir.h
> > @@ -3426,6 +3426,12 @@ typedef struct nir_lower_tex_options {
> >      */
> >     bool lower_txd_clamp_if_sampler_index_not_lt_16;
> >
> > +   /**
> > +    * If true, lower nir_texop_txs with a non-0-lod into nir_texop_txs with
> > +    * 0-lod followed by a nir_ishr.
> > +    */
> > +   bool lower_txs_lod;
> > +
> >     /**
> >      * If true, apply a .bagr swizzle on tg4 results to handle Broadcom's
> >      * mixed-up tg4 locations.
> > diff --git a/src/compiler/nir/nir_lower_tex.c b/src/compiler/nir/nir_lower_tex.c
> > index 53719017a87f..c29581d9b048 100644
> > --- a/src/compiler/nir/nir_lower_tex.c
> > +++ b/src/compiler/nir/nir_lower_tex.c
> > @@ -978,6 +978,50 @@ lower_tg4_offsets(nir_builder *b, nir_tex_instr *tex)
> >     return true;
> >  }
> >
> > +static bool
> > +nir_lower_txs_lod(nir_builder *b, nir_tex_instr *tex)
> > +{
> > +   int lod_idx = nir_tex_instr_src_index(tex, nir_tex_src_lod);
> > +   if (lod_idx < 0 ||
> > +       (nir_src_is_const(tex->src[lod_idx].src) &&
> > +        !nir_src_as_int(tex->src[lod_idx].src)))
> >  
> 
> Please use == 0 instead of ! here.  We're checking an integer, not a
> boolean.

Sure, I'll change that.

> 
> 
> > +      return false;
> > +
> > +   nir_ssa_def *lod = nir_ssa_for_src(b, tex->src[lod_idx].src, 1);
> >  
> 
> In theory, this can emit instructions.  We should set the cursor before
> calling it.

Oops, I'll move the cursor initialization before this line.

> 
> 
> > +   unsigned dest_size = nir_tex_instr_dest_size(tex);
> > +   nir_ssa_def *shift, *min, *result;
> > +
> > +   b->cursor = nir_after_instr(&tex->instr);
> > +
> > +   switch (dest_size) {
> > +   case 3:
> > +      shift = nir_vec3(b, lod, lod, tex->is_array ? nir_imm_int(b, 0) : lod);
> > +      min = nir_imm_ivec3(b, 1, 1, tex->is_array ? 0 : 1);
> > +      break;
> > +   case 2:
> > +      shift = nir_vec2(b, lod, tex->is_array ? nir_imm_int(b, 0) : lod);
> > +      min = nir_imm_ivec2(b, 1, tex->is_array ? 0 : 1);
> > +      break;
> > +   case 1:
> > +      shift = lod;
> > +      min = nir_imm_int(b, 1);
> > +      break;
> > +   default:
> > +      unreachable("Invalid nir_tex_instr_dest_size()\n");
> > +   }
> > +
> > +   /* TXS(LOD) = max(TXS(0) >> LOD, 1) */
> > +   result = nir_imax(b, nir_ishr(b, &tex->dest.ssa, shift), min);
> >  
> 
> I think it will actually emit less code and be slightly simpler if you do
> 
> nir_ssa_def *minified =
>    nir_imax(b, nir_ushr(b, &tex->dest.ssa, lod), nir_imm_int(b, 1));

Are you sure for the ushr()? lod is supposed to be an int, so I'd
expect to have an ishr() here (note that I don't even handle the
lod < 0 case as I'm not sure what's supposed to be done in that case).

> if (tex->is_array) {
>    nir_ssa_def *comp[4];
>    for (unsigned i = 0; i < dest_size - 1; i++)
>       comp[i] = nir_component(b, minified, i);
>    comp[dest_size - 1] = nir_component(b, &tex->dest.ssa, dest_size - 1);
>    minified = nir_vec(b, comp, dest_size);
> }
> 
> That way, it generates one vec() operation instead of two.  Note that you
> don't need to explicitly expand lod out to three components as nir_builder
> will do that for you.

Oh, good to know.

> 
> 
> > +   nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, nir_src_for_ssa(result),
> > +                                  result->parent_instr);
> > +
> > +   /* Replace the non-0-LOD in the initial TXS operation by a 0-LOD. */
> > +   b->cursor = nir_before_instr(&tex->instr);
> > +   nir_instr_rewrite_src(&tex->instr, &tex->src[lod_idx].src,
> > +                         nir_src_for_ssa(nir_imm_int(b, 0)));
> >  
> 
> Personally, I'd put this right after we grab the LOD from the instruction
> but this is fine too.

Okay, I'll move it there.

Thanks for the review.

Boris