freedreno/ir3: Make imageStore use num components from image format

Submitted by Eduardo Lima Mitev on Dec. 17, 2018, 8:41 p.m.

Details

Message ID 20181217204124.31095-1-elima@igalia.com
State New
Series "freedreno/ir3: Make imageStore use num components from image format"
Headers show

Commit Message

Eduardo Lima Mitev Dec. 17, 2018, 8:41 p.m.
emit_intrinsic_store_image() is always using 4 components when
collecting registers for the value. When image has less than
4 components (e.g, r32f, r32i, r32ui) this results in extra mov
instructions.

This patch uses the actual number of components from the image format.

For example, in a shader like:

layout (r32f, binding=0) writeonly uniform imageBuffer u_image;
...
void main(void) {
   ...
   imageStore (u_image, some_offset, vec4(1.0));
   ...
}

instruction count is reduced in at least 3 instructions (note image
format is r32f, 1 component only).

This obviously reduces register pressure as well.
---
 src/freedreno/ir3/ir3_compiler_nir.c | 34 ++++++++++++++++++++++++++--
 1 file changed, 32 insertions(+), 2 deletions(-)

Patch hide | download patch | download mbox

diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c
index 85f14f354d2..cc00602c249 100644
--- a/src/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/freedreno/ir3/ir3_compiler_nir.c
@@ -1251,6 +1251,35 @@  emit_intrinsic_load_image(struct ir3_context *ctx, nir_intrinsic_instr *intr,
 	ir3_split_dest(b, dst, sam, 0, 4);
 }
 
+/* Get the number of components of the different image formats supported
+ * by the GLES 3.1 spec.
+ */
+static unsigned
+get_num_components_for_glformat(GLuint format)
+{
+	switch (format) {
+	case GL_R32F:
+	case GL_R32I:
+	case GL_R32UI:
+		return 1;
+
+	case GL_RGBA32F:
+	case GL_RGBA16F:
+	case GL_RGBA8:
+	case GL_RGBA8_SNORM:
+	case GL_RGBA32I:
+	case GL_RGBA16I:
+	case GL_RGBA8I:
+	case GL_RGBA32UI:
+	case GL_RGBA16UI:
+	case GL_RGBA8UI:
+		return 4;
+
+	default:
+		assert(!"Unsupported GL format for image");
+	}
+}
+
 /* src[] = { deref, coord, sample_index, value }. const_index[] = {} */
 static void
 emit_intrinsic_store_image(struct ir3_context *ctx, nir_intrinsic_instr *intr)
@@ -1262,6 +1291,7 @@  emit_intrinsic_store_image(struct ir3_context *ctx, nir_intrinsic_instr *intr)
 	struct ir3_instruction * const *coords = ir3_get_src(ctx, &intr->src[1]);
 	unsigned ncoords = get_image_coords(var, NULL);
 	unsigned tex_idx = get_image_slot(ctx, nir_src_as_deref(intr->src[0]));
+	unsigned ncomp = get_num_components_for_glformat(var->data.image.format);
 
 	/* src0 is value
 	 * src1 is coords
@@ -1276,10 +1306,10 @@  emit_intrinsic_store_image(struct ir3_context *ctx, nir_intrinsic_instr *intr)
 	 */
 
 	stib = ir3_STIB(b, create_immed(b, tex_idx), 0,
-			ir3_create_collect(ctx, value, 4), 0,
+			ir3_create_collect(ctx, value, ncomp), 0,
 			ir3_create_collect(ctx, coords, ncoords), 0,
 			offset, 0);
-	stib->cat6.iim_val = 4;
+	stib->cat6.iim_val = ncomp;
 	stib->cat6.d = ncoords;
 	stib->cat6.type = get_image_type(var);
 	stib->cat6.typed = true;

Comments

Ilia Mirkin Dec. 17, 2018, 9:02 p.m.
Note that the format may not be known. I suspect that falls into your
"default" case.
On Mon, Dec 17, 2018 at 3:41 PM Eduardo Lima Mitev <elima@igalia.com> wrote:
>
> emit_intrinsic_store_image() is always using 4 components when
> collecting registers for the value. When image has less than
> 4 components (e.g, r32f, r32i, r32ui) this results in extra mov
> instructions.
>
> This patch uses the actual number of components from the image format.
>
> For example, in a shader like:
>
> layout (r32f, binding=0) writeonly uniform imageBuffer u_image;
> ...
> void main(void) {
>    ...
>    imageStore (u_image, some_offset, vec4(1.0));
>    ...
> }
>
> instruction count is reduced in at least 3 instructions (note image
> format is r32f, 1 component only).
>
> This obviously reduces register pressure as well.
> ---
>  src/freedreno/ir3/ir3_compiler_nir.c | 34 ++++++++++++++++++++++++++--
>  1 file changed, 32 insertions(+), 2 deletions(-)
>
> diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c
> index 85f14f354d2..cc00602c249 100644
> --- a/src/freedreno/ir3/ir3_compiler_nir.c
> +++ b/src/freedreno/ir3/ir3_compiler_nir.c
> @@ -1251,6 +1251,35 @@ emit_intrinsic_load_image(struct ir3_context *ctx, nir_intrinsic_instr *intr,
>         ir3_split_dest(b, dst, sam, 0, 4);
>  }
>
> +/* Get the number of components of the different image formats supported
> + * by the GLES 3.1 spec.
> + */
> +static unsigned
> +get_num_components_for_glformat(GLuint format)
> +{
> +       switch (format) {
> +       case GL_R32F:
> +       case GL_R32I:
> +       case GL_R32UI:
> +               return 1;
> +
> +       case GL_RGBA32F:
> +       case GL_RGBA16F:
> +       case GL_RGBA8:
> +       case GL_RGBA8_SNORM:
> +       case GL_RGBA32I:
> +       case GL_RGBA16I:
> +       case GL_RGBA8I:
> +       case GL_RGBA32UI:
> +       case GL_RGBA16UI:
> +       case GL_RGBA8UI:
> +               return 4;
> +
> +       default:
> +               assert(!"Unsupported GL format for image");
> +       }
> +}
> +
>  /* src[] = { deref, coord, sample_index, value }. const_index[] = {} */
>  static void
>  emit_intrinsic_store_image(struct ir3_context *ctx, nir_intrinsic_instr *intr)
> @@ -1262,6 +1291,7 @@ emit_intrinsic_store_image(struct ir3_context *ctx, nir_intrinsic_instr *intr)
>         struct ir3_instruction * const *coords = ir3_get_src(ctx, &intr->src[1]);
>         unsigned ncoords = get_image_coords(var, NULL);
>         unsigned tex_idx = get_image_slot(ctx, nir_src_as_deref(intr->src[0]));
> +       unsigned ncomp = get_num_components_for_glformat(var->data.image.format);
>
>         /* src0 is value
>          * src1 is coords
> @@ -1276,10 +1306,10 @@ emit_intrinsic_store_image(struct ir3_context *ctx, nir_intrinsic_instr *intr)
>          */
>
>         stib = ir3_STIB(b, create_immed(b, tex_idx), 0,
> -                       ir3_create_collect(ctx, value, 4), 0,
> +                       ir3_create_collect(ctx, value, ncomp), 0,
>                         ir3_create_collect(ctx, coords, ncoords), 0,
>                         offset, 0);
> -       stib->cat6.iim_val = 4;
> +       stib->cat6.iim_val = ncomp;
>         stib->cat6.d = ncoords;
>         stib->cat6.type = get_image_type(var);
>         stib->cat6.typed = true;
> --
> 2.19.2
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Eduardo Lima Mitev Dec. 18, 2018, 8:05 a.m.
On 12/17/18 10:02 PM, Ilia Mirkin wrote:
> Note that the format may not be known. I suspect that falls into your
> "default" case.
>

Hi Ilia,

while on GLES profiles the format qualifier must be declared for all
image variables, it is true that core profiles allow to omit it for
writeonly-qualified images.

I guess we can add a special case for GL_NONE to the switch, that also
returns 4 components, to at least not break current behavior if image
format is not known. What do you think?

Eduardo

> On Mon, Dec 17, 2018 at 3:41 PM Eduardo Lima Mitev <elima@igalia.com> wrote:
>>
>> emit_intrinsic_store_image() is always using 4 components when
>> collecting registers for the value. When image has less than
>> 4 components (e.g, r32f, r32i, r32ui) this results in extra mov
>> instructions.
>>
>> This patch uses the actual number of components from the image format.
>>
>> For example, in a shader like:
>>
>> layout (r32f, binding=0) writeonly uniform imageBuffer u_image;
>> ...
>> void main(void) {
>>    ...
>>    imageStore (u_image, some_offset, vec4(1.0));
>>    ...
>> }
>>
>> instruction count is reduced in at least 3 instructions (note image
>> format is r32f, 1 component only).
>>
>> This obviously reduces register pressure as well.
>> ---
>>  src/freedreno/ir3/ir3_compiler_nir.c | 34 ++++++++++++++++++++++++++--
>>  1 file changed, 32 insertions(+), 2 deletions(-)
>>
>> diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c
>> index 85f14f354d2..cc00602c249 100644
>> --- a/src/freedreno/ir3/ir3_compiler_nir.c
>> +++ b/src/freedreno/ir3/ir3_compiler_nir.c
>> @@ -1251,6 +1251,35 @@ emit_intrinsic_load_image(struct ir3_context *ctx, nir_intrinsic_instr *intr,
>>         ir3_split_dest(b, dst, sam, 0, 4);
>>  }
>>
>> +/* Get the number of components of the different image formats supported
>> + * by the GLES 3.1 spec.
>> + */
>> +static unsigned
>> +get_num_components_for_glformat(GLuint format)
>> +{
>> +       switch (format) {
>> +       case GL_R32F:
>> +       case GL_R32I:
>> +       case GL_R32UI:
>> +               return 1;
>> +
>> +       case GL_RGBA32F:
>> +       case GL_RGBA16F:
>> +       case GL_RGBA8:
>> +       case GL_RGBA8_SNORM:
>> +       case GL_RGBA32I:
>> +       case GL_RGBA16I:
>> +       case GL_RGBA8I:
>> +       case GL_RGBA32UI:
>> +       case GL_RGBA16UI:
>> +       case GL_RGBA8UI:
>> +               return 4;
>> +
>> +       default:
>> +               assert(!"Unsupported GL format for image");
>> +       }
>> +}
>> +
>>  /* src[] = { deref, coord, sample_index, value }. const_index[] = {} */
>>  static void
>>  emit_intrinsic_store_image(struct ir3_context *ctx, nir_intrinsic_instr *intr)
>> @@ -1262,6 +1291,7 @@ emit_intrinsic_store_image(struct ir3_context *ctx, nir_intrinsic_instr *intr)
>>         struct ir3_instruction * const *coords = ir3_get_src(ctx, &intr->src[1]);
>>         unsigned ncoords = get_image_coords(var, NULL);
>>         unsigned tex_idx = get_image_slot(ctx, nir_src_as_deref(intr->src[0]));
>> +       unsigned ncomp = get_num_components_for_glformat(var->data.image.format);
>>
>>         /* src0 is value
>>          * src1 is coords
>> @@ -1276,10 +1306,10 @@ emit_intrinsic_store_image(struct ir3_context *ctx, nir_intrinsic_instr *intr)
>>          */
>>
>>         stib = ir3_STIB(b, create_immed(b, tex_idx), 0,
>> -                       ir3_create_collect(ctx, value, 4), 0,
>> +                       ir3_create_collect(ctx, value, ncomp), 0,
>>                         ir3_create_collect(ctx, coords, ncoords), 0,
>>                         offset, 0);
>> -       stib->cat6.iim_val = 4;
>> +       stib->cat6.iim_val = ncomp;
>>         stib->cat6.d = ncoords;
>>         stib->cat6.type = get_image_type(var);
>>         stib->cat6.typed = true;
>> --
>> 2.19.2
>>
>> _______________________________________________
>> mesa-dev mailing list
>> mesa-dev@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
Rob Clark Dec. 18, 2018, 1:31 p.m.
On Mon, Dec 17, 2018 at 3:41 PM Eduardo Lima Mitev <elima@igalia.com> wrote:
>
> emit_intrinsic_store_image() is always using 4 components when
> collecting registers for the value. When image has less than
> 4 components (e.g, r32f, r32i, r32ui) this results in extra mov
> instructions.
>
> This patch uses the actual number of components from the image format.
>
> For example, in a shader like:
>
> layout (r32f, binding=0) writeonly uniform imageBuffer u_image;
> ...
> void main(void) {
>    ...
>    imageStore (u_image, some_offset, vec4(1.0));
>    ...
> }
>
> instruction count is reduced in at least 3 instructions (note image
> format is r32f, 1 component only).
>
> This obviously reduces register pressure as well.
> ---
>  src/freedreno/ir3/ir3_compiler_nir.c | 34 ++++++++++++++++++++++++++--
>  1 file changed, 32 insertions(+), 2 deletions(-)
>
> diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c
> index 85f14f354d2..cc00602c249 100644
> --- a/src/freedreno/ir3/ir3_compiler_nir.c
> +++ b/src/freedreno/ir3/ir3_compiler_nir.c
> @@ -1251,6 +1251,35 @@ emit_intrinsic_load_image(struct ir3_context *ctx, nir_intrinsic_instr *intr,
>         ir3_split_dest(b, dst, sam, 0, 4);
>  }
>
> +/* Get the number of components of the different image formats supported
> + * by the GLES 3.1 spec.
> + */
> +static unsigned
> +get_num_components_for_glformat(GLuint format)
> +{
> +       switch (format) {
> +       case GL_R32F:
> +       case GL_R32I:
> +       case GL_R32UI:
> +               return 1;
> +
> +       case GL_RGBA32F:
> +       case GL_RGBA16F:
> +       case GL_RGBA8:
> +       case GL_RGBA8_SNORM:
> +       case GL_RGBA32I:
> +       case GL_RGBA16I:
> +       case GL_RGBA8I:
> +       case GL_RGBA32UI:
> +       case GL_RGBA16UI:
> +       case GL_RGBA8UI:
> +               return 4;
> +
> +       default:
> +               assert(!"Unsupported GL format for image");

One thought, default could still return 4, at least for release
builds.. that should always be a safe fallback.  But I suppose
addition of new formats maybe doesn't happen so much.  (There is no
glsl_types helper for this?)

If we do have an assert, compile_assert() or ir3_context_error() would
be nice, since (at least for debug builds) should print out the nir
annotated w/ location of error, which I find convenient for debugging.

BR,
-R


> +       }
> +}
> +
>  /* src[] = { deref, coord, sample_index, value }. const_index[] = {} */
>  static void
>  emit_intrinsic_store_image(struct ir3_context *ctx, nir_intrinsic_instr *intr)
> @@ -1262,6 +1291,7 @@ emit_intrinsic_store_image(struct ir3_context *ctx, nir_intrinsic_instr *intr)
>         struct ir3_instruction * const *coords = ir3_get_src(ctx, &intr->src[1]);
>         unsigned ncoords = get_image_coords(var, NULL);
>         unsigned tex_idx = get_image_slot(ctx, nir_src_as_deref(intr->src[0]));
> +       unsigned ncomp = get_num_components_for_glformat(var->data.image.format);
>
>         /* src0 is value
>          * src1 is coords
> @@ -1276,10 +1306,10 @@ emit_intrinsic_store_image(struct ir3_context *ctx, nir_intrinsic_instr *intr)
>          */
>
>         stib = ir3_STIB(b, create_immed(b, tex_idx), 0,
> -                       ir3_create_collect(ctx, value, 4), 0,
> +                       ir3_create_collect(ctx, value, ncomp), 0,
>                         ir3_create_collect(ctx, coords, ncoords), 0,
>                         offset, 0);
> -       stib->cat6.iim_val = 4;
> +       stib->cat6.iim_val = ncomp;
>         stib->cat6.d = ncoords;
>         stib->cat6.type = get_image_type(var);
>         stib->cat6.typed = true;
> --
> 2.19.2
>
> _______________________________________________
> Freedreno mailing list
> Freedreno@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/freedreno