radv: try to select better export formats for chips without Rb+

Submitted by Samuel Pitoiset on Jan. 22, 2019, 3:34 p.m.

Details

Message ID 20190122153456.2067-1-samuel.pitoiset@gmail.com
State New
Headers show
Series "radv: try to select better export formats for chips without Rb+" ( rev: 1 ) in Mesa

Not browsing as part of any series.

Commit Message

Samuel Pitoiset Jan. 22, 2019, 3:34 p.m.
For some R8 formats, it's useless to export two channels
when no alpha blending is used. I assume the CB should
automatically clamps its inputs.

29077 shaders in 15096 tests
Totals:
SGPRS: 1321106 -> 1320970 (-0.01 %)
VGPRS: 935936 -> 935948 (0.00 %)
Spilled SGPRs: 25186 -> 25204 (0.07 %)
Code Size: 49813556 -> 49796944 (-0.03 %) bytes
Max Waves: 242091 -> 242088 (-0.00 %)

Totals from affected shaders:
SGPRS: 170608 -> 170472 (-0.08 %)
VGPRS: 99752 -> 99764 (0.01 %)
Spilled SGPRs: 10377 -> 10395 (0.17 %)
Code Size: 6332492 -> 6315880 (-0.26 %) bytes
Max Waves: 12317 -> 12314 (-0.02 %)

This helps some Rise Of Tomb Raider shaders, especially
when an expcnt instruction is added between two MRT exports.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
---
 src/amd/vulkan/radv_pipeline.c | 27 ++++++++++++++++++++++-----
 1 file changed, 22 insertions(+), 5 deletions(-)

Patch hide | download patch | download mbox

diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index 138e153f9a4..25281c3c6da 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -372,9 +372,10 @@  static bool is_dual_src(VkBlendFactor factor)
 	}
 }
 
-static unsigned si_choose_spi_color_format(VkFormat vk_format,
-					    bool blend_enable,
-					    bool blend_need_alpha)
+static unsigned si_choose_spi_color_format(struct radv_device *device,
+					   VkFormat vk_format,
+					   bool blend_enable,
+					   bool blend_need_alpha)
 {
 	const struct vk_format_description *desc = vk_format_description(vk_format);
 	unsigned format, ntype, swap;
@@ -486,6 +487,20 @@  static unsigned si_choose_spi_color_format(VkFormat vk_format,
 		unreachable("unhandled blend format");
 	}
 
+	if (device && /* NULL for internal meta formats */
+	    !device->physical_device->rbplus_allowed) {
+		/* Try to select better export formats for chips without Rb+. */
+		if (desc->nr_channels == 1 &&
+		    desc->channel[0].size == 8 &&
+		    swap == V_028C70_SWAP_STD && /* R */
+		    !vk_format_is_srgb(vk_format)) {
+			/* Do not need to export two channels for some R8
+			 * formats when alpha blending isn't used.
+			 */
+			blend = normal = V_028714_SPI_SHADER_32_R;
+		}
+	}
+
 	if (blend_enable && blend_need_alpha)
 		return blend_alpha;
 	else if(blend_need_alpha)
@@ -516,7 +531,8 @@  radv_pipeline_compute_spi_color_formats(struct radv_pipeline *pipeline,
 			bool blend_enable =
 				blend->blend_enable_4bit & (0xfu << (i * 4));
 
-			cf = si_choose_spi_color_format(attachment->format,
+			cf = si_choose_spi_color_format(pipeline->device,
+							attachment->format,
 			                                blend_enable,
 			                                blend->need_src_alpha & (1 << i));
 		}
@@ -586,7 +602,8 @@  const VkFormat radv_fs_key_format_exemplars[NUM_META_FS_KEYS] = {
 
 unsigned radv_format_meta_fs_key(VkFormat format)
 {
-	unsigned col_format = si_choose_spi_color_format(format, false, false);
+	unsigned col_format =
+		si_choose_spi_color_format(NULL, format, false, false);
 
 	assert(col_format != V_028714_SPI_SHADER_32_AR);
 	if (col_format >= V_028714_SPI_SHADER_32_AR)

Comments

On Tue, Jan 22, 2019 at 4:32 PM Samuel Pitoiset
<samuel.pitoiset@gmail.com> wrote:
>
> For some R8 formats, it's useless to export two channels
> when no alpha blending is used. I assume the CB should
> automatically clamps its inputs.
>
> 29077 shaders in 15096 tests
> Totals:
> SGPRS: 1321106 -> 1320970 (-0.01 %)
> VGPRS: 935936 -> 935948 (0.00 %)
> Spilled SGPRs: 25186 -> 25204 (0.07 %)
> Code Size: 49813556 -> 49796944 (-0.03 %) bytes
> Max Waves: 242091 -> 242088 (-0.00 %)
>
> Totals from affected shaders:
> SGPRS: 170608 -> 170472 (-0.08 %)
> VGPRS: 99752 -> 99764 (0.01 %)
> Spilled SGPRs: 10377 -> 10395 (0.17 %)
> Code Size: 6332492 -> 6315880 (-0.26 %) bytes
> Max Waves: 12317 -> 12314 (-0.02 %)
>
> This helps some Rise Of Tomb Raider shaders, especially
> when an expcnt instruction is added between two MRT exports.
>
> Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
> ---
>  src/amd/vulkan/radv_pipeline.c | 27 ++++++++++++++++++++++-----
>  1 file changed, 22 insertions(+), 5 deletions(-)
>
> diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
> index 138e153f9a4..25281c3c6da 100644
> --- a/src/amd/vulkan/radv_pipeline.c
> +++ b/src/amd/vulkan/radv_pipeline.c
> @@ -372,9 +372,10 @@ static bool is_dual_src(VkBlendFactor factor)
>         }
>  }
>
> -static unsigned si_choose_spi_color_format(VkFormat vk_format,
> -                                           bool blend_enable,
> -                                           bool blend_need_alpha)
> +static unsigned si_choose_spi_color_format(struct radv_device *device,
> +                                          VkFormat vk_format,
> +                                          bool blend_enable,
> +                                          bool blend_need_alpha)
>  {
>         const struct vk_format_description *desc = vk_format_description(vk_format);
>         unsigned format, ntype, swap;
> @@ -486,6 +487,20 @@ static unsigned si_choose_spi_color_format(VkFormat vk_format,
>                 unreachable("unhandled blend format");
>         }
>
> +       if (device && /* NULL for internal meta formats */
> +           !device->physical_device->rbplus_allowed) {
> +               /* Try to select better export formats for chips without Rb+. */
> +               if (desc->nr_channels == 1 &&
> +                   desc->channel[0].size == 8 &&

Why do we only allow this for 8 bit components? Why not also for
16/32-bit components?

> +                   swap == V_028C70_SWAP_STD && /* R */
> +                   !vk_format_is_srgb(vk_format)) {
> +                       /* Do not need to export two channels for some R8
> +                        * formats when alpha blending isn't used.
> +                        */
> +                       blend = normal = V_028714_SPI_SHADER_32_R;
> +               }
> +       }
> +
>         if (blend_enable && blend_need_alpha)
>                 return blend_alpha;
>         else if(blend_need_alpha)
> @@ -516,7 +531,8 @@ radv_pipeline_compute_spi_color_formats(struct radv_pipeline *pipeline,
>                         bool blend_enable =
>                                 blend->blend_enable_4bit & (0xfu << (i * 4));
>
> -                       cf = si_choose_spi_color_format(attachment->format,
> +                       cf = si_choose_spi_color_format(pipeline->device,
> +                                                       attachment->format,
>                                                         blend_enable,
>                                                         blend->need_src_alpha & (1 << i));
>                 }
> @@ -586,7 +602,8 @@ const VkFormat radv_fs_key_format_exemplars[NUM_META_FS_KEYS] = {
>
>  unsigned radv_format_meta_fs_key(VkFormat format)
>  {
> -       unsigned col_format = si_choose_spi_color_format(format, false, false);
> +       unsigned col_format =
> +               si_choose_spi_color_format(NULL, format, false, false);

I think we properly need to plumb through the device here?
>
>         assert(col_format != V_028714_SPI_SHADER_32_AR);
>         if (col_format >= V_028714_SPI_SHADER_32_AR)
> --
> 2.20.1
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
On 1/22/19 10:50 PM, Bas Nieuwenhuizen wrote:
> On Tue, Jan 22, 2019 at 4:32 PM Samuel Pitoiset
> <samuel.pitoiset@gmail.com> wrote:
>> For some R8 formats, it's useless to export two channels
>> when no alpha blending is used. I assume the CB should
>> automatically clamps its inputs.
>>
>> 29077 shaders in 15096 tests
>> Totals:
>> SGPRS: 1321106 -> 1320970 (-0.01 %)
>> VGPRS: 935936 -> 935948 (0.00 %)
>> Spilled SGPRs: 25186 -> 25204 (0.07 %)
>> Code Size: 49813556 -> 49796944 (-0.03 %) bytes
>> Max Waves: 242091 -> 242088 (-0.00 %)
>>
>> Totals from affected shaders:
>> SGPRS: 170608 -> 170472 (-0.08 %)
>> VGPRS: 99752 -> 99764 (0.01 %)
>> Spilled SGPRs: 10377 -> 10395 (0.17 %)
>> Code Size: 6332492 -> 6315880 (-0.26 %) bytes
>> Max Waves: 12317 -> 12314 (-0.02 %)
>>
>> This helps some Rise Of Tomb Raider shaders, especially
>> when an expcnt instruction is added between two MRT exports.
>>
>> Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
>> ---
>>   src/amd/vulkan/radv_pipeline.c | 27 ++++++++++++++++++++++-----
>>   1 file changed, 22 insertions(+), 5 deletions(-)
>>
>> diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
>> index 138e153f9a4..25281c3c6da 100644
>> --- a/src/amd/vulkan/radv_pipeline.c
>> +++ b/src/amd/vulkan/radv_pipeline.c
>> @@ -372,9 +372,10 @@ static bool is_dual_src(VkBlendFactor factor)
>>          }
>>   }
>>
>> -static unsigned si_choose_spi_color_format(VkFormat vk_format,
>> -                                           bool blend_enable,
>> -                                           bool blend_need_alpha)
>> +static unsigned si_choose_spi_color_format(struct radv_device *device,
>> +                                          VkFormat vk_format,
>> +                                          bool blend_enable,
>> +                                          bool blend_need_alpha)
>>   {
>>          const struct vk_format_description *desc = vk_format_description(vk_format);
>>          unsigned format, ntype, swap;
>> @@ -486,6 +487,20 @@ static unsigned si_choose_spi_color_format(VkFormat vk_format,
>>                  unreachable("unhandled blend format");
>>          }
>>
>> +       if (device && /* NULL for internal meta formats */
>> +           !device->physical_device->rbplus_allowed) {
>> +               /* Try to select better export formats for chips without Rb+. */
>> +               if (desc->nr_channels == 1 &&
>> +                   desc->channel[0].size == 8 &&
> Why do we only allow this for 8 bit components? Why not also for
> 16/32-bit components?

For 32-bit, that should be already handled in the switch above. For 
16-bit, that should work but this hits a crash in LLVM.

I should probably add a TODO.

>
>> +                   swap == V_028C70_SWAP_STD && /* R */
>> +                   !vk_format_is_srgb(vk_format)) {
>> +                       /* Do not need to export two channels for some R8
>> +                        * formats when alpha blending isn't used.
>> +                        */
>> +                       blend = normal = V_028714_SPI_SHADER_32_R;
>> +               }
>> +       }
>> +
>>          if (blend_enable && blend_need_alpha)
>>                  return blend_alpha;
>>          else if(blend_need_alpha)
>> @@ -516,7 +531,8 @@ radv_pipeline_compute_spi_color_formats(struct radv_pipeline *pipeline,
>>                          bool blend_enable =
>>                                  blend->blend_enable_4bit & (0xfu << (i * 4));
>>
>> -                       cf = si_choose_spi_color_format(attachment->format,
>> +                       cf = si_choose_spi_color_format(pipeline->device,
>> +                                                       attachment->format,
>>                                                          blend_enable,
>>                                                          blend->need_src_alpha & (1 << i));
>>                  }
>> @@ -586,7 +602,8 @@ const VkFormat radv_fs_key_format_exemplars[NUM_META_FS_KEYS] = {
>>
>>   unsigned radv_format_meta_fs_key(VkFormat format)
>>   {
>> -       unsigned col_format = si_choose_spi_color_format(format, false, false);
>> +       unsigned col_format =
>> +               si_choose_spi_color_format(NULL, format, false, false);
> I think we properly need to plumb through the device here?

Yeah, but that breaks a bunch of tests. This function looks a bit hacky 
to me.

Requires investigations.

>>          assert(col_format != V_028714_SPI_SHADER_32_AR);
>>          if (col_format >= V_028714_SPI_SHADER_32_AR)
>> --
>> 2.20.1
>>
>> _______________________________________________
>> mesa-dev mailing list
>> mesa-dev@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev