[Mesa-dev] radeon/llvm: don't use a static array size for radeon_llvm_context::arrays

Submitted by Marek Olšák on May 26, 2015, 2:04 p.m.

Details

Message ID 1432649085-10401-1-git-send-email-maraeo@gmail.com
State New
Headers show

Not browsing as part of any series.

Commit Message

Marek Olšák May 26, 2015, 2:04 p.m.
From: Marek Olšák <marek.olsak@amd.com>

---
 src/gallium/drivers/radeon/radeon_llvm.h            |  5 ++---
 src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c | 15 +++++++++++----
 2 files changed, 13 insertions(+), 7 deletions(-)

Patch hide | download patch | download mbox

diff --git a/src/gallium/drivers/radeon/radeon_llvm.h b/src/gallium/drivers/radeon/radeon_llvm.h
index 8612ef8..ec4c343 100644
--- a/src/gallium/drivers/radeon/radeon_llvm.h
+++ b/src/gallium/drivers/radeon/radeon_llvm.h
@@ -33,7 +33,6 @@ 
 
 #define RADEON_LLVM_MAX_INPUTS 32 * 4
 #define RADEON_LLVM_MAX_OUTPUTS 32 * 4
-#define RADEON_LLVM_MAX_ARRAYS 16
 
 #define RADEON_LLVM_INITIAL_CF_DEPTH 4
 
@@ -130,8 +129,8 @@  struct radeon_llvm_context {
 	unsigned loop_depth;
 	unsigned loop_depth_max;
 
-	struct tgsi_declaration_range arrays[RADEON_LLVM_MAX_ARRAYS];
-	unsigned num_arrays;
+	struct tgsi_declaration_range *arrays;
+	unsigned max_num_arrays;
 
 	LLVMValueRef main_fn;
 
diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
index 8638537..af5eb88 100644
--- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
+++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
@@ -85,8 +85,9 @@  get_array_range(struct lp_build_tgsi_context *bld_base,
 		unsigned File, const struct tgsi_ind_register *reg)
 {
 	struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
+
 	if (File != TGSI_FILE_TEMPORARY || reg->ArrayID == 0 ||
-            reg->ArrayID > RADEON_LLVM_MAX_ARRAYS) {
+	    reg->ArrayID-1 >= ctx->max_num_arrays) {
 		struct tgsi_declaration_range range;
 		range.First = 0;
 		range.Last = bld_base->info->file_max[File];
@@ -252,8 +253,14 @@  static void emit_declaration(
 	}
 
 	case TGSI_FILE_TEMPORARY:
-		if (decl->Declaration.Array && decl->Array.ArrayID <= RADEON_LLVM_MAX_ARRAYS)
+		if (decl->Declaration.Array) {
+			if (decl->Array.ArrayID-1 >= ctx->max_num_arrays) {
+				ctx->max_num_arrays += 32;
+				ctx->arrays = realloc(ctx->arrays,
+						sizeof(ctx->arrays[0]) * ctx->max_num_arrays);
+			}
 			ctx->arrays[decl->Array.ArrayID - 1] = decl->Range;
+		}
 		if (uses_temp_indirect_addressing(bld_base)) {
 			lp_emit_declaration_soa(bld_base, decl);
 			break;
@@ -1432,8 +1439,6 @@  void radeon_llvm_context_init(struct radeon_llvm_context * ctx)
 	/* Allocate outputs */
 	ctx->soa.outputs = ctx->outputs;
 
-	ctx->num_arrays = 0;
-
 	/* XXX: Is there a better way to initialize all this ? */
 
 	lp_set_default_actions(bld_base);
@@ -1622,6 +1627,8 @@  void radeon_llvm_dispose(struct radeon_llvm_context * ctx)
 {
 	LLVMDisposeModule(ctx->soa.bld_base.base.gallivm->module);
 	LLVMContextDispose(ctx->soa.bld_base.base.gallivm->context);
+	FREE(ctx->arrays);
+	ctx->arrays = NULL;
 	FREE(ctx->temps);
 	ctx->temps = NULL;
 	FREE(ctx->loop);

Comments

On 26/05/15 14:04, Marek Olšák wrote:
> From: Marek Olšák <marek.olsak@amd.com>
> 
> ---
>  src/gallium/drivers/radeon/radeon_llvm.h            |  5 ++---
>  src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c | 15 +++++++++++----
>  2 files changed, 13 insertions(+), 7 deletions(-)
> 
> diff --git a/src/gallium/drivers/radeon/radeon_llvm.h b/src/gallium/drivers/radeon/radeon_llvm.h
> index 8612ef8..ec4c343 100644
> --- a/src/gallium/drivers/radeon/radeon_llvm.h
> +++ b/src/gallium/drivers/radeon/radeon_llvm.h
> @@ -33,7 +33,6 @@
>  
>  #define RADEON_LLVM_MAX_INPUTS 32 * 4
>  #define RADEON_LLVM_MAX_OUTPUTS 32 * 4
> -#define RADEON_LLVM_MAX_ARRAYS 16
>  
>  #define RADEON_LLVM_INITIAL_CF_DEPTH 4
>  
> @@ -130,8 +129,8 @@ struct radeon_llvm_context {
>  	unsigned loop_depth;
>  	unsigned loop_depth_max;
>  
> -	struct tgsi_declaration_range arrays[RADEON_LLVM_MAX_ARRAYS];
> -	unsigned num_arrays;
> +	struct tgsi_declaration_range *arrays;
> +	unsigned max_num_arrays;
>  
>  	LLVMValueRef main_fn;
>  
> diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
> index 8638537..af5eb88 100644
> --- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
> +++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
> @@ -85,8 +85,9 @@ get_array_range(struct lp_build_tgsi_context *bld_base,
>  		unsigned File, const struct tgsi_ind_register *reg)
>  {
>  	struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
> +
>  	if (File != TGSI_FILE_TEMPORARY || reg->ArrayID == 0 ||
> -            reg->ArrayID > RADEON_LLVM_MAX_ARRAYS) {
> +	    reg->ArrayID-1 >= ctx->max_num_arrays) {
>  		struct tgsi_declaration_range range;
>  		range.First = 0;
>  		range.Last = bld_base->info->file_max[File];
> @@ -252,8 +253,14 @@ static void emit_declaration(
>  	}
>  
>  	case TGSI_FILE_TEMPORARY:
> -		if (decl->Declaration.Array && decl->Array.ArrayID <= RADEON_LLVM_MAX_ARRAYS)
> +		if (decl->Declaration.Array) {
> +			if (decl->Array.ArrayID-1 >= ctx->max_num_arrays) {
> +				ctx->max_num_arrays += 32;
> +				ctx->arrays = realloc(ctx->arrays,
You should use the REALLOC wrapper, considering the FREE below.

> +						sizeof(ctx->arrays[0]) * ctx->max_num_arrays);
> +			}
>  			ctx->arrays[decl->Array.ArrayID - 1] = decl->Range;
> +		}
>  		if (uses_temp_indirect_addressing(bld_base)) {
>  			lp_emit_declaration_soa(bld_base, decl);
>  			break;
> @@ -1432,8 +1439,6 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx)
>  	/* Allocate outputs */
>  	ctx->soa.outputs = ctx->outputs;
>  
> -	ctx->num_arrays = 0;
> -
>  	/* XXX: Is there a better way to initialize all this ? */
>  
>  	lp_set_default_actions(bld_base);
> @@ -1622,6 +1627,8 @@ void radeon_llvm_dispose(struct radeon_llvm_context * ctx)
>  {
>  	LLVMDisposeModule(ctx->soa.bld_base.base.gallivm->module);
>  	LLVMContextDispose(ctx->soa.bld_base.base.gallivm->context);
> +	FREE(ctx->arrays);
> +	ctx->arrays = NULL;
Not particularly familiar with the radeon driver(s) so this might come a
bit silly - shouldn't one zero max_num_arrays at this stage ?

Similar counters (temps_count, output_reg_count) seems to be left
unchanged, while others (branch_depth_max, loop_depth_max) are reset.

Thanks
Emil
On Tue, May 26, 2015 at 10:19 PM, Emil Velikov <emil.l.velikov@gmail.com> wrote:
> On 26/05/15 14:04, Marek Olšák wrote:
>> From: Marek Olšák <marek.olsak@amd.com>
>>
>> ---
>>  src/gallium/drivers/radeon/radeon_llvm.h            |  5 ++---
>>  src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c | 15 +++++++++++----
>>  2 files changed, 13 insertions(+), 7 deletions(-)
>>
>> diff --git a/src/gallium/drivers/radeon/radeon_llvm.h b/src/gallium/drivers/radeon/radeon_llvm.h
>> index 8612ef8..ec4c343 100644
>> --- a/src/gallium/drivers/radeon/radeon_llvm.h
>> +++ b/src/gallium/drivers/radeon/radeon_llvm.h
>> @@ -33,7 +33,6 @@
>>
>>  #define RADEON_LLVM_MAX_INPUTS 32 * 4
>>  #define RADEON_LLVM_MAX_OUTPUTS 32 * 4
>> -#define RADEON_LLVM_MAX_ARRAYS 16
>>
>>  #define RADEON_LLVM_INITIAL_CF_DEPTH 4
>>
>> @@ -130,8 +129,8 @@ struct radeon_llvm_context {
>>       unsigned loop_depth;
>>       unsigned loop_depth_max;
>>
>> -     struct tgsi_declaration_range arrays[RADEON_LLVM_MAX_ARRAYS];
>> -     unsigned num_arrays;
>> +     struct tgsi_declaration_range *arrays;
>> +     unsigned max_num_arrays;
>>
>>       LLVMValueRef main_fn;
>>
>> diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
>> index 8638537..af5eb88 100644
>> --- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
>> +++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
>> @@ -85,8 +85,9 @@ get_array_range(struct lp_build_tgsi_context *bld_base,
>>               unsigned File, const struct tgsi_ind_register *reg)
>>  {
>>       struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
>> +
>>       if (File != TGSI_FILE_TEMPORARY || reg->ArrayID == 0 ||
>> -            reg->ArrayID > RADEON_LLVM_MAX_ARRAYS) {
>> +         reg->ArrayID-1 >= ctx->max_num_arrays) {
>>               struct tgsi_declaration_range range;
>>               range.First = 0;
>>               range.Last = bld_base->info->file_max[File];
>> @@ -252,8 +253,14 @@ static void emit_declaration(
>>       }
>>
>>       case TGSI_FILE_TEMPORARY:
>> -             if (decl->Declaration.Array && decl->Array.ArrayID <= RADEON_LLVM_MAX_ARRAYS)
>> +             if (decl->Declaration.Array) {
>> +                     if (decl->Array.ArrayID-1 >= ctx->max_num_arrays) {
>> +                             ctx->max_num_arrays += 32;
>> +                             ctx->arrays = realloc(ctx->arrays,
> You should use the REALLOC wrapper, considering the FREE below.
>
>> +                                             sizeof(ctx->arrays[0]) * ctx->max_num_arrays);
>> +                     }
>>                       ctx->arrays[decl->Array.ArrayID - 1] = decl->Range;
>> +             }
>>               if (uses_temp_indirect_addressing(bld_base)) {
>>                       lp_emit_declaration_soa(bld_base, decl);
>>                       break;
>> @@ -1432,8 +1439,6 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx)
>>       /* Allocate outputs */
>>       ctx->soa.outputs = ctx->outputs;
>>
>> -     ctx->num_arrays = 0;
>> -
>>       /* XXX: Is there a better way to initialize all this ? */
>>
>>       lp_set_default_actions(bld_base);
>> @@ -1622,6 +1627,8 @@ void radeon_llvm_dispose(struct radeon_llvm_context * ctx)
>>  {
>>       LLVMDisposeModule(ctx->soa.bld_base.base.gallivm->module);
>>       LLVMContextDispose(ctx->soa.bld_base.base.gallivm->context);
>> +     FREE(ctx->arrays);
>> +     ctx->arrays = NULL;
> Not particularly familiar with the radeon driver(s) so this might come a
> bit silly - shouldn't one zero max_num_arrays at this stage ?

Yes, I've sent a new patch for that.
>
> Similar counters (temps_count, output_reg_count) seems to be left
> unchanged, while others (branch_depth_max, loop_depth_max) are reset.

I've sent a patch for temps_count. I'm not touching output_reg_count,
because it's only used by the dying r600g LLVM backend.

Marek
On Wed, 2015-05-27 at 00:36 +0200, Marek Olšák wrote:
> On Tue, May 26, 2015 at 10:19 PM, Emil Velikov <emil.l.velikov@gmail.com> wrote:
> > On 26/05/15 14:04, Marek Olšák wrote:
> >> From: Marek Olšák <marek.olsak@amd.com>
> >>
> >> ---
> >>  src/gallium/drivers/radeon/radeon_llvm.h            |  5 ++---
> >>  src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c | 15 +++++++++++----
> >>  2 files changed, 13 insertions(+), 7 deletions(-)
> >>
> >> diff --git a/src/gallium/drivers/radeon/radeon_llvm.h b/src/gallium/drivers/radeon/radeon_llvm.h
> >> index 8612ef8..ec4c343 100644
> >> --- a/src/gallium/drivers/radeon/radeon_llvm.h
> >> +++ b/src/gallium/drivers/radeon/radeon_llvm.h
> >> @@ -33,7 +33,6 @@
> >>
> >>  #define RADEON_LLVM_MAX_INPUTS 32 * 4
> >>  #define RADEON_LLVM_MAX_OUTPUTS 32 * 4
> >> -#define RADEON_LLVM_MAX_ARRAYS 16
> >>
> >>  #define RADEON_LLVM_INITIAL_CF_DEPTH 4
> >>
> >> @@ -130,8 +129,8 @@ struct radeon_llvm_context {
> >>       unsigned loop_depth;
> >>       unsigned loop_depth_max;
> >>
> >> -     struct tgsi_declaration_range arrays[RADEON_LLVM_MAX_ARRAYS];
> >> -     unsigned num_arrays;
> >> +     struct tgsi_declaration_range *arrays;
> >> +     unsigned max_num_arrays;
> >>
> >>       LLVMValueRef main_fn;
> >>
> >> diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
> >> index 8638537..af5eb88 100644
> >> --- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
> >> +++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
> >> @@ -85,8 +85,9 @@ get_array_range(struct lp_build_tgsi_context *bld_base,
> >>               unsigned File, const struct tgsi_ind_register *reg)
> >>  {
> >>       struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
> >> +
> >>       if (File != TGSI_FILE_TEMPORARY || reg->ArrayID == 0 ||
> >> -            reg->ArrayID > RADEON_LLVM_MAX_ARRAYS) {
> >> +         reg->ArrayID-1 >= ctx->max_num_arrays) {
> >>               struct tgsi_declaration_range range;
> >>               range.First = 0;
> >>               range.Last = bld_base->info->file_max[File];
> >> @@ -252,8 +253,14 @@ static void emit_declaration(
> >>       }
> >>
> >>       case TGSI_FILE_TEMPORARY:
> >> -             if (decl->Declaration.Array && decl->Array.ArrayID <= RADEON_LLVM_MAX_ARRAYS)
> >> +             if (decl->Declaration.Array) {
> >> +                     if (decl->Array.ArrayID-1 >= ctx->max_num_arrays) {
> >> +                             ctx->max_num_arrays += 32;
> >> +                             ctx->arrays = realloc(ctx->arrays,
> > You should use the REALLOC wrapper, considering the FREE below.
> >
> >> +                                             sizeof(ctx->arrays[0]) * ctx->max_num_arrays);
> >> +                     }
> >>                       ctx->arrays[decl->Array.ArrayID - 1] = decl->Range;
> >> +             }
> >>               if (uses_temp_indirect_addressing(bld_base)) {
> >>                       lp_emit_declaration_soa(bld_base, decl);
> >>                       break;
> >> @@ -1432,8 +1439,6 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx)
> >>       /* Allocate outputs */
> >>       ctx->soa.outputs = ctx->outputs;
> >>
> >> -     ctx->num_arrays = 0;
> >> -
> >>       /* XXX: Is there a better way to initialize all this ? */
> >>
> >>       lp_set_default_actions(bld_base);
> >> @@ -1622,6 +1627,8 @@ void radeon_llvm_dispose(struct radeon_llvm_context * ctx)
> >>  {
> >>       LLVMDisposeModule(ctx->soa.bld_base.base.gallivm->module);
> >>       LLVMContextDispose(ctx->soa.bld_base.base.gallivm->context);
> >> +     FREE(ctx->arrays);
> >> +     ctx->arrays = NULL;
> > Not particularly familiar with the radeon driver(s) so this might come a
> > bit silly - shouldn't one zero max_num_arrays at this stage ?
> 
> Yes, I've sent a new patch for that.
> >
> > Similar counters (temps_count, output_reg_count) seems to be left
> > unchanged, while others (branch_depth_max, loop_depth_max) are reset.
> 
> I've sent a patch for temps_count. I'm not touching output_reg_count,
> because it's only used by the dying r600g LLVM backend.

just out of curiosity, why is it dying?

jan

> 
> Marek
> _______________________________________________
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
On Wed, May 27, 2015 at 12:39 AM, Jan Vesely <jan.vesely@rutgers.edu> wrote:
> On Wed, 2015-05-27 at 00:36 +0200, Marek Olšák wrote:
>> On Tue, May 26, 2015 at 10:19 PM, Emil Velikov <emil.l.velikov@gmail.com> wrote:
>> > On 26/05/15 14:04, Marek Olšák wrote:
>> >> From: Marek Olšák <marek.olsak@amd.com>
>> >>
>> >> ---
>> >>  src/gallium/drivers/radeon/radeon_llvm.h            |  5 ++---
>> >>  src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c | 15 +++++++++++----
>> >>  2 files changed, 13 insertions(+), 7 deletions(-)
>> >>
>> >> diff --git a/src/gallium/drivers/radeon/radeon_llvm.h b/src/gallium/drivers/radeon/radeon_llvm.h
>> >> index 8612ef8..ec4c343 100644
>> >> --- a/src/gallium/drivers/radeon/radeon_llvm.h
>> >> +++ b/src/gallium/drivers/radeon/radeon_llvm.h
>> >> @@ -33,7 +33,6 @@
>> >>
>> >>  #define RADEON_LLVM_MAX_INPUTS 32 * 4
>> >>  #define RADEON_LLVM_MAX_OUTPUTS 32 * 4
>> >> -#define RADEON_LLVM_MAX_ARRAYS 16
>> >>
>> >>  #define RADEON_LLVM_INITIAL_CF_DEPTH 4
>> >>
>> >> @@ -130,8 +129,8 @@ struct radeon_llvm_context {
>> >>       unsigned loop_depth;
>> >>       unsigned loop_depth_max;
>> >>
>> >> -     struct tgsi_declaration_range arrays[RADEON_LLVM_MAX_ARRAYS];
>> >> -     unsigned num_arrays;
>> >> +     struct tgsi_declaration_range *arrays;
>> >> +     unsigned max_num_arrays;
>> >>
>> >>       LLVMValueRef main_fn;
>> >>
>> >> diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
>> >> index 8638537..af5eb88 100644
>> >> --- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
>> >> +++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
>> >> @@ -85,8 +85,9 @@ get_array_range(struct lp_build_tgsi_context *bld_base,
>> >>               unsigned File, const struct tgsi_ind_register *reg)
>> >>  {
>> >>       struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
>> >> +
>> >>       if (File != TGSI_FILE_TEMPORARY || reg->ArrayID == 0 ||
>> >> -            reg->ArrayID > RADEON_LLVM_MAX_ARRAYS) {
>> >> +         reg->ArrayID-1 >= ctx->max_num_arrays) {
>> >>               struct tgsi_declaration_range range;
>> >>               range.First = 0;
>> >>               range.Last = bld_base->info->file_max[File];
>> >> @@ -252,8 +253,14 @@ static void emit_declaration(
>> >>       }
>> >>
>> >>       case TGSI_FILE_TEMPORARY:
>> >> -             if (decl->Declaration.Array && decl->Array.ArrayID <= RADEON_LLVM_MAX_ARRAYS)
>> >> +             if (decl->Declaration.Array) {
>> >> +                     if (decl->Array.ArrayID-1 >= ctx->max_num_arrays) {
>> >> +                             ctx->max_num_arrays += 32;
>> >> +                             ctx->arrays = realloc(ctx->arrays,
>> > You should use the REALLOC wrapper, considering the FREE below.
>> >
>> >> +                                             sizeof(ctx->arrays[0]) * ctx->max_num_arrays);
>> >> +                     }
>> >>                       ctx->arrays[decl->Array.ArrayID - 1] = decl->Range;
>> >> +             }
>> >>               if (uses_temp_indirect_addressing(bld_base)) {
>> >>                       lp_emit_declaration_soa(bld_base, decl);
>> >>                       break;
>> >> @@ -1432,8 +1439,6 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx)
>> >>       /* Allocate outputs */
>> >>       ctx->soa.outputs = ctx->outputs;
>> >>
>> >> -     ctx->num_arrays = 0;
>> >> -
>> >>       /* XXX: Is there a better way to initialize all this ? */
>> >>
>> >>       lp_set_default_actions(bld_base);
>> >> @@ -1622,6 +1627,8 @@ void radeon_llvm_dispose(struct radeon_llvm_context * ctx)
>> >>  {
>> >>       LLVMDisposeModule(ctx->soa.bld_base.base.gallivm->module);
>> >>       LLVMContextDispose(ctx->soa.bld_base.base.gallivm->context);
>> >> +     FREE(ctx->arrays);
>> >> +     ctx->arrays = NULL;
>> > Not particularly familiar with the radeon driver(s) so this might come a
>> > bit silly - shouldn't one zero max_num_arrays at this stage ?
>>
>> Yes, I've sent a new patch for that.
>> >
>> > Similar counters (temps_count, output_reg_count) seems to be left
>> > unchanged, while others (branch_depth_max, loop_depth_max) are reset.
>>
>> I've sent a patch for temps_count. I'm not touching output_reg_count,
>> because it's only used by the dying r600g LLVM backend.
>
> just out of curiosity, why is it dying?

The OpenGL shader support is dying because the LLVM backend is not
used by default, it lacks some features, and SB should perform
equally, if not better.

The LLVM backend is still important for OpenCL though.

Marek
On Tue, May 26, 2015 at 5:45 PM, Marek Olšák <maraeo@gmail.com> wrote:

> On Wed, May 27, 2015 at 12:39 AM, Jan Vesely <jan.vesely@rutgers.edu>
> wrote:
> > On Wed, 2015-05-27 at 00:36 +0200, Marek Olšák wrote:
> >> On Tue, May 26, 2015 at 10:19 PM, Emil Velikov <
> emil.l.velikov@gmail.com> wrote:
> >> > On 26/05/15 14:04, Marek Olšák wrote:
> >> >> From: Marek Olšák <marek.olsak@amd.com>
> >> >>
> >> >> ---
> >> >>  src/gallium/drivers/radeon/radeon_llvm.h            |  5 ++---
> >> >>  src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c | 15
> +++++++++++----
> >> >>  2 files changed, 13 insertions(+), 7 deletions(-)
> >> >>
> >> >> diff --git a/src/gallium/drivers/radeon/radeon_llvm.h
> b/src/gallium/drivers/radeon/radeon_llvm.h
> >> >> index 8612ef8..ec4c343 100644
> >> >> --- a/src/gallium/drivers/radeon/radeon_llvm.h
> >> >> +++ b/src/gallium/drivers/radeon/radeon_llvm.h
> >> >> @@ -33,7 +33,6 @@
> >> >>
> >> >>  #define RADEON_LLVM_MAX_INPUTS 32 * 4
> >> >>  #define RADEON_LLVM_MAX_OUTPUTS 32 * 4
> >> >> -#define RADEON_LLVM_MAX_ARRAYS 16
> >> >>
> >> >>  #define RADEON_LLVM_INITIAL_CF_DEPTH 4
> >> >>
> >> >> @@ -130,8 +129,8 @@ struct radeon_llvm_context {
> >> >>       unsigned loop_depth;
> >> >>       unsigned loop_depth_max;
> >> >>
> >> >> -     struct tgsi_declaration_range arrays[RADEON_LLVM_MAX_ARRAYS];
> >> >> -     unsigned num_arrays;
> >> >> +     struct tgsi_declaration_range *arrays;
> >> >> +     unsigned max_num_arrays;
> >> >>
> >> >>       LLVMValueRef main_fn;
> >> >>
> >> >> diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
> b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
> >> >> index 8638537..af5eb88 100644
> >> >> --- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
> >> >> +++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
> >> >> @@ -85,8 +85,9 @@ get_array_range(struct lp_build_tgsi_context
> *bld_base,
> >> >>               unsigned File, const struct tgsi_ind_register *reg)
> >> >>  {
> >> >>       struct radeon_llvm_context * ctx =
> radeon_llvm_context(bld_base);
> >> >> +
> >> >>       if (File != TGSI_FILE_TEMPORARY || reg->ArrayID == 0 ||
> >> >> -            reg->ArrayID > RADEON_LLVM_MAX_ARRAYS) {
> >> >> +         reg->ArrayID-1 >= ctx->max_num_arrays) {
> >> >>               struct tgsi_declaration_range range;
> >> >>               range.First = 0;
> >> >>               range.Last = bld_base->info->file_max[File];
> >> >> @@ -252,8 +253,14 @@ static void emit_declaration(
> >> >>       }
> >> >>
> >> >>       case TGSI_FILE_TEMPORARY:
> >> >> -             if (decl->Declaration.Array && decl->Array.ArrayID <=
> RADEON_LLVM_MAX_ARRAYS)
> >> >> +             if (decl->Declaration.Array) {
> >> >> +                     if (decl->Array.ArrayID-1 >=
> ctx->max_num_arrays) {
> >> >> +                             ctx->max_num_arrays += 32;
> >> >> +                             ctx->arrays = realloc(ctx->arrays,
> >> > You should use the REALLOC wrapper, considering the FREE below.
> >> >
> >> >> +                                             sizeof(ctx->arrays[0])
> * ctx->max_num_arrays);
> >> >> +                     }
> >> >>                       ctx->arrays[decl->Array.ArrayID - 1] =
> decl->Range;
> >> >> +             }
> >> >>               if (uses_temp_indirect_addressing(bld_base)) {
> >> >>                       lp_emit_declaration_soa(bld_base, decl);
> >> >>                       break;
> >> >> @@ -1432,8 +1439,6 @@ void radeon_llvm_context_init(struct
> radeon_llvm_context * ctx)
> >> >>       /* Allocate outputs */
> >> >>       ctx->soa.outputs = ctx->outputs;
> >> >>
> >> >> -     ctx->num_arrays = 0;
> >> >> -
> >> >>       /* XXX: Is there a better way to initialize all this ? */
> >> >>
> >> >>       lp_set_default_actions(bld_base);
> >> >> @@ -1622,6 +1627,8 @@ void radeon_llvm_dispose(struct
> radeon_llvm_context * ctx)
> >> >>  {
> >> >>       LLVMDisposeModule(ctx->soa.bld_base.base.gallivm->module);
> >> >>       LLVMContextDispose(ctx->soa.bld_base.base.gallivm->context);
> >> >> +     FREE(ctx->arrays);
> >> >> +     ctx->arrays = NULL;
> >> > Not particularly familiar with the radeon driver(s) so this might
> come a
> >> > bit silly - shouldn't one zero max_num_arrays at this stage ?
> >>
> >> Yes, I've sent a new patch for that.
> >> >
> >> > Similar counters (temps_count, output_reg_count) seems to be left
> >> > unchanged, while others (branch_depth_max, loop_depth_max) are reset.
> >>
> >> I've sent a patch for temps_count. I'm not touching output_reg_count,
> >> because it's only used by the dying r600g LLVM backend.
> >
> > just out of curiosity, why is it dying?
>
> The OpenGL shader support is dying because the LLVM backend is not
> used by default, it lacks some features, and SB should perform
> equally, if not better.
>
> The LLVM backend is still important for OpenCL though.
>

wouldn't a functioning llvm-r600 backend for OCL, and the fact that SI uses
llvm for graphics mean that GL-llvm-r600 is lower maintenance solution?
I'm not familiar with GL codepaths, is the GL(tgsi)-llvm conversion so
complex (and different from SI) that it's better to use SB?

jan


>
> Marek
>
I don't think so. The LLVM IR generated by radeonsi is slightly
different from the one generated by r600g. The most code that can be
shared is shared already.

Right now the main problem of the R600 LLVM backend is lack of
features. I think it's approximately at the OpenGL 3.0 or 3.1 level of
support. That's not even good enough for thinking about using it.

If somebody commits to adding OpenGL 4.x support into it, maybe then
it will have some chance of surviving. If there are volunteers, please
discuss it with Dave Airlie.

Marek

On Fri, Jun 5, 2015 at 8:22 PM, Jan Vesely <jan.vesely@rutgers.edu> wrote:
>
>
> On Tue, May 26, 2015 at 5:45 PM, Marek Olšák <maraeo@gmail.com> wrote:
>>
>> On Wed, May 27, 2015 at 12:39 AM, Jan Vesely <jan.vesely@rutgers.edu>
>> wrote:
>> > On Wed, 2015-05-27 at 00:36 +0200, Marek Olšák wrote:
>> >> On Tue, May 26, 2015 at 10:19 PM, Emil Velikov
>> >> <emil.l.velikov@gmail.com> wrote:
>> >> > On 26/05/15 14:04, Marek Olšák wrote:
>> >> >> From: Marek Olšák <marek.olsak@amd.com>
>> >> >>
>> >> >> ---
>> >> >>  src/gallium/drivers/radeon/radeon_llvm.h            |  5 ++---
>> >> >>  src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c | 15
>> >> >> +++++++++++----
>> >> >>  2 files changed, 13 insertions(+), 7 deletions(-)
>> >> >>
>> >> >> diff --git a/src/gallium/drivers/radeon/radeon_llvm.h
>> >> >> b/src/gallium/drivers/radeon/radeon_llvm.h
>> >> >> index 8612ef8..ec4c343 100644
>> >> >> --- a/src/gallium/drivers/radeon/radeon_llvm.h
>> >> >> +++ b/src/gallium/drivers/radeon/radeon_llvm.h
>> >> >> @@ -33,7 +33,6 @@
>> >> >>
>> >> >>  #define RADEON_LLVM_MAX_INPUTS 32 * 4
>> >> >>  #define RADEON_LLVM_MAX_OUTPUTS 32 * 4
>> >> >> -#define RADEON_LLVM_MAX_ARRAYS 16
>> >> >>
>> >> >>  #define RADEON_LLVM_INITIAL_CF_DEPTH 4
>> >> >>
>> >> >> @@ -130,8 +129,8 @@ struct radeon_llvm_context {
>> >> >>       unsigned loop_depth;
>> >> >>       unsigned loop_depth_max;
>> >> >>
>> >> >> -     struct tgsi_declaration_range arrays[RADEON_LLVM_MAX_ARRAYS];
>> >> >> -     unsigned num_arrays;
>> >> >> +     struct tgsi_declaration_range *arrays;
>> >> >> +     unsigned max_num_arrays;
>> >> >>
>> >> >>       LLVMValueRef main_fn;
>> >> >>
>> >> >> diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
>> >> >> b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
>> >> >> index 8638537..af5eb88 100644
>> >> >> --- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
>> >> >> +++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
>> >> >> @@ -85,8 +85,9 @@ get_array_range(struct lp_build_tgsi_context
>> >> >> *bld_base,
>> >> >>               unsigned File, const struct tgsi_ind_register *reg)
>> >> >>  {
>> >> >>       struct radeon_llvm_context * ctx =
>> >> >> radeon_llvm_context(bld_base);
>> >> >> +
>> >> >>       if (File != TGSI_FILE_TEMPORARY || reg->ArrayID == 0 ||
>> >> >> -            reg->ArrayID > RADEON_LLVM_MAX_ARRAYS) {
>> >> >> +         reg->ArrayID-1 >= ctx->max_num_arrays) {
>> >> >>               struct tgsi_declaration_range range;
>> >> >>               range.First = 0;
>> >> >>               range.Last = bld_base->info->file_max[File];
>> >> >> @@ -252,8 +253,14 @@ static void emit_declaration(
>> >> >>       }
>> >> >>
>> >> >>       case TGSI_FILE_TEMPORARY:
>> >> >> -             if (decl->Declaration.Array && decl->Array.ArrayID <=
>> >> >> RADEON_LLVM_MAX_ARRAYS)
>> >> >> +             if (decl->Declaration.Array) {
>> >> >> +                     if (decl->Array.ArrayID-1 >=
>> >> >> ctx->max_num_arrays) {
>> >> >> +                             ctx->max_num_arrays += 32;
>> >> >> +                             ctx->arrays = realloc(ctx->arrays,
>> >> > You should use the REALLOC wrapper, considering the FREE below.
>> >> >
>> >> >> +                                             sizeof(ctx->arrays[0])
>> >> >> * ctx->max_num_arrays);
>> >> >> +                     }
>> >> >>                       ctx->arrays[decl->Array.ArrayID - 1] =
>> >> >> decl->Range;
>> >> >> +             }
>> >> >>               if (uses_temp_indirect_addressing(bld_base)) {
>> >> >>                       lp_emit_declaration_soa(bld_base, decl);
>> >> >>                       break;
>> >> >> @@ -1432,8 +1439,6 @@ void radeon_llvm_context_init(struct
>> >> >> radeon_llvm_context * ctx)
>> >> >>       /* Allocate outputs */
>> >> >>       ctx->soa.outputs = ctx->outputs;
>> >> >>
>> >> >> -     ctx->num_arrays = 0;
>> >> >> -
>> >> >>       /* XXX: Is there a better way to initialize all this ? */
>> >> >>
>> >> >>       lp_set_default_actions(bld_base);
>> >> >> @@ -1622,6 +1627,8 @@ void radeon_llvm_dispose(struct
>> >> >> radeon_llvm_context * ctx)
>> >> >>  {
>> >> >>       LLVMDisposeModule(ctx->soa.bld_base.base.gallivm->module);
>> >> >>       LLVMContextDispose(ctx->soa.bld_base.base.gallivm->context);
>> >> >> +     FREE(ctx->arrays);
>> >> >> +     ctx->arrays = NULL;
>> >> > Not particularly familiar with the radeon driver(s) so this might
>> >> > come a
>> >> > bit silly - shouldn't one zero max_num_arrays at this stage ?
>> >>
>> >> Yes, I've sent a new patch for that.
>> >> >
>> >> > Similar counters (temps_count, output_reg_count) seems to be left
>> >> > unchanged, while others (branch_depth_max, loop_depth_max) are reset.
>> >>
>> >> I've sent a patch for temps_count. I'm not touching output_reg_count,
>> >> because it's only used by the dying r600g LLVM backend.
>> >
>> > just out of curiosity, why is it dying?
>>
>> The OpenGL shader support is dying because the LLVM backend is not
>> used by default, it lacks some features, and SB should perform
>> equally, if not better.
>>
>> The LLVM backend is still important for OpenCL though.
>
>
> wouldn't a functioning llvm-r600 backend for OCL, and the fact that SI uses
> llvm for graphics mean that GL-llvm-r600 is lower maintenance solution?
> I'm not familiar with GL codepaths, is the GL(tgsi)-llvm conversion so
> complex (and different from SI) that it's better to use SB?
>
> jan
>
>>
>>
>> Marek
>
>
On Fri, Jun 5, 2015 at 1:49 PM, Marek Olšák <maraeo@gmail.com> wrote:

> I don't think so. The LLVM IR generated by radeonsi is slightly
> different from the one generated by r600g. The most code that can be
> shared is shared already.
>
> Right now the main problem of the R600 LLVM backend is lack of
> features. I think it's approximately at the OpenGL 3.0 or 3.1 level of
> support. That's not even good enough for thinking about using it.
>
> If somebody commits to adding OpenGL 4.x support into it, maybe then
> it will have some chance of surviving. If there are volunteers, please
> discuss it with Dave Airlie.
>

So the main work would be to add GL specific opcodes/intrinsics to the
backend?
I'd volunteer, but I think lack of time and GL knowledge would prevent me
from making any kind of commitment.

Jan


>
> Marek
>
> On Fri, Jun 5, 2015 at 8:22 PM, Jan Vesely <jan.vesely@rutgers.edu> wrote:
> >
> >
> > On Tue, May 26, 2015 at 5:45 PM, Marek Olšák <maraeo@gmail.com> wrote:
> >>
> >> On Wed, May 27, 2015 at 12:39 AM, Jan Vesely <jan.vesely@rutgers.edu>
> >> wrote:
> >> > On Wed, 2015-05-27 at 00:36 +0200, Marek Olšák wrote:
> >> >> On Tue, May 26, 2015 at 10:19 PM, Emil Velikov
> >> >> <emil.l.velikov@gmail.com> wrote:
> >> >> > On 26/05/15 14:04, Marek Olšák wrote:
> >> >> >> From: Marek Olšák <marek.olsak@amd.com>
> >> >> >>
> >> >> >> ---
> >> >> >>  src/gallium/drivers/radeon/radeon_llvm.h            |  5 ++---
> >> >> >>  src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c | 15
> >> >> >> +++++++++++----
> >> >> >>  2 files changed, 13 insertions(+), 7 deletions(-)
> >> >> >>
> >> >> >> diff --git a/src/gallium/drivers/radeon/radeon_llvm.h
> >> >> >> b/src/gallium/drivers/radeon/radeon_llvm.h
> >> >> >> index 8612ef8..ec4c343 100644
> >> >> >> --- a/src/gallium/drivers/radeon/radeon_llvm.h
> >> >> >> +++ b/src/gallium/drivers/radeon/radeon_llvm.h
> >> >> >> @@ -33,7 +33,6 @@
> >> >> >>
> >> >> >>  #define RADEON_LLVM_MAX_INPUTS 32 * 4
> >> >> >>  #define RADEON_LLVM_MAX_OUTPUTS 32 * 4
> >> >> >> -#define RADEON_LLVM_MAX_ARRAYS 16
> >> >> >>
> >> >> >>  #define RADEON_LLVM_INITIAL_CF_DEPTH 4
> >> >> >>
> >> >> >> @@ -130,8 +129,8 @@ struct radeon_llvm_context {
> >> >> >>       unsigned loop_depth;
> >> >> >>       unsigned loop_depth_max;
> >> >> >>
> >> >> >> -     struct tgsi_declaration_range
> arrays[RADEON_LLVM_MAX_ARRAYS];
> >> >> >> -     unsigned num_arrays;
> >> >> >> +     struct tgsi_declaration_range *arrays;
> >> >> >> +     unsigned max_num_arrays;
> >> >> >>
> >> >> >>       LLVMValueRef main_fn;
> >> >> >>
> >> >> >> diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
> >> >> >> b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
> >> >> >> index 8638537..af5eb88 100644
> >> >> >> --- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
> >> >> >> +++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
> >> >> >> @@ -85,8 +85,9 @@ get_array_range(struct lp_build_tgsi_context
> >> >> >> *bld_base,
> >> >> >>               unsigned File, const struct tgsi_ind_register *reg)
> >> >> >>  {
> >> >> >>       struct radeon_llvm_context * ctx =
> >> >> >> radeon_llvm_context(bld_base);
> >> >> >> +
> >> >> >>       if (File != TGSI_FILE_TEMPORARY || reg->ArrayID == 0 ||
> >> >> >> -            reg->ArrayID > RADEON_LLVM_MAX_ARRAYS) {
> >> >> >> +         reg->ArrayID-1 >= ctx->max_num_arrays) {
> >> >> >>               struct tgsi_declaration_range range;
> >> >> >>               range.First = 0;
> >> >> >>               range.Last = bld_base->info->file_max[File];
> >> >> >> @@ -252,8 +253,14 @@ static void emit_declaration(
> >> >> >>       }
> >> >> >>
> >> >> >>       case TGSI_FILE_TEMPORARY:
> >> >> >> -             if (decl->Declaration.Array && decl->Array.ArrayID
> <=
> >> >> >> RADEON_LLVM_MAX_ARRAYS)
> >> >> >> +             if (decl->Declaration.Array) {
> >> >> >> +                     if (decl->Array.ArrayID-1 >=
> >> >> >> ctx->max_num_arrays) {
> >> >> >> +                             ctx->max_num_arrays += 32;
> >> >> >> +                             ctx->arrays = realloc(ctx->arrays,
> >> >> > You should use the REALLOC wrapper, considering the FREE below.
> >> >> >
> >> >> >> +
>  sizeof(ctx->arrays[0])
> >> >> >> * ctx->max_num_arrays);
> >> >> >> +                     }
> >> >> >>                       ctx->arrays[decl->Array.ArrayID - 1] =
> >> >> >> decl->Range;
> >> >> >> +             }
> >> >> >>               if (uses_temp_indirect_addressing(bld_base)) {
> >> >> >>                       lp_emit_declaration_soa(bld_base, decl);
> >> >> >>                       break;
> >> >> >> @@ -1432,8 +1439,6 @@ void radeon_llvm_context_init(struct
> >> >> >> radeon_llvm_context * ctx)
> >> >> >>       /* Allocate outputs */
> >> >> >>       ctx->soa.outputs = ctx->outputs;
> >> >> >>
> >> >> >> -     ctx->num_arrays = 0;
> >> >> >> -
> >> >> >>       /* XXX: Is there a better way to initialize all this ? */
> >> >> >>
> >> >> >>       lp_set_default_actions(bld_base);
> >> >> >> @@ -1622,6 +1627,8 @@ void radeon_llvm_dispose(struct
> >> >> >> radeon_llvm_context * ctx)
> >> >> >>  {
> >> >> >>       LLVMDisposeModule(ctx->soa.bld_base.base.gallivm->module);
> >> >> >>       LLVMContextDispose(ctx->soa.bld_base.base.gallivm->context);
> >> >> >> +     FREE(ctx->arrays);
> >> >> >> +     ctx->arrays = NULL;
> >> >> > Not particularly familiar with the radeon driver(s) so this might
> >> >> > come a
> >> >> > bit silly - shouldn't one zero max_num_arrays at this stage ?
> >> >>
> >> >> Yes, I've sent a new patch for that.
> >> >> >
> >> >> > Similar counters (temps_count, output_reg_count) seems to be left
> >> >> > unchanged, while others (branch_depth_max, loop_depth_max) are
> reset.
> >> >>
> >> >> I've sent a patch for temps_count. I'm not touching output_reg_count,
> >> >> because it's only used by the dying r600g LLVM backend.
> >> >
> >> > just out of curiosity, why is it dying?
> >>
> >> The OpenGL shader support is dying because the LLVM backend is not
> >> used by default, it lacks some features, and SB should perform
> >> equally, if not better.
> >>
> >> The LLVM backend is still important for OpenCL though.
> >
> >
> > wouldn't a functioning llvm-r600 backend for OCL, and the fact that SI
> uses
> > llvm for graphics mean that GL-llvm-r600 is lower maintenance solution?
> > I'm not familiar with GL codepaths, is the GL(tgsi)-llvm conversion so
> > complex (and different from SI) that it's better to use SB?
> >
> > jan
> >
> >>
> >>
> >> Marek
> >
> >
>
On Mon, Jun 8, 2015 at 5:24 PM, Jan Vesely <jan.vesely@rutgers.edu> wrote:
>
>
> On Fri, Jun 5, 2015 at 1:49 PM, Marek Olšák <maraeo@gmail.com> wrote:
>>
>> I don't think so. The LLVM IR generated by radeonsi is slightly
>> different from the one generated by r600g. The most code that can be
>> shared is shared already.
>>
>> Right now the main problem of the R600 LLVM backend is lack of
>> features. I think it's approximately at the OpenGL 3.0 or 3.1 level of
>> support. That's not even good enough for thinking about using it.
>>
>> If somebody commits to adding OpenGL 4.x support into it, maybe then
>> it will have some chance of surviving. If there are volunteers, please
>> discuss it with Dave Airlie.
>
>
> So the main work would be to add GL specific opcodes/intrinsics to the
> backend?

Yes, adding new opcodes and translating TGSI into them. The
translation part might not be straightforward, which means you'd have
to understand very well what you're doing.

Marek