[18/26] radeonsi: add threadgroups_per_cu param into si_get_compute_resource_limits

Submitted by Marek Olšák on Feb. 13, 2019, 5:16 a.m.

Details

Message ID: 20190213051621.6235-19-maraeo@gmail.com
State: New
Headers show
Series "RadeonSI: Primitive culling with async compute" (rev: 1) in Mesa

Not browsing as part of any series.

Commit Message

Marek Olšák Feb. 13, 2019, 5:16 a.m.
From: Marek Olšák <marek.olsak@amd.com>

---
 src/gallium/drivers/radeonsi/si_compute.c | 9 ++++++---
 src/gallium/drivers/radeonsi/si_pipe.h    | 3 ++-
 2 files changed, 8 insertions(+), 4 deletions(-)

Patch hide | download patch | download mbox

diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c
index 52a62dcb7fa..dc6f647d9a8 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -750,7 +750,8 @@  static void si_setup_tgsi_user_data(struct si_context *sctx,
 
 unsigned si_get_compute_resource_limits(struct si_screen *sscreen,
 					unsigned waves_per_threadgroup,
-					unsigned max_waves_per_sh)
+					unsigned max_waves_per_sh,
+					unsigned threadgroups_per_cu)
 {
 	unsigned compute_resource_limits =
 		S_00B854_SIMD_DEST_CNTL(waves_per_threadgroup % 4 == 0);
@@ -766,7 +767,9 @@  unsigned si_get_compute_resource_limits(struct si_screen *sscreen,
 		if (num_cu_per_se % 4 && waves_per_threadgroup == 1)
 			compute_resource_limits |= S_00B854_FORCE_SIMD_DIST(1);
 
-		compute_resource_limits |= S_00B854_WAVES_PER_SH(max_waves_per_sh);
+		assert(threadgroups_per_cu >= 1 && threadgroups_per_cu <= 8);
+		compute_resource_limits |= S_00B854_WAVES_PER_SH(max_waves_per_sh) |
+					   S_00B854_CU_GROUP_COUNT(threadgroups_per_cu - 1);
 	} else {
 		/* SI */
 		if (max_waves_per_sh) {
@@ -788,7 +791,7 @@  static void si_emit_dispatch_packets(struct si_context *sctx,
 
 	radeon_set_sh_reg(cs, R_00B854_COMPUTE_RESOURCE_LIMITS,
 			  si_get_compute_resource_limits(sscreen, waves_per_threadgroup,
-							 sctx->cs_max_waves_per_sh));
+							 sctx->cs_max_waves_per_sh, 1));
 
 	unsigned dispatch_initiator =
 		S_00B800_COMPUTE_SHADER_EN(1) |
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index e4e731e913b..330cdfa0c12 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -1294,7 +1294,8 @@  unsigned si_end_counter(struct si_screen *sscreen, unsigned type,
 void si_emit_initial_compute_regs(struct si_context *sctx, struct radeon_cmdbuf *cs);
 unsigned si_get_compute_resource_limits(struct si_screen *sscreen,
 					unsigned waves_per_threadgroup,
-					unsigned max_waves_per_sh);
+					unsigned max_waves_per_sh,
+					unsigned threadgroups_per_cu);
 void si_init_compute_functions(struct si_context *sctx);
 
 /* si_perfcounters.c */