[4/7] radeonsi: add s_sethalt to shaders for debugging

Submitted by Marek Olšák on June 13, 2019, 12:40 a.m.

Details

Message ID 20190613004041.32721-4-maraeo@gmail.com
State New
Headers show
Series "Series without cover letter" ( rev: 1 ) in Mesa

Not browsing as part of any series.

Commit Message

Marek Olšák June 13, 2019, 12:40 a.m.
From: Nicolai Hähnle <nicolai.haehnle@amd.com>

---
 src/amd/common/ac_rtld.c                        | 9 +++++++++
 src/amd/common/ac_rtld.h                        | 9 +++++++++
 src/gallium/drivers/radeonsi/si_debug_options.h | 1 +
 src/gallium/drivers/radeonsi/si_shader.c        | 3 +++
 4 files changed, 22 insertions(+)

Patch hide | download patch | download mbox

diff --git a/src/amd/common/ac_rtld.c b/src/amd/common/ac_rtld.c
index 92020c5f0dd..c750dbfa9cb 100644
--- a/src/amd/common/ac_rtld.c
+++ b/src/amd/common/ac_rtld.c
@@ -236,20 +236,21 @@  static bool read_private_lds_symbols(struct ac_rtld_binary *binary,
 bool ac_rtld_open(struct ac_rtld_binary *binary,
 		  struct ac_rtld_open_info i)
 {
 	/* One of the libelf implementations
 	 * (http://www.mr511.de/software/english.htm) requires calling
 	 * elf_version() before elf_memory().
 	 */
 	elf_version(EV_CURRENT);
 
 	memset(binary, 0, sizeof(*binary));
+	memcpy(&binary->options, &i.options, sizeof(binary->options));
 	binary->num_parts = i.num_parts;
 	binary->parts = calloc(sizeof(*binary->parts), i.num_parts);
 	if (!binary->parts)
 		return false;
 
 	uint64_t pasted_text_size = 0;
 	uint64_t rx_align = 1;
 	uint64_t rx_size = 0;
 
 #define report_if(cond) \
@@ -283,20 +284,23 @@  bool ac_rtld_open(struct ac_rtld_binary *binary,
 	uint64_t shared_lds_size = 0;
 	if (!layout_symbols(binary->lds_symbols.data, i.num_shared_lds_symbols, &shared_lds_size))
 		goto fail;
 	report_if(shared_lds_size > max_lds_size);
 	binary->lds_size = shared_lds_size;
 
 	/* First pass over all parts: open ELFs, pre-determine the placement of
 	 * sections in the memory image, and collect and layout private LDS symbols. */
 	uint32_t lds_end_align = 0;
 
+	if (binary->options.halt_at_entry)
+		pasted_text_size += 4;
+
 	for (unsigned part_idx = 0; part_idx < i.num_parts; ++part_idx) {
 		struct ac_rtld_part *part = &binary->parts[part_idx];
 		unsigned part_lds_symbols_begin =
 			util_dynarray_num_elements(&binary->lds_symbols, struct ac_rtld_symbol);
 
 		part->elf = elf_memory((char *)i.elf_ptrs[part_idx], i.elf_sizes[part_idx]);
 		report_elf_if(!part->elf);
 
 		const Elf64_Ehdr *ehdr = elf64_getehdr(part->elf);
 		report_elf_if(!ehdr);
@@ -685,20 +689,25 @@  bool ac_rtld_upload(struct ac_rtld_upload_info *u)
 		} \
 	} while (false)
 #define report_elf_if(cond) \
 	do { \
 		if ((cond)) { \
 			report_errorf(#cond); \
 			return false; \
 		} \
 	} while (false)
 
+	if (u->binary->options.halt_at_entry) {
+		/* s_sethalt 1 */
+		*(uint32_t *)u->rx_ptr = util_cpu_to_le32(0xbf8d0001);
+	}
+
 	/* First pass: upload raw section data and lay out private LDS symbols. */
 	for (unsigned i = 0; i < u->binary->num_parts; ++i) {
 		struct ac_rtld_part *part = &u->binary->parts[i];
 
 		Elf_Scn *section = NULL;
 		while ((section = elf_nextscn(part->elf, section))) {
 			Elf64_Shdr *shdr = elf64_getshdr(section);
 			struct ac_rtld_section *s = &part->sections[elf_ndxscn(section)];
 
 			if (!s->is_rx)
diff --git a/src/amd/common/ac_rtld.h b/src/amd/common/ac_rtld.h
index 01c29b50817..b13270b181d 100644
--- a/src/amd/common/ac_rtld.h
+++ b/src/amd/common/ac_rtld.h
@@ -35,22 +35,30 @@  struct ac_shader_config;
 struct radeon_info;
 
 struct ac_rtld_symbol {
 	const char *name;
 	uint32_t size;
 	uint32_t align;
 	uint64_t offset; /* filled in by ac_rtld_open */
 	unsigned part_idx; /* shader part in which this symbol appears */
 };
 
+struct ac_rtld_options {
+	/* Debugging aid: ac_rtld_open reserves 4 bytes of text and ac_rtld_upload
+	 * writes s_sethalt 1 at rx_ptr, making it the first instruction. */
+	bool halt_at_entry:1;
+};
+
 /* Lightweight wrapper around underlying ELF objects. */
 struct ac_rtld_binary {
+	struct ac_rtld_options options;
+
 	/* Required buffer sizes, currently read/executable only. */
 	uint64_t rx_size;
 
 	uint64_t rx_end_markers;
 
 	unsigned num_parts;
 	struct ac_rtld_part *parts;
 
 	struct util_dynarray lds_symbols;
 	uint32_t lds_size;
@@ -68,20 +76,21 @@  struct ac_rtld_binary {
 typedef bool (*ac_rtld_get_external_symbol_cb)(
 	void *cb_data, const char *symbol, uint64_t *value);
 
 /**
  * Lifetimes of \ref info, in-memory ELF objects, and the names of
  * \ref shared_lds_symbols must extend until \ref ac_rtld_close is called on
  * the opened binary.
  */
 struct ac_rtld_open_info {
 	const struct radeon_info *info;
+	struct ac_rtld_options options;
 
 	unsigned num_parts;
 	const char * const *elf_ptrs; /* in-memory ELF objects of each part */
 	const size_t *elf_sizes; /* sizes of corresponding in-memory ELF objects in bytes */
 
 	/* Shared LDS symbols are layouted such that they are accessible from
 	 * all shader parts. Non-shared (private) LDS symbols of one part may
 	 * overlap private LDS symbols of another shader part.
 	 */
 	unsigned num_shared_lds_symbols;
diff --git a/src/gallium/drivers/radeonsi/si_debug_options.h b/src/gallium/drivers/radeonsi/si_debug_options.h
index aa8d64e1b88..d6cb3157632 100644
--- a/src/gallium/drivers/radeonsi/si_debug_options.h
+++ b/src/gallium/drivers/radeonsi/si_debug_options.h
@@ -1,10 +1,11 @@ 
 OPT_BOOL(clear_db_cache_before_clear, false, "Clear DB cache before fast depth clear")
 OPT_BOOL(enable_nir, false, "Enable NIR")
 OPT_BOOL(aux_debug, false, "Generate ddebug_dumps for the auxiliary context")
 OPT_BOOL(sync_compile, false, "Always compile synchronously (will cause stalls)")
 OPT_BOOL(dump_shader_binary, false, "Dump shader binary as part of ddebug_dumps")
 OPT_BOOL(debug_disassembly, false, "Report shader disassembly as part of driver debug messages (for shader db)")
+OPT_BOOL(halt_shaders, false, "Halt shaders at the start (will hang)")
 OPT_BOOL(vs_fetch_always_opencode, false, "Always open code vertex fetches (less efficient, purely for testing)")
 OPT_BOOL(prim_restart_tri_strips_only, false, "Only enable primitive restart for triangle strips")
 
 #undef OPT_BOOL
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 92c68f21459..3c3d74ce7af 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -5126,20 +5126,23 @@  static bool si_shader_binary_open(struct si_screen *screen,
 		 * shader->config.lds_size is set correctly below.
 		 */
 		struct ac_rtld_symbol *sym = &lds_symbols[num_lds_symbols++];
 		sym->name = "esgs_ring";
 		sym->size = shader->gs_info.esgs_ring_size;
 		sym->align = 64 * 1024;
 	}
 
 	bool ok = ac_rtld_open(rtld, (struct ac_rtld_open_info){
 			.info = &screen->info,
+			.options = {
+				.halt_at_entry = screen->options.halt_shaders,
+			},
 			.num_parts = num_parts,
 			.elf_ptrs = part_elfs,
 			.elf_sizes = part_sizes,
 			.num_shared_lds_symbols = num_lds_symbols,
 			.shared_lds_symbols = lds_symbols });
 
 	if (rtld->lds_size > 0) {
 		unsigned alloc_granularity = screen->info.chip_class >= GFX7 ? 512 : 256;
 		shader->config.lds_size =
 			align(rtld->lds_size, alloc_granularity) / alloc_granularity;