[1/2] radeonsi: remove old_va parameter from si_rebind_buffer by remembering offsets

Submitted by Marek Olšák on May 10, 2019, 5:19 a.m.

Details

Message ID 20190510051956.30409-1-maraeo@gmail.com
State New
Headers show
Series "Series without cover letter" ( rev: 1 ) in Mesa

Not browsing as part of any series.

Commit Message

Marek Olšák May 10, 2019, 5:19 a.m.
From: Marek Olšák <marek.olsak@amd.com>

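Remember the offset of every bound buffer in si_buffer_resources (and use
the views' own u.buf.offset for sampler and image buffers), so that
si_rebind_buffer can rebuild descriptor addresses from the stored offset
instead of deriving it from the buffer's old GPU address.

This is a prerequisite for the next commit.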

Cc: 19.1 <mesa-stable@lists.freedesktop.org>
---
 src/gallium/drivers/radeonsi/si_buffer.c      |  7 +--
 src/gallium/drivers/radeonsi/si_descriptors.c | 54 ++++++++-----------
 src/gallium/drivers/radeonsi/si_state.h       |  4 +-
 3 files changed, 25 insertions(+), 40 deletions(-)

Patch hide | download patch | download mbox

diff --git a/src/gallium/drivers/radeonsi/si_buffer.c b/src/gallium/drivers/radeonsi/si_buffer.c
index 4936eb5a5b1..76705937b65 100644
--- a/src/gallium/drivers/radeonsi/si_buffer.c
+++ b/src/gallium/drivers/radeonsi/si_buffer.c
@@ -280,56 +280,53 @@  si_invalidate_buffer(struct si_context *sctx,
 
 	/* In AMD_pinned_memory, the user pointer association only gets
 	 * broken when the buffer is explicitly re-allocated.
 	 */
 	if (buf->b.is_user_ptr)
 		return false;
 
 	/* Check if mapping this buffer would cause waiting for the GPU. */
 	if (si_rings_is_buffer_referenced(sctx, buf->buf, RADEON_USAGE_READWRITE) ||
 	    !sctx->ws->buffer_wait(buf->buf, 0, RADEON_USAGE_READWRITE)) {
-		uint64_t old_va = buf->gpu_address;
-
 		/* Reallocate the buffer in the same pipe_resource. */
 		si_alloc_resource(sctx->screen, buf);
-		si_rebind_buffer(sctx, &buf->b.b, old_va);
+		si_rebind_buffer(sctx, &buf->b.b);
 	} else {
 		util_range_set_empty(&buf->valid_buffer_range);
 	}
 
 	return true;
 }
 
 /* Replace the storage of dst with src. */
 void si_replace_buffer_storage(struct pipe_context *ctx,
 				 struct pipe_resource *dst,
 				 struct pipe_resource *src)
 {
 	struct si_context *sctx = (struct si_context*)ctx;
 	struct si_resource *sdst = si_resource(dst);
 	struct si_resource *ssrc = si_resource(src);
-	uint64_t old_gpu_address = sdst->gpu_address;
 
 	pb_reference(&sdst->buf, ssrc->buf);
 	sdst->gpu_address = ssrc->gpu_address;
 	sdst->b.b.bind = ssrc->b.b.bind;
 	sdst->b.max_forced_staging_uploads = ssrc->b.max_forced_staging_uploads;
 	sdst->max_forced_staging_uploads = ssrc->max_forced_staging_uploads;
 	sdst->flags = ssrc->flags;
 
 	assert(sdst->vram_usage == ssrc->vram_usage);
 	assert(sdst->gart_usage == ssrc->gart_usage);
 	assert(sdst->bo_size == ssrc->bo_size);
 	assert(sdst->bo_alignment == ssrc->bo_alignment);
 	assert(sdst->domains == ssrc->domains);
 
-	si_rebind_buffer(sctx, dst, old_gpu_address);
+	si_rebind_buffer(sctx, dst);
 }
 
 static void si_invalidate_resource(struct pipe_context *ctx,
 				   struct pipe_resource *resource)
 {
 	struct si_context *sctx = (struct si_context*)ctx;
 	struct si_resource *buf = si_resource(resource);
 
 	/* We currently only do anyting here for buffers */
 	if (resource->target == PIPE_BUFFER)
diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
index f795c33cf26..744fc9a15d7 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -992,34 +992,36 @@  static void si_bind_sampler_states(struct pipe_context *ctx,
 static void si_init_buffer_resources(struct si_buffer_resources *buffers,
 				     struct si_descriptors *descs,
 				     unsigned num_buffers,
 				     short shader_userdata_rel_index,
 				     enum radeon_bo_priority priority,
 				     enum radeon_bo_priority priority_constbuf)
 {
 	buffers->priority = priority;
 	buffers->priority_constbuf = priority_constbuf;
 	buffers->buffers = CALLOC(num_buffers, sizeof(struct pipe_resource*));
+	buffers->offsets = CALLOC(num_buffers, sizeof(buffers->offsets[0]));
 
 	si_init_descriptors(descs, shader_userdata_rel_index, 4, num_buffers);
 }
 
 static void si_release_buffer_resources(struct si_buffer_resources *buffers,
 					struct si_descriptors *descs)
 {
 	int i;
 
 	for (i = 0; i < descs->num_elements; i++) {
 		pipe_resource_reference(&buffers->buffers[i], NULL);
 	}
 
 	FREE(buffers->buffers);
+	FREE(buffers->offsets);
 }
 
 static void si_buffer_resources_begin_new_cs(struct si_context *sctx,
 					     struct si_buffer_resources *buffers)
 {
 	unsigned mask = buffers->enabled_mask;
 
 	/* Add buffers to the CS. */
 	while (mask) {
 		int i = u_bit_scan(&mask);
@@ -1212,53 +1214,54 @@  static void si_set_constant_buffer(struct si_context *sctx,
 
 	/* CIK cannot unbind a constant buffer (S_BUFFER_LOAD is buggy
 	 * with a NULL buffer). We need to use a dummy buffer instead. */
 	if (sctx->chip_class == CIK &&
 	    (!input || (!input->buffer && !input->user_buffer)))
 		input = &sctx->null_const_buf;
 
 	if (input && (input->buffer || input->user_buffer)) {
 		struct pipe_resource *buffer = NULL;
 		uint64_t va;
+		unsigned buffer_offset;
 
 		/* Upload the user buffer if needed. */
 		if (input->user_buffer) {
-			unsigned buffer_offset;
-
 			si_upload_const_buffer(sctx,
 					       (struct si_resource**)&buffer, input->user_buffer,
 					       input->buffer_size, &buffer_offset);
 			if (!buffer) {
 				/* Just unbind on failure. */
 				si_set_constant_buffer(sctx, buffers, descriptors_idx, slot, NULL);
 				return;
 			}
-			va = si_resource(buffer)->gpu_address + buffer_offset;
 		} else {
 			pipe_resource_reference(&buffer, input->buffer);
-			va = si_resource(buffer)->gpu_address + input->buffer_offset;
+			buffer_offset = input->buffer_offset;
 		}
 
+		va = si_resource(buffer)->gpu_address + buffer_offset;
+
 		/* Set the descriptor. */
 		uint32_t *desc = descs->list + slot*4;
 		desc[0] = va;
 		desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
 			  S_008F04_STRIDE(0);
 		desc[2] = input->buffer_size;
 		desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
 			  S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
 			  S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
 			  S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
 			  S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
 			  S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
 
 		buffers->buffers[slot] = buffer;
+		buffers->offsets[slot] = buffer_offset;
 		radeon_add_to_gfx_buffer_list_check_mem(sctx,
 							si_resource(buffer),
 							RADEON_USAGE_READ,
 							buffers->priority_constbuf, true);
 		buffers->enabled_mask |= 1u << slot;
 	} else {
 		/* Clear the descriptor. */
 		memset(descs->list + slot*4, 0, sizeof(uint32_t) * 4);
 		buffers->enabled_mask &= ~(1u << slot);
 	}
@@ -1329,20 +1332,21 @@  static void si_set_shader_buffer(struct si_context *sctx,
 		  S_008F04_STRIDE(0);
 	desc[2] = sbuffer->buffer_size;
 	desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
 		  S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
 		  S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
 		  S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
 		  S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
 		  S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
 
 	pipe_resource_reference(&buffers->buffers[slot], &buf->b.b);
+	buffers->offsets[slot] = sbuffer->buffer_offset;
 	radeon_add_to_gfx_buffer_list_check_mem(sctx, buf,
 						writable ? RADEON_USAGE_READWRITE :
 							   RADEON_USAGE_READ,
 						priority, true);
 	if (writable)
 		buffers->writable_mask |= 1u << slot;
 	else
 		buffers->writable_mask &= ~(1u << slot);
 
 	buffers->enabled_mask |= 1u << slot;
@@ -1498,34 +1502,20 @@  void si_set_ring_buffer(struct si_context *sctx, uint slot,
 		buffers->enabled_mask |= 1u << slot;
 	} else {
 		/* Clear the descriptor. */
 		memset(descs->list + slot*4, 0, sizeof(uint32_t) * 4);
 		buffers->enabled_mask &= ~(1u << slot);
 	}
 
 	sctx->descriptors_dirty |= 1u << SI_DESCS_RW_BUFFERS;
 }
 
-static void si_desc_reset_buffer_offset(uint32_t *desc, uint64_t old_buf_va,
-					struct pipe_resource *new_buf)
-{
-	/* Retrieve the buffer offset from the descriptor. */
-	uint64_t old_desc_va = si_desc_extract_buffer_address(desc);
-
-	assert(old_buf_va <= old_desc_va);
-	uint64_t offset_within_buffer = old_desc_va - old_buf_va;
-
-	/* Update the descriptor. */
-	si_set_buf_desc_address(si_resource(new_buf), offset_within_buffer,
-				desc);
-}
-
 /* INTERNAL CONST BUFFERS */
 
 static void si_set_polygon_stipple(struct pipe_context *ctx,
 				   const struct pipe_poly_stipple *state)
 {
 	struct si_context *sctx = (struct si_context *)ctx;
 	struct pipe_constant_buffer cb = {};
 	unsigned stipple[32];
 	int i;
 
@@ -1596,48 +1586,46 @@  void si_update_needs_color_decompress_masks(struct si_context *sctx)
 }
 
 /* BUFFER DISCARD/INVALIDATION */
 
 /** Reset descriptors of buffer resources after \p buf has been invalidated. */
 static void si_reset_buffer_resources(struct si_context *sctx,
 				      struct si_buffer_resources *buffers,
 				      unsigned descriptors_idx,
 				      unsigned slot_mask,
 				      struct pipe_resource *buf,
-				      uint64_t old_va,
 				      enum radeon_bo_priority priority)
 {
 	struct si_descriptors *descs = &sctx->descriptors[descriptors_idx];
 	unsigned mask = buffers->enabled_mask & slot_mask;
 
 	while (mask) {
 		unsigned i = u_bit_scan(&mask);
 		if (buffers->buffers[i] == buf) {
-			si_desc_reset_buffer_offset(descs->list + i*4,
-						    old_va, buf);
+			si_set_buf_desc_address(si_resource(buf), buffers->offsets[i],
+						descs->list + i*4);
 			sctx->descriptors_dirty |= 1u << descriptors_idx;
 
 			radeon_add_to_gfx_buffer_list_check_mem(sctx,
 								si_resource(buf),
 								buffers->writable_mask & (1u << i) ?
 									RADEON_USAGE_READWRITE :
 									RADEON_USAGE_READ,
 								priority, true);
 		}
 	}
 }
 
 /* Update all resource bindings where the buffer is bound, including
  * all resource descriptors. This is invalidate_buffer without
  * the invalidation. */
-void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf,
-		      uint64_t old_va)
+void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf)
 {
 	struct si_resource *buffer = si_resource(buf);
 	unsigned i, shader;
 	unsigned num_elems = sctx->vertex_elements ?
 				       sctx->vertex_elements->count : 0;
 
 	/* We changed the buffer, now we need to bind it where the old one
 	 * was bound. This consists of 2 things:
 	 *   1) Updating the resource descriptor and dirtying it.
 	 *   2) Adding a relocation to the CS, so that it's usable.
@@ -1663,22 +1651,22 @@  void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf,
 	/* Streamout buffers. (other internal buffers can't be invalidated) */
 	if (buffer->bind_history & PIPE_BIND_STREAM_OUTPUT) {
 		for (i = SI_VS_STREAMOUT_BUF0; i <= SI_VS_STREAMOUT_BUF3; i++) {
 			struct si_buffer_resources *buffers = &sctx->rw_buffers;
 			struct si_descriptors *descs =
 				&sctx->descriptors[SI_DESCS_RW_BUFFERS];
 
 			if (buffers->buffers[i] != buf)
 				continue;
 
-			si_desc_reset_buffer_offset(descs->list + i*4,
-						    old_va, buf);
+			si_set_buf_desc_address(si_resource(buf), buffers->offsets[i],
+						descs->list + i*4);
 			sctx->descriptors_dirty |= 1u << SI_DESCS_RW_BUFFERS;
 
 			radeon_add_to_gfx_buffer_list_check_mem(sctx,
 								buffer, RADEON_USAGE_WRITE,
 								RADEON_PRIO_SHADER_RW_BUFFER,
 								true);
 
 			/* Update the streamout state. */
 			if (sctx->streamout.begin_emitted)
 				si_emit_streamout_end(sctx);
@@ -1687,49 +1675,49 @@  void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf,
 			si_streamout_buffers_dirty(sctx);
 		}
 	}
 
 	/* Constant and shader buffers. */
 	if (buffer->bind_history & PIPE_BIND_CONSTANT_BUFFER) {
 		for (shader = 0; shader < SI_NUM_SHADERS; shader++)
 			si_reset_buffer_resources(sctx, &sctx->const_and_shader_buffers[shader],
 						  si_const_and_shader_buffer_descriptors_idx(shader),
 						  u_bit_consecutive(SI_NUM_SHADER_BUFFERS, SI_NUM_CONST_BUFFERS),
-						  buf, old_va,
+						  buf,
 						  sctx->const_and_shader_buffers[shader].priority_constbuf);
 	}
 
 	if (buffer->bind_history & PIPE_BIND_SHADER_BUFFER) {
 		for (shader = 0; shader < SI_NUM_SHADERS; shader++)
 			si_reset_buffer_resources(sctx, &sctx->const_and_shader_buffers[shader],
 						  si_const_and_shader_buffer_descriptors_idx(shader),
 						  u_bit_consecutive(0, SI_NUM_SHADER_BUFFERS),
-						  buf, old_va,
+						  buf,
 						  sctx->const_and_shader_buffers[shader].priority);
 	}
 
 	if (buffer->bind_history & PIPE_BIND_SAMPLER_VIEW) {
 		/* Texture buffers - update bindings. */
 		for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
 			struct si_samplers *samplers = &sctx->samplers[shader];
 			struct si_descriptors *descs =
 				si_sampler_and_image_descriptors(sctx, shader);
 			unsigned mask = samplers->enabled_mask;
 
 			while (mask) {
 				unsigned i = u_bit_scan(&mask);
 				if (samplers->views[i]->texture == buf) {
 					unsigned desc_slot = si_get_sampler_slot(i);
 
-					si_desc_reset_buffer_offset(descs->list +
-								    desc_slot * 16 + 4,
-								    old_va, buf);
+					si_set_buf_desc_address(si_resource(buf),
+								samplers->views[i]->u.buf.offset,
+								descs->list + desc_slot * 16 + 4);
 					sctx->descriptors_dirty |=
 						1u << si_sampler_and_image_descriptors_idx(shader);
 
 					radeon_add_to_gfx_buffer_list_check_mem(sctx,
 									    buffer, RADEON_USAGE_READ,
 									    RADEON_PRIO_SAMPLER_BUFFER,
 									    true);
 				}
 			}
 		}
@@ -1745,23 +1733,23 @@  void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf,
 
 			while (mask) {
 				unsigned i = u_bit_scan(&mask);
 
 				if (images->views[i].resource == buf) {
 					unsigned desc_slot = si_get_image_slot(i);
 
 					if (images->views[i].access & PIPE_IMAGE_ACCESS_WRITE)
 						si_mark_image_range_valid(&images->views[i]);
 
-					si_desc_reset_buffer_offset(
-						descs->list + desc_slot * 8 + 4,
-						old_va, buf);
+					si_set_buf_desc_address(si_resource(buf),
+								images->views[i].u.buf.offset,
+								descs->list + desc_slot * 8 + 4);
 					sctx->descriptors_dirty |=
 						1u << si_sampler_and_image_descriptors_idx(shader);
 
 					radeon_add_to_gfx_buffer_list_check_mem(
 						sctx, buffer,
 						RADEON_USAGE_READWRITE,
 						RADEON_PRIO_SAMPLER_BUFFER, true);
 				}
 			}
 		}
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index 6df24f9648a..6d74d774b6d 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -402,20 +402,21 @@  struct si_descriptors {
 	short shader_userdata_offset;
 	/* The size of one descriptor. */
 	ubyte element_dw_size;
 	/* If there is only one slot enabled, bind it directly instead of
 	 * uploading descriptors. -1 if disabled. */
 	signed char slot_index_to_bind_directly;
 };
 
 struct si_buffer_resources {
 	struct pipe_resource		**buffers; /* this has num_buffers elements */
+	unsigned			*offsets; /* this has num_buffers elements */
 
 	enum radeon_bo_priority		priority:6;
 	enum radeon_bo_priority		priority_constbuf:6;
 
 	/* The i-th bit is set if that element is enabled (non-NULL resource). */
 	unsigned			enabled_mask;
 	unsigned			writable_mask;
 };
 
 #define si_pm4_state_changed(sctx, member) \
@@ -480,22 +481,21 @@  void si_set_rw_shader_buffer(struct si_context *sctx, uint slot,
 void si_set_active_descriptors(struct si_context *sctx, unsigned desc_idx,
 			       uint64_t new_active_mask);
 void si_set_active_descriptors_for_shader(struct si_context *sctx,
 					  struct si_shader_selector *sel);
 bool si_bindless_descriptor_can_reclaim_slab(void *priv,
 					     struct pb_slab_entry *entry);
 struct pb_slab *si_bindless_descriptor_slab_alloc(void *priv, unsigned heap,
 						  unsigned entry_size,
 						  unsigned group_index);
 void si_bindless_descriptor_slab_free(void *priv, struct pb_slab *pslab);
-void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf,
-		      uint64_t old_va);
+void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf);
 /* si_state.c */
 void si_init_state_compute_functions(struct si_context *sctx);
 void si_init_state_functions(struct si_context *sctx);
 void si_init_screen_state_functions(struct si_screen *sscreen);
 void
 si_make_buffer_descriptor(struct si_screen *screen, struct si_resource *buf,
 			  enum pipe_format format,
 			  unsigned offset, unsigned size,
 			  uint32_t *state);
 void