[1/2] radeonsi: always use compute rings for clover on CI and newer (v2)

Submitted by Marek Olšák on Feb. 12, 2019, 6:12 p.m.

Details

Message ID 20190212181251.4828-1-maraeo@gmail.com
State New
Headers show
Series "Series without cover letter" ( rev: 1 ) in Mesa

Not browsing as part of any series.

Commit Message

Marek Olšák Feb. 12, 2019, 6:12 p.m.
From: Marek Olšák <marek.olsak@amd.com>

initialize all non-compute context functions to NULL.

v2: fix SI
---
 src/gallium/drivers/radeonsi/si_blit.c        | 14 ++-
 src/gallium/drivers/radeonsi/si_clear.c       |  7 +-
 src/gallium/drivers/radeonsi/si_compute.c     | 15 +--
 src/gallium/drivers/radeonsi/si_descriptors.c | 10 +-
 src/gallium/drivers/radeonsi/si_gfx_cs.c      | 29 +++---
 src/gallium/drivers/radeonsi/si_pipe.c        | 95 +++++++++++--------
 src/gallium/drivers/radeonsi/si_pipe.h        |  3 +-
 src/gallium/drivers/radeonsi/si_state.c       |  3 +-
 src/gallium/drivers/radeonsi/si_state.h       |  1 +
 src/gallium/drivers/radeonsi/si_state_draw.c  | 25 +++--
 src/gallium/drivers/radeonsi/si_texture.c     |  3 +
 11 files changed, 130 insertions(+), 75 deletions(-)

Patch hide | download patch | download mbox

diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c
index bb8d1cbd12d..f39cb5d143f 100644
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -1345,25 +1345,31 @@  static void si_flush_resource(struct pipe_context *ctx,
 
 		if (separate_dcc_dirty) {
 			tex->separate_dcc_dirty = false;
 			vi_separate_dcc_process_and_reset_stats(ctx, tex);
 		}
 	}
 }
 
 void si_decompress_dcc(struct si_context *sctx, struct si_texture *tex)
 {
-	if (!tex->dcc_offset)
+	/* If graphics is disabled, we can't decompress DCC, but it shouldn't
+	 * be compressed either. The caller should simply discard it.
+	 */
+	if (!tex->dcc_offset || !sctx->has_graphics)
 		return;
 
 	si_blit_decompress_color(sctx, tex, 0, tex->buffer.b.b.last_level,
 				 0, util_max_layer(&tex->buffer.b.b, 0),
 				 true);
 }
 
 void si_init_blit_functions(struct si_context *sctx)
 {
 	sctx->b.resource_copy_region = si_resource_copy_region;
-	sctx->b.blit = si_blit;
-	sctx->b.flush_resource = si_flush_resource;
-	sctx->b.generate_mipmap = si_generate_mipmap;
+
+	if (sctx->has_graphics) {
+		sctx->b.blit = si_blit;
+		sctx->b.flush_resource = si_flush_resource;
+		sctx->b.generate_mipmap = si_generate_mipmap;
+	}
 }
diff --git a/src/gallium/drivers/radeonsi/si_clear.c b/src/gallium/drivers/radeonsi/si_clear.c
index 9a00bb73b94..e1805f2a1c9 100644
--- a/src/gallium/drivers/radeonsi/si_clear.c
+++ b/src/gallium/drivers/radeonsi/si_clear.c
@@ -764,15 +764,18 @@  static void si_clear_texture(struct pipe_context *pipe,
 			util_clear_render_target(pipe, sf, &color,
 						 box->x, box->y,
 						 box->width, box->height);
 		}
 	}
 	pipe_surface_reference(&sf, NULL);
 }
 
 void si_init_clear_functions(struct si_context *sctx)
 {
-	sctx->b.clear = si_clear;
 	sctx->b.clear_render_target = si_clear_render_target;
-	sctx->b.clear_depth_stencil = si_clear_depth_stencil;
 	sctx->b.clear_texture = si_clear_texture;
+
+	if (sctx->has_graphics) {
+		sctx->b.clear = si_clear;
+		sctx->b.clear_depth_stencil = si_clear_depth_stencil;
+	}
 }
diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c
index 1a62b3e0844..87addd53976 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -880,26 +880,28 @@  static void si_launch_grid(
 		info->block[0] * info->block[1] * info->block[2] > 256;
 
 	if (cs_regalloc_hang)
 		sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
 				 SI_CONTEXT_CS_PARTIAL_FLUSH;
 
 	if (program->ir_type != PIPE_SHADER_IR_NATIVE &&
 	    program->shader.compilation_failed)
 		return;
 
-	if (sctx->last_num_draw_calls != sctx->num_draw_calls) {
-		si_update_fb_dirtiness_after_rendering(sctx);
-		sctx->last_num_draw_calls = sctx->num_draw_calls;
-	}
+	if (sctx->has_graphics) {
+		if (sctx->last_num_draw_calls != sctx->num_draw_calls) {
+			si_update_fb_dirtiness_after_rendering(sctx);
+			sctx->last_num_draw_calls = sctx->num_draw_calls;
+		}
 
-	si_decompress_textures(sctx, 1 << PIPE_SHADER_COMPUTE);
+		si_decompress_textures(sctx, 1 << PIPE_SHADER_COMPUTE);
+	}
 
 	/* Add buffer sizes for memory checking in need_cs_space. */
 	si_context_add_resource_size(sctx, &program->shader.bo->b.b);
 	/* TODO: add the scratch buffer */
 
 	if (info->indirect) {
 		si_context_add_resource_size(sctx, info->indirect);
 
 		/* Indirect buffers use TC L2 on GFX9, but not older hw. */
 		if (sctx->chip_class <= VI &&
@@ -917,21 +919,22 @@  static void si_launch_grid(
 	if (sctx->flags)
 		si_emit_cache_flush(sctx);
 
 	if (!si_switch_compute_shader(sctx, program, &program->shader,
 					code_object, info->pc))
 		return;
 
 	si_upload_compute_shader_descriptors(sctx);
 	si_emit_compute_shader_pointers(sctx);
 
-	if (si_is_atom_dirty(sctx, &sctx->atoms.s.render_cond)) {
+	if (sctx->has_graphics &&
+	    si_is_atom_dirty(sctx, &sctx->atoms.s.render_cond)) {
 		sctx->atoms.s.render_cond.emit(sctx);
 		si_set_atom_dirty(sctx, &sctx->atoms.s.render_cond, false);
 	}
 
 	if ((program->input_size ||
             program->ir_type == PIPE_SHADER_IR_NATIVE) &&
            unlikely(!si_upload_compute_input(sctx, code_object, info))) {
 		return;
 	}
 
diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
index 21d4ca946d3..0f22c55723c 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -2640,22 +2640,24 @@  void si_all_resident_buffers_begin_new_cs(struct si_context *sctx)
 
 	sctx->num_resident_handles += num_resident_tex_handles +
 					num_resident_img_handles;
 }
 
 /* INIT/DEINIT/UPLOAD */
 
 void si_init_all_descriptors(struct si_context *sctx)
 {
 	int i;
+	unsigned first_shader =
+		sctx->has_graphics ? 0 : PIPE_SHADER_COMPUTE;
 
-	for (i = 0; i < SI_NUM_SHADERS; i++) {
+	for (i = first_shader; i < SI_NUM_SHADERS; i++) {
 		bool is_2nd = sctx->chip_class >= GFX9 &&
 				     (i == PIPE_SHADER_TESS_CTRL ||
 				      i == PIPE_SHADER_GEOMETRY);
 		unsigned num_sampler_slots = SI_NUM_IMAGES / 2 + SI_NUM_SAMPLERS;
 		unsigned num_buffer_slots = SI_NUM_SHADER_BUFFERS + SI_NUM_CONST_BUFFERS;
 		int rel_dw_offset;
 		struct si_descriptors *desc;
 
 		if (is_2nd) {
 			if (i == PIPE_SHADER_TESS_CTRL) {
@@ -2714,30 +2716,34 @@  void si_init_all_descriptors(struct si_context *sctx)
 	si_init_bindless_descriptors(sctx, &sctx->bindless_descriptors,
 				     SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES,
 				     1024);
 
 	sctx->descriptors_dirty = u_bit_consecutive(0, SI_NUM_DESCS);
 
 	/* Set pipe_context functions. */
 	sctx->b.bind_sampler_states = si_bind_sampler_states;
 	sctx->b.set_shader_images = si_set_shader_images;
 	sctx->b.set_constant_buffer = si_pipe_set_constant_buffer;
-	sctx->b.set_polygon_stipple = si_set_polygon_stipple;
 	sctx->b.set_shader_buffers = si_set_shader_buffers;
 	sctx->b.set_sampler_views = si_set_sampler_views;
 	sctx->b.create_texture_handle = si_create_texture_handle;
 	sctx->b.delete_texture_handle = si_delete_texture_handle;
 	sctx->b.make_texture_handle_resident = si_make_texture_handle_resident;
 	sctx->b.create_image_handle = si_create_image_handle;
 	sctx->b.delete_image_handle = si_delete_image_handle;
 	sctx->b.make_image_handle_resident = si_make_image_handle_resident;
 
+	if (!sctx->has_graphics)
+		return;
+
+	sctx->b.set_polygon_stipple = si_set_polygon_stipple;
+
 	/* Shader user data. */
 	sctx->atoms.s.shader_pointers.emit = si_emit_graphics_shader_pointers;
 
 	/* Set default and immutable mappings. */
 	si_set_user_data_base(sctx, PIPE_SHADER_VERTEX, R_00B130_SPI_SHADER_USER_DATA_VS_0);
 
 	if (sctx->chip_class >= GFX9) {
 		si_set_user_data_base(sctx, PIPE_SHADER_TESS_CTRL,
 				      R_00B430_SPI_SHADER_USER_DATA_LS_0);
 		si_set_user_data_base(sctx, PIPE_SHADER_GEOMETRY,
diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c b/src/gallium/drivers/radeonsi/si_gfx_cs.c
index 3d64587fa2b..d0e7cf20b4c 100644
--- a/src/gallium/drivers/radeonsi/si_gfx_cs.c
+++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c
@@ -103,27 +103,29 @@  void si_flush_gfx_cs(struct si_context *ctx, unsigned flags,
 	 * This code is only needed when the driver flushes the GFX IB
 	 * internally, and it never asks for a fence handle.
 	 */
 	if (radeon_emitted(ctx->dma_cs, 0)) {
 		assert(fence == NULL); /* internal flushes only */
 		si_flush_dma_cs(ctx, flags, NULL);
 	}
 
 	ctx->gfx_flush_in_progress = true;
 
-	if (!LIST_IS_EMPTY(&ctx->active_queries))
-		si_suspend_queries(ctx);
-
-	ctx->streamout.suspended = false;
-	if (ctx->streamout.begin_emitted) {
-		si_emit_streamout_end(ctx);
-		ctx->streamout.suspended = true;
+	if (ctx->has_graphics) {
+		if (!LIST_IS_EMPTY(&ctx->active_queries))
+			si_suspend_queries(ctx);
+
+		ctx->streamout.suspended = false;
+		if (ctx->streamout.begin_emitted) {
+			si_emit_streamout_end(ctx);
+			ctx->streamout.suspended = true;
+		}
 	}
 
 	/* Make sure CP DMA is idle at the end of IBs after L2 prefetches
 	 * because the kernel doesn't wait for it. */
 	if (ctx->chip_class >= CIK)
 		si_cp_dma_wait_for_idle(ctx);
 
 	/* Wait for draw calls to finish if needed. */
 	if (wait_flags) {
 		ctx->flags |= wait_flags;
@@ -209,20 +211,29 @@  void si_begin_new_gfx_cs(struct si_context *ctx)
 	 * IB starts drawing.
 	 *
 	 * TODO: Do we also need to invalidate CB & DB caches?
 	 */
 	ctx->flags |= SI_CONTEXT_INV_ICACHE |
 		      SI_CONTEXT_INV_SMEM_L1 |
 		      SI_CONTEXT_INV_VMEM_L1 |
 		      SI_CONTEXT_INV_GLOBAL_L2 |
 		      SI_CONTEXT_START_PIPELINE_STATS;
 
+	ctx->cs_shader_state.initialized = false;
+	si_all_descriptors_begin_new_cs(ctx);
+	si_all_resident_buffers_begin_new_cs(ctx);
+
+	if (!ctx->has_graphics) {
+		ctx->initial_gfx_cs_size = ctx->gfx_cs->current.cdw;
+		return;
+	}
+
 	/* set all valid group as dirty so they get reemited on
 	 * next draw command
 	 */
 	si_pm4_reset_emitted(ctx);
 
 	/* The CS initialization should be emitted before everything else. */
 	si_pm4_emit(ctx, ctx->init_config);
 	if (ctx->init_config_gs_rings)
 		si_pm4_emit(ctx, ctx->init_config_gs_rings);
 
@@ -273,22 +284,20 @@  void si_begin_new_gfx_cs(struct si_context *ctx)
 	si_mark_atom_dirty(ctx, &ctx->atoms.s.db_render_state);
 	if (ctx->chip_class >= GFX9)
 		si_mark_atom_dirty(ctx, &ctx->atoms.s.dpbb_state);
 	si_mark_atom_dirty(ctx, &ctx->atoms.s.stencil_ref);
 	si_mark_atom_dirty(ctx, &ctx->atoms.s.spi_map);
 	si_mark_atom_dirty(ctx, &ctx->atoms.s.streamout_enable);
 	si_mark_atom_dirty(ctx, &ctx->atoms.s.render_cond);
 	/* CLEAR_STATE disables all window rectangles. */
 	if (!has_clear_state || ctx->num_window_rectangles > 0)
 		si_mark_atom_dirty(ctx, &ctx->atoms.s.window_rectangles);
-	si_all_descriptors_begin_new_cs(ctx);
-	si_all_resident_buffers_begin_new_cs(ctx);
 
 	ctx->scissors.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
 	ctx->viewports.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
 	ctx->viewports.depth_range_dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
 	si_mark_atom_dirty(ctx, &ctx->atoms.s.guardband);
 	si_mark_atom_dirty(ctx, &ctx->atoms.s.scissors);
 	si_mark_atom_dirty(ctx, &ctx->atoms.s.viewports);
 
 	si_mark_atom_dirty(ctx, &ctx->atoms.s.scratch_state);
 	if (ctx->scratch_buffer) {
@@ -316,22 +325,20 @@  void si_begin_new_gfx_cs(struct si_context *ctx)
 	ctx->last_multi_vgt_param = -1;
 	ctx->last_rast_prim = -1;
 	ctx->last_sc_line_stipple = ~0;
 	ctx->last_vs_state = ~0;
 	ctx->last_ls = NULL;
 	ctx->last_tcs = NULL;
 	ctx->last_tes_sh_base = -1;
 	ctx->last_num_tcs_input_cp = -1;
 	ctx->last_ls_hs_config = -1; /* impossible value */
 
-	ctx->cs_shader_state.initialized = false;
-
 	if (has_clear_state) {
 		ctx->tracked_regs.reg_value[SI_TRACKED_DB_RENDER_CONTROL] = 0x00000000;
 		ctx->tracked_regs.reg_value[SI_TRACKED_DB_COUNT_CONTROL] = 0x00000000;
 		ctx->tracked_regs.reg_value[SI_TRACKED_DB_RENDER_OVERRIDE2] = 0x00000000;
 		ctx->tracked_regs.reg_value[SI_TRACKED_DB_SHADER_CONTROL] = 0x00000000;
 		ctx->tracked_regs.reg_value[SI_TRACKED_CB_TARGET_MASK] = 0xffffffff;
 		ctx->tracked_regs.reg_value[SI_TRACKED_CB_DCC_CONTROL] = 0x00000000;
 		ctx->tracked_regs.reg_value[SI_TRACKED_SX_PS_DOWNCONVERT] = 0x00000000;
 		ctx->tracked_regs.reg_value[SI_TRACKED_SX_BLEND_OPT_EPSILON] = 0x00000000;
 		ctx->tracked_regs.reg_value[SI_TRACKED_SX_BLEND_OPT_CONTROL] = 0x00000000;
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index 20767c806d2..c2ec664d5a4 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -381,61 +381,56 @@  static struct pipe_context *si_create_context(struct pipe_screen *screen,
 {
 	struct si_context *sctx = CALLOC_STRUCT(si_context);
 	struct si_screen* sscreen = (struct si_screen *)screen;
 	struct radeon_winsys *ws = sscreen->ws;
 	int shader, i;
 	bool stop_exec_on_failure = (flags & PIPE_CONTEXT_LOSE_CONTEXT_ON_RESET) != 0;
 
 	if (!sctx)
 		return NULL;
 
+	sctx->has_graphics = sscreen->info.chip_class == SI ||
+			     !(flags & PIPE_CONTEXT_COMPUTE_ONLY);
+
 	if (flags & PIPE_CONTEXT_DEBUG)
 		sscreen->record_llvm_ir = true; /* racy but not critical */
 
 	sctx->b.screen = screen; /* this must be set first */
 	sctx->b.priv = NULL;
 	sctx->b.destroy = si_destroy_context;
-	sctx->b.emit_string_marker = si_emit_string_marker;
-	sctx->b.set_debug_callback = si_set_debug_callback;
-	sctx->b.set_log_context = si_set_log_context;
-	sctx->b.set_context_param = si_set_context_param;
 	sctx->screen = sscreen; /* Easy accessing of screen/winsys. */
 	sctx->is_debug = (flags & PIPE_CONTEXT_DEBUG) != 0;
 
 	slab_create_child(&sctx->pool_transfers, &sscreen->pool_transfers);
 	slab_create_child(&sctx->pool_transfers_unsync, &sscreen->pool_transfers);
 
 	sctx->ws = sscreen->ws;
 	sctx->family = sscreen->info.family;
 	sctx->chip_class = sscreen->info.chip_class;
 
 	if (sscreen->info.has_gpu_reset_counter_query) {
 		sctx->gpu_reset_counter =
 			sctx->ws->query_value(sctx->ws, RADEON_GPU_RESET_COUNTER);
 	}
 
-	sctx->b.get_device_reset_status = si_get_reset_status;
-	sctx->b.set_device_reset_callback = si_set_device_reset_callback;
-
-	si_init_context_texture_functions(sctx);
-	si_init_query_functions(sctx);
 
 	if (sctx->chip_class == CIK ||
 	    sctx->chip_class == VI ||
 	    sctx->chip_class == GFX9) {
 		sctx->eop_bug_scratch = si_resource(
 			pipe_buffer_create(&sscreen->b, 0, PIPE_USAGE_DEFAULT,
 					   16 * sscreen->info.num_render_backends));
 		if (!sctx->eop_bug_scratch)
 			goto fail;
 	}
 
+	/* Initialize context allocators. */
 	sctx->allocator_zeroed_memory =
 		u_suballocator_create(&sctx->b, 128 * 1024,
 				      0, PIPE_USAGE_DEFAULT,
 				      SI_RESOURCE_FLAG_UNMAPPABLE |
 				      SI_RESOURCE_FLAG_CLEAR, false);
 	if (!sctx->allocator_zeroed_memory)
 		goto fail;
 
 	sctx->b.stream_uploader = u_upload_create(&sctx->b, 1024 * 1024,
 						    0, PIPE_USAGE_STREAM,
@@ -459,38 +454,22 @@  static struct pipe_context *si_create_context(struct pipe_screen *screen,
 	sctx->ctx = sctx->ws->ctx_create(sctx->ws);
 	if (!sctx->ctx)
 		goto fail;
 
 	if (sscreen->info.num_sdma_rings && !(sscreen->debug_flags & DBG(NO_ASYNC_DMA))) {
 		sctx->dma_cs = sctx->ws->cs_create(sctx->ctx, RING_DMA,
 						   (void*)si_flush_dma_cs,
 						   sctx, stop_exec_on_failure);
 	}
 
-	si_init_buffer_functions(sctx);
-	si_init_clear_functions(sctx);
-	si_init_blit_functions(sctx);
-	si_init_compute_functions(sctx);
-	si_init_compute_blit_functions(sctx);
-	si_init_debug_functions(sctx);
-	si_init_msaa_functions(sctx);
-	si_init_streamout_functions(sctx);
-
-	if (sscreen->info.has_hw_decode) {
-		sctx->b.create_video_codec = si_uvd_create_decoder;
-		sctx->b.create_video_buffer = si_video_buffer_create;
-	} else {
-		sctx->b.create_video_codec = vl_create_decoder;
-		sctx->b.create_video_buffer = vl_video_buffer_create;
-	}
-
-	sctx->gfx_cs = ws->cs_create(sctx->ctx, RING_GFX,
+	sctx->gfx_cs = ws->cs_create(sctx->ctx,
+				     sctx->has_graphics ? RING_GFX : RING_COMPUTE,
 				     (void*)si_flush_gfx_cs, sctx, stop_exec_on_failure);
 
 	/* Border colors. */
 	sctx->border_color_table = malloc(SI_MAX_BORDER_COLORS *
 					  sizeof(*sctx->border_color_table));
 	if (!sctx->border_color_table)
 		goto fail;
 
 	sctx->border_color_buffer = si_resource(
 		pipe_buffer_create(screen, 0, PIPE_USAGE_DEFAULT,
@@ -498,43 +477,76 @@  static struct pipe_context *si_create_context(struct pipe_screen *screen,
 				   sizeof(*sctx->border_color_table)));
 	if (!sctx->border_color_buffer)
 		goto fail;
 
 	sctx->border_color_map =
 		ws->buffer_map(sctx->border_color_buffer->buf,
 			       NULL, PIPE_TRANSFER_WRITE);
 	if (!sctx->border_color_map)
 		goto fail;
 
+	/* Initialize context functions used by graphics and compute. */
+	sctx->b.emit_string_marker = si_emit_string_marker;
+	sctx->b.set_debug_callback = si_set_debug_callback;
+	sctx->b.set_log_context = si_set_log_context;
+	sctx->b.set_context_param = si_set_context_param;
+	sctx->b.get_device_reset_status = si_get_reset_status;
+	sctx->b.set_device_reset_callback = si_set_device_reset_callback;
+	sctx->b.memory_barrier = si_memory_barrier;
+
 	si_init_all_descriptors(sctx);
+	si_init_buffer_functions(sctx);
+	si_init_clear_functions(sctx);
+	si_init_blit_functions(sctx);
+	si_init_compute_functions(sctx);
+	si_init_compute_blit_functions(sctx);
+	si_init_debug_functions(sctx);
 	si_init_fence_functions(sctx);
-	si_init_state_functions(sctx);
-	si_init_shader_functions(sctx);
-	si_init_viewport_functions(sctx);
-
-	if (sctx->chip_class >= CIK)
-		cik_init_sdma_functions(sctx);
-	else
-		si_init_dma_functions(sctx);
 
 	if (sscreen->debug_flags & DBG(FORCE_DMA))
 		sctx->b.resource_copy_region = sctx->dma_copy;
 
-	sctx->blitter = util_blitter_create(&sctx->b);
-	if (sctx->blitter == NULL)
-		goto fail;
-	sctx->blitter->skip_viewport_restore = true;
+	/* Initialize graphics-only context functions. */
+	if (sctx->has_graphics) {
+		si_init_context_texture_functions(sctx);
+		si_init_query_functions(sctx);
+		si_init_msaa_functions(sctx);
+		si_init_shader_functions(sctx);
+		si_init_state_functions(sctx);
+		si_init_streamout_functions(sctx);
+		si_init_viewport_functions(sctx);
+
+		sctx->blitter = util_blitter_create(&sctx->b);
+		if (sctx->blitter == NULL)
+			goto fail;
+		sctx->blitter->skip_viewport_restore = true;
 
-	si_init_draw_functions(sctx);
+		si_init_draw_functions(sctx);
+	}
+
+	/* Initialize SDMA functions. */
+	if (sctx->chip_class >= CIK)
+		cik_init_sdma_functions(sctx);
+	else
+		si_init_dma_functions(sctx);
 
 	sctx->sample_mask = 0xffff;
 
+	/* Initialize multimedia functions. */
+	if (sscreen->info.has_hw_decode) {
+		sctx->b.create_video_codec = si_uvd_create_decoder;
+		sctx->b.create_video_buffer = si_video_buffer_create;
+	} else {
+		sctx->b.create_video_codec = vl_create_decoder;
+		sctx->b.create_video_buffer = vl_video_buffer_create;
+	}
+
 	if (sctx->chip_class >= GFX9) {
 		sctx->wait_mem_scratch = si_resource(
 			pipe_buffer_create(screen, 0, PIPE_USAGE_DEFAULT, 4));
 		if (!sctx->wait_mem_scratch)
 			goto fail;
 
 		/* Initialize the memory. */
 		si_cp_write_data(sctx, sctx->wait_mem_scratch, 0, 4,
 				 V_370_MEM, V_370_ME, &sctx->wait_mem_number);
 	}
@@ -544,21 +556,22 @@  static struct pipe_context *si_create_context(struct pipe_screen *screen,
 	if (sctx->chip_class == CIK) {
 		sctx->null_const_buf.buffer =
 			pipe_aligned_buffer_create(screen,
 						   SI_RESOURCE_FLAG_32BIT,
 						   PIPE_USAGE_DEFAULT, 16,
 						   sctx->screen->info.tcc_cache_line_size);
 		if (!sctx->null_const_buf.buffer)
 			goto fail;
 		sctx->null_const_buf.buffer_size = sctx->null_const_buf.buffer->width0;
 
-		for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
+		unsigned start_shader = sctx->has_graphics ? 0 :  PIPE_SHADER_COMPUTE;
+		for (shader = start_shader; shader < SI_NUM_SHADERS; shader++) {
 			for (i = 0; i < SI_NUM_CONST_BUFFERS; i++) {
 				sctx->b.set_constant_buffer(&sctx->b, shader, i,
 							      &sctx->null_const_buf);
 			}
 		}
 
 		si_set_rw_buffer(sctx, SI_HS_CONST_DEFAULT_TESS_LEVELS,
 				 &sctx->null_const_buf);
 		si_set_rw_buffer(sctx, SI_VS_CONST_INSTANCE_DIVISORS,
 				 &sctx->null_const_buf);
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index b01d5744752..348e8e5bd26 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -777,21 +777,21 @@  struct si_saved_cs {
 };
 
 struct si_context {
 	struct pipe_context		b; /* base class */
 
 	enum radeon_family		family;
 	enum chip_class			chip_class;
 
 	struct radeon_winsys		*ws;
 	struct radeon_winsys_ctx	*ctx;
-	struct radeon_cmdbuf		*gfx_cs;
+	struct radeon_cmdbuf		*gfx_cs; /* compute IB if graphics is disabled */
 	struct radeon_cmdbuf		*dma_cs;
 	struct pipe_fence_handle	*last_gfx_fence;
 	struct pipe_fence_handle	*last_sdma_fence;
 	struct si_resource		*eop_bug_scratch;
 	struct u_upload_mgr		*cached_gtt_allocator;
 	struct threaded_context		*tc;
 	struct u_suballocator		*allocator_zeroed_memory;
 	struct slab_child_pool		pool_transfers;
 	struct slab_child_pool		pool_transfers_unsync; /* for threaded_context */
 	struct pipe_device_reset_callback device_reset_callback;
@@ -815,20 +815,21 @@  struct si_context {
 	void				*cs_clear_render_target;
 	void				*cs_clear_render_target_1d_array;
 	struct si_screen		*screen;
 	struct pipe_debug_callback	debug;
 	struct ac_llvm_compiler		compiler; /* only non-threaded compilation */
 	struct si_shader_ctx_state	fixed_func_tcs_shader;
 	struct si_resource		*wait_mem_scratch;
 	unsigned			wait_mem_number;
 	uint16_t			prefetch_L2_mask;
 
+	bool				has_graphics;
 	bool				gfx_flush_in_progress:1;
 	bool				gfx_last_ib_is_busy:1;
 	bool				compute_is_busy:1;
 
 	unsigned			num_gfx_cs_flushes;
 	unsigned			initial_gfx_cs_size;
 	unsigned			gpu_reset_counter;
 	unsigned			last_dirty_tex_counter;
 	unsigned			last_compressed_colortex_counter;
 	unsigned			last_num_draw_calls;
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index b49a1b3695e..458b108a7e3 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -4699,21 +4699,21 @@  static void si_texture_barrier(struct pipe_context *ctx, unsigned flags)
 
 	si_update_fb_dirtiness_after_rendering(sctx);
 
 	/* Multisample surfaces are flushed in si_decompress_textures. */
 	if (sctx->framebuffer.uncompressed_cb_mask)
 		si_make_CB_shader_coherent(sctx, sctx->framebuffer.nr_samples,
 					   sctx->framebuffer.CB_has_shader_readable_metadata);
 }
 
 /* This only ensures coherency for shader image/buffer stores. */
-static void si_memory_barrier(struct pipe_context *ctx, unsigned flags)
+void si_memory_barrier(struct pipe_context *ctx, unsigned flags)
 {
 	struct si_context *sctx = (struct si_context *)ctx;
 
 	/* Subsequent commands must wait for all shader invocations to
 	 * complete. */
 	sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
 	                 SI_CONTEXT_CS_PARTIAL_FLUSH;
 
 	if (flags & PIPE_BARRIER_CONSTANT_BUFFER)
 		sctx->flags |= SI_CONTEXT_INV_SMEM_L1 |
@@ -4813,21 +4813,20 @@  void si_init_state_functions(struct si_context *sctx)
 	sctx->b.sampler_view_destroy = si_sampler_view_destroy;
 
 	sctx->b.set_sample_mask = si_set_sample_mask;
 
 	sctx->b.create_vertex_elements_state = si_create_vertex_elements;
 	sctx->b.bind_vertex_elements_state = si_bind_vertex_elements;
 	sctx->b.delete_vertex_elements_state = si_delete_vertex_element;
 	sctx->b.set_vertex_buffers = si_set_vertex_buffers;
 
 	sctx->b.texture_barrier = si_texture_barrier;
-	sctx->b.memory_barrier = si_memory_barrier;
 	sctx->b.set_min_samples = si_set_min_samples;
 	sctx->b.set_tess_state = si_set_tess_state;
 
 	sctx->b.set_active_query_state = si_set_active_query_state;
 
 	si_init_config(sctx);
 }
 
 void si_init_screen_state_functions(struct si_screen *sscreen)
 {
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index 767e789276a..6faa4c511b1 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -482,20 +482,21 @@  void si_set_active_descriptors_for_shader(struct si_context *sctx,
 					  struct si_shader_selector *sel);
 bool si_bindless_descriptor_can_reclaim_slab(void *priv,
 					     struct pb_slab_entry *entry);
 struct pb_slab *si_bindless_descriptor_slab_alloc(void *priv, unsigned heap,
 						  unsigned entry_size,
 						  unsigned group_index);
 void si_bindless_descriptor_slab_free(void *priv, struct pb_slab *pslab);
 void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf,
 		      uint64_t old_va);
 /* si_state.c */
+void si_memory_barrier(struct pipe_context *ctx, unsigned flags);
 void si_init_state_functions(struct si_context *sctx);
 void si_init_screen_state_functions(struct si_screen *sscreen);
 void
 si_make_buffer_descriptor(struct si_screen *screen, struct si_resource *buf,
 			  enum pipe_format format,
 			  unsigned offset, unsigned size,
 			  uint32_t *state);
 void
 si_make_texture_descriptor(struct si_screen *screen,
 			   struct si_texture *tex,
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index 9c968e39c2c..2a514f144b9 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -872,21 +872,21 @@  static void si_emit_draw_packets(struct si_context *sctx,
 				        S_0287F0_USE_OPAQUE(!!info->count_from_stream_output));
 		}
 	}
 }
 
 static void si_emit_surface_sync(struct si_context *sctx,
 				 unsigned cp_coher_cntl)
 {
 	struct radeon_cmdbuf *cs = sctx->gfx_cs;
 
-	if (sctx->chip_class >= GFX9) {
+	if (sctx->chip_class >= GFX9 || !sctx->has_graphics) {
 		/* Flush caches and wait for the caches to assert idle. */
 		radeon_emit(cs, PKT3(PKT3_ACQUIRE_MEM, 5, 0));
 		radeon_emit(cs, cp_coher_cntl);	/* CP_COHER_CNTL */
 		radeon_emit(cs, 0xffffffff);	/* CP_COHER_SIZE */
 		radeon_emit(cs, 0xffffff);	/* CP_COHER_SIZE_HI */
 		radeon_emit(cs, 0);		/* CP_COHER_BASE */
 		radeon_emit(cs, 0);		/* CP_COHER_BASE_HI */
 		radeon_emit(cs, 0x0000000A);	/* POLL_INTERVAL */
 	} else {
 		/* ACQUIRE_MEM is only required on a compute ring. */
@@ -895,20 +895,32 @@  static void si_emit_surface_sync(struct si_context *sctx,
 		radeon_emit(cs, 0xffffffff);      /* CP_COHER_SIZE */
 		radeon_emit(cs, 0);               /* CP_COHER_BASE */
 		radeon_emit(cs, 0x0000000A);      /* POLL_INTERVAL */
 	}
 }
 
 void si_emit_cache_flush(struct si_context *sctx)
 {
 	struct radeon_cmdbuf *cs = sctx->gfx_cs;
 	uint32_t flags = sctx->flags;
+
+	if (!sctx->has_graphics) {
+		/* Only process compute flags. */
+		flags &= SI_CONTEXT_INV_ICACHE |
+			 SI_CONTEXT_INV_SMEM_L1 |
+			 SI_CONTEXT_INV_VMEM_L1 |
+			 SI_CONTEXT_INV_GLOBAL_L2 |
+			 SI_CONTEXT_WRITEBACK_GLOBAL_L2 |
+			 SI_CONTEXT_INV_L2_METADATA |
+			 SI_CONTEXT_CS_PARTIAL_FLUSH;
+	}
+
 	uint32_t cp_coher_cntl = 0;
 	uint32_t flush_cb_db = flags & (SI_CONTEXT_FLUSH_AND_INV_CB |
 					SI_CONTEXT_FLUSH_AND_INV_DB);
 
 	if (flags & SI_CONTEXT_FLUSH_AND_INV_CB)
 		sctx->num_cb_cache_flushes++;
 	if (flags & SI_CONTEXT_FLUSH_AND_INV_DB)
 		sctx->num_db_cache_flushes++;
 
 	/* SI has a bug that it always flushes ICACHE and KCACHE if either
@@ -1061,25 +1073,26 @@  void si_emit_cache_flush(struct si_context *sctx)
 				  EOP_DATA_SEL_VALUE_32BIT,
 				  sctx->wait_mem_scratch, va,
 				  sctx->wait_mem_number, SI_NOT_QUERY);
 		si_cp_wait_mem(sctx, cs, va, sctx->wait_mem_number, 0xffffffff,
 			       WAIT_REG_MEM_EQUAL);
 	}
 
 	/* Make sure ME is idle (it executes most packets) before continuing.
 	 * This prevents read-after-write hazards between PFP and ME.
 	 */
-	if (cp_coher_cntl ||
-	    (flags & (SI_CONTEXT_CS_PARTIAL_FLUSH |
-			    SI_CONTEXT_INV_VMEM_L1 |
-			    SI_CONTEXT_INV_GLOBAL_L2 |
-			    SI_CONTEXT_WRITEBACK_GLOBAL_L2))) {
+	if (sctx->has_graphics &&
+	    (cp_coher_cntl ||
+	     (flags & (SI_CONTEXT_CS_PARTIAL_FLUSH |
+		       SI_CONTEXT_INV_VMEM_L1 |
+		       SI_CONTEXT_INV_GLOBAL_L2 |
+		       SI_CONTEXT_WRITEBACK_GLOBAL_L2)))) {
 		radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
 		radeon_emit(cs, 0);
 	}
 
 	/* SI-CI-VI only:
 	 *   When one of the CP_COHER_CNTL.DEST_BASE flags is set, SURFACE_SYNC
 	 *   waits for idle, so it should be last. SURFACE_SYNC is done in PFP.
 	 *
 	 * cp_coher_cntl should contain all necessary flags except TC flags
 	 * at this point.
diff --git a/src/gallium/drivers/radeonsi/si_texture.c b/src/gallium/drivers/radeonsi/si_texture.c
index a50088d2d8f..581f90a7b2f 100644
--- a/src/gallium/drivers/radeonsi/si_texture.c
+++ b/src/gallium/drivers/radeonsi/si_texture.c
@@ -457,20 +457,23 @@  static bool si_texture_discard_dcc(struct si_screen *sscreen,
  *   compressed tiled
  *
  * \param sctx  the current context if you have one, or sscreen->aux_context
  *              if you don't.
  */
 bool si_texture_disable_dcc(struct si_context *sctx,
 			    struct si_texture *tex)
 {
 	struct si_screen *sscreen = sctx->screen;
 
+	if (!sctx->has_graphics)
+		return si_texture_discard_dcc(sscreen, tex);
+
 	if (!si_can_disable_dcc(tex))
 		return false;
 
 	if (&sctx->b == sscreen->aux_context)
 		mtx_lock(&sscreen->aux_context_lock);
 
 	/* Decompress DCC. */
 	si_decompress_dcc(sctx, tex);
 	sctx->b.flush(&sctx->b, NULL, 0);
 

Comments

Hello Marek,

this series needs a rebase (if you have some time).

Dieter

Am 12.02.2019 19:12, schrieb Marek Olšák:
> From: Marek Olšák <marek.olsak@amd.com>
> 
> initialize all non-compute context functions to NULL.
> 
> v2: fix SI
> ---
>  src/gallium/drivers/radeonsi/si_blit.c        | 14 ++-
>  src/gallium/drivers/radeonsi/si_clear.c       |  7 +-
>  src/gallium/drivers/radeonsi/si_compute.c     | 15 +--
>  src/gallium/drivers/radeonsi/si_descriptors.c | 10 +-
>  src/gallium/drivers/radeonsi/si_gfx_cs.c      | 29 +++---
>  src/gallium/drivers/radeonsi/si_pipe.c        | 95 +++++++++++--------
>  src/gallium/drivers/radeonsi/si_pipe.h        |  3 +-
>  src/gallium/drivers/radeonsi/si_state.c       |  3 +-
>  src/gallium/drivers/radeonsi/si_state.h       |  1 +
>  src/gallium/drivers/radeonsi/si_state_draw.c  | 25 +++--
>  src/gallium/drivers/radeonsi/si_texture.c     |  3 +
>  11 files changed, 130 insertions(+), 75 deletions(-)
> 
> diff --git a/src/gallium/drivers/radeonsi/si_blit.c
> b/src/gallium/drivers/radeonsi/si_blit.c
> index bb8d1cbd12d..f39cb5d143f 100644
> --- a/src/gallium/drivers/radeonsi/si_blit.c
> +++ b/src/gallium/drivers/radeonsi/si_blit.c
> @@ -1345,25 +1345,31 @@ static void si_flush_resource(struct 
> pipe_context *ctx,
> 
>  		if (separate_dcc_dirty) {
>  			tex->separate_dcc_dirty = false;
>  			vi_separate_dcc_process_and_reset_stats(ctx, tex);
>  		}
>  	}
>  }
> 
>  void si_decompress_dcc(struct si_context *sctx, struct si_texture 
> *tex)
>  {
> -	if (!tex->dcc_offset)
> +	/* If graphics is disabled, we can't decompress DCC, but it shouldn't
> +	 * be compressed either. The caller should simply discard it.
> +	 */
> +	if (!tex->dcc_offset || !sctx->has_graphics)
>  		return;
> 
>  	si_blit_decompress_color(sctx, tex, 0, tex->buffer.b.b.last_level,
>  				 0, util_max_layer(&tex->buffer.b.b, 0),
>  				 true);
>  }
> 
>  void si_init_blit_functions(struct si_context *sctx)
>  {
>  	sctx->b.resource_copy_region = si_resource_copy_region;
> -	sctx->b.blit = si_blit;
> -	sctx->b.flush_resource = si_flush_resource;
> -	sctx->b.generate_mipmap = si_generate_mipmap;
> +
> +	if (sctx->has_graphics) {
> +		sctx->b.blit = si_blit;
> +		sctx->b.flush_resource = si_flush_resource;
> +		sctx->b.generate_mipmap = si_generate_mipmap;
> +	}
>  }
> diff --git a/src/gallium/drivers/radeonsi/si_clear.c
> b/src/gallium/drivers/radeonsi/si_clear.c
> index 9a00bb73b94..e1805f2a1c9 100644
> --- a/src/gallium/drivers/radeonsi/si_clear.c
> +++ b/src/gallium/drivers/radeonsi/si_clear.c
> @@ -764,15 +764,18 @@ static void si_clear_texture(struct pipe_context 
> *pipe,
>  			util_clear_render_target(pipe, sf, &color,
>  						 box->x, box->y,
>  						 box->width, box->height);
>  		}
>  	}
>  	pipe_surface_reference(&sf, NULL);
>  }
> 
>  void si_init_clear_functions(struct si_context *sctx)
>  {
> -	sctx->b.clear = si_clear;
>  	sctx->b.clear_render_target = si_clear_render_target;
> -	sctx->b.clear_depth_stencil = si_clear_depth_stencil;
>  	sctx->b.clear_texture = si_clear_texture;
> +
> +	if (sctx->has_graphics) {
> +		sctx->b.clear = si_clear;
> +		sctx->b.clear_depth_stencil = si_clear_depth_stencil;
> +	}
>  }
> diff --git a/src/gallium/drivers/radeonsi/si_compute.c
> b/src/gallium/drivers/radeonsi/si_compute.c
> index 1a62b3e0844..87addd53976 100644
> --- a/src/gallium/drivers/radeonsi/si_compute.c
> +++ b/src/gallium/drivers/radeonsi/si_compute.c
> @@ -880,26 +880,28 @@ static void si_launch_grid(
>  		info->block[0] * info->block[1] * info->block[2] > 256;
> 
>  	if (cs_regalloc_hang)
>  		sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
>  				 SI_CONTEXT_CS_PARTIAL_FLUSH;
> 
>  	if (program->ir_type != PIPE_SHADER_IR_NATIVE &&
>  	    program->shader.compilation_failed)
>  		return;
> 
> -	if (sctx->last_num_draw_calls != sctx->num_draw_calls) {
> -		si_update_fb_dirtiness_after_rendering(sctx);
> -		sctx->last_num_draw_calls = sctx->num_draw_calls;
> -	}
> +	if (sctx->has_graphics) {
> +		if (sctx->last_num_draw_calls != sctx->num_draw_calls) {
> +			si_update_fb_dirtiness_after_rendering(sctx);
> +			sctx->last_num_draw_calls = sctx->num_draw_calls;
> +		}
> 
> -	si_decompress_textures(sctx, 1 << PIPE_SHADER_COMPUTE);
> +		si_decompress_textures(sctx, 1 << PIPE_SHADER_COMPUTE);
> +	}
> 
>  	/* Add buffer sizes for memory checking in need_cs_space. */
>  	si_context_add_resource_size(sctx, &program->shader.bo->b.b);
>  	/* TODO: add the scratch buffer */
> 
>  	if (info->indirect) {
>  		si_context_add_resource_size(sctx, info->indirect);
> 
>  		/* Indirect buffers use TC L2 on GFX9, but not older hw. */
>  		if (sctx->chip_class <= VI &&
> @@ -917,21 +919,22 @@ static void si_launch_grid(
>  	if (sctx->flags)
>  		si_emit_cache_flush(sctx);
> 
>  	if (!si_switch_compute_shader(sctx, program, &program->shader,
>  					code_object, info->pc))
>  		return;
> 
>  	si_upload_compute_shader_descriptors(sctx);
>  	si_emit_compute_shader_pointers(sctx);
> 
> -	if (si_is_atom_dirty(sctx, &sctx->atoms.s.render_cond)) {
> +	if (sctx->has_graphics &&
> +	    si_is_atom_dirty(sctx, &sctx->atoms.s.render_cond)) {
>  		sctx->atoms.s.render_cond.emit(sctx);
>  		si_set_atom_dirty(sctx, &sctx->atoms.s.render_cond, false);
>  	}
> 
>  	if ((program->input_size ||
>              program->ir_type == PIPE_SHADER_IR_NATIVE) &&
>             unlikely(!si_upload_compute_input(sctx, code_object, 
> info))) {
>  		return;
>  	}
> 
> diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c
> b/src/gallium/drivers/radeonsi/si_descriptors.c
> index 21d4ca946d3..0f22c55723c 100644
> --- a/src/gallium/drivers/radeonsi/si_descriptors.c
> +++ b/src/gallium/drivers/radeonsi/si_descriptors.c
> @@ -2640,22 +2640,24 @@ void
> si_all_resident_buffers_begin_new_cs(struct si_context *sctx)
> 
>  	sctx->num_resident_handles += num_resident_tex_handles +
>  					num_resident_img_handles;
>  }
> 
>  /* INIT/DEINIT/UPLOAD */
> 
>  void si_init_all_descriptors(struct si_context *sctx)
>  {
>  	int i;
> +	unsigned first_shader =
> +		sctx->has_graphics ? 0 : PIPE_SHADER_COMPUTE;
> 
> -	for (i = 0; i < SI_NUM_SHADERS; i++) {
> +	for (i = first_shader; i < SI_NUM_SHADERS; i++) {
>  		bool is_2nd = sctx->chip_class >= GFX9 &&
>  				     (i == PIPE_SHADER_TESS_CTRL ||
>  				      i == PIPE_SHADER_GEOMETRY);
>  		unsigned num_sampler_slots = SI_NUM_IMAGES / 2 + SI_NUM_SAMPLERS;
>  		unsigned num_buffer_slots = SI_NUM_SHADER_BUFFERS + 
> SI_NUM_CONST_BUFFERS;
>  		int rel_dw_offset;
>  		struct si_descriptors *desc;
> 
>  		if (is_2nd) {
>  			if (i == PIPE_SHADER_TESS_CTRL) {
> @@ -2714,30 +2716,34 @@ void si_init_all_descriptors(struct si_context 
> *sctx)
>  	si_init_bindless_descriptors(sctx, &sctx->bindless_descriptors,
>  				     SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES,
>  				     1024);
> 
>  	sctx->descriptors_dirty = u_bit_consecutive(0, SI_NUM_DESCS);
> 
>  	/* Set pipe_context functions. */
>  	sctx->b.bind_sampler_states = si_bind_sampler_states;
>  	sctx->b.set_shader_images = si_set_shader_images;
>  	sctx->b.set_constant_buffer = si_pipe_set_constant_buffer;
> -	sctx->b.set_polygon_stipple = si_set_polygon_stipple;
>  	sctx->b.set_shader_buffers = si_set_shader_buffers;
>  	sctx->b.set_sampler_views = si_set_sampler_views;
>  	sctx->b.create_texture_handle = si_create_texture_handle;
>  	sctx->b.delete_texture_handle = si_delete_texture_handle;
>  	sctx->b.make_texture_handle_resident = 
> si_make_texture_handle_resident;
>  	sctx->b.create_image_handle = si_create_image_handle;
>  	sctx->b.delete_image_handle = si_delete_image_handle;
>  	sctx->b.make_image_handle_resident = si_make_image_handle_resident;
> 
> +	if (!sctx->has_graphics)
> +		return;
> +
> +	sctx->b.set_polygon_stipple = si_set_polygon_stipple;
> +
>  	/* Shader user data. */
>  	sctx->atoms.s.shader_pointers.emit = 
> si_emit_graphics_shader_pointers;
> 
>  	/* Set default and immutable mappings. */
>  	si_set_user_data_base(sctx, PIPE_SHADER_VERTEX,
> R_00B130_SPI_SHADER_USER_DATA_VS_0);
> 
>  	if (sctx->chip_class >= GFX9) {
>  		si_set_user_data_base(sctx, PIPE_SHADER_TESS_CTRL,
>  				      R_00B430_SPI_SHADER_USER_DATA_LS_0);
>  		si_set_user_data_base(sctx, PIPE_SHADER_GEOMETRY,
> diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c
> b/src/gallium/drivers/radeonsi/si_gfx_cs.c
> index 3d64587fa2b..d0e7cf20b4c 100644
> --- a/src/gallium/drivers/radeonsi/si_gfx_cs.c
> +++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c
> @@ -103,27 +103,29 @@ void si_flush_gfx_cs(struct si_context *ctx,
> unsigned flags,
>  	 * This code is only needed when the driver flushes the GFX IB
>  	 * internally, and it never asks for a fence handle.
>  	 */
>  	if (radeon_emitted(ctx->dma_cs, 0)) {
>  		assert(fence == NULL); /* internal flushes only */
>  		si_flush_dma_cs(ctx, flags, NULL);
>  	}
> 
>  	ctx->gfx_flush_in_progress = true;
> 
> -	if (!LIST_IS_EMPTY(&ctx->active_queries))
> -		si_suspend_queries(ctx);
> -
> -	ctx->streamout.suspended = false;
> -	if (ctx->streamout.begin_emitted) {
> -		si_emit_streamout_end(ctx);
> -		ctx->streamout.suspended = true;
> +	if (ctx->has_graphics) {
> +		if (!LIST_IS_EMPTY(&ctx->active_queries))
> +			si_suspend_queries(ctx);
> +
> +		ctx->streamout.suspended = false;
> +		if (ctx->streamout.begin_emitted) {
> +			si_emit_streamout_end(ctx);
> +			ctx->streamout.suspended = true;
> +		}
>  	}
> 
>  	/* Make sure CP DMA is idle at the end of IBs after L2 prefetches
>  	 * because the kernel doesn't wait for it. */
>  	if (ctx->chip_class >= CIK)
>  		si_cp_dma_wait_for_idle(ctx);
> 
>  	/* Wait for draw calls to finish if needed. */
>  	if (wait_flags) {
>  		ctx->flags |= wait_flags;
> @@ -209,20 +211,29 @@ void si_begin_new_gfx_cs(struct si_context *ctx)
>  	 * IB starts drawing.
>  	 *
>  	 * TODO: Do we also need to invalidate CB & DB caches?
>  	 */
>  	ctx->flags |= SI_CONTEXT_INV_ICACHE |
>  		      SI_CONTEXT_INV_SMEM_L1 |
>  		      SI_CONTEXT_INV_VMEM_L1 |
>  		      SI_CONTEXT_INV_GLOBAL_L2 |
>  		      SI_CONTEXT_START_PIPELINE_STATS;
> 
> +	ctx->cs_shader_state.initialized = false;
> +	si_all_descriptors_begin_new_cs(ctx);
> +	si_all_resident_buffers_begin_new_cs(ctx);
> +
> +	if (!ctx->has_graphics) {
> +		ctx->initial_gfx_cs_size = ctx->gfx_cs->current.cdw;
> +		return;
> +	}
> +
>  	/* set all valid group as dirty so they get reemited on
>  	 * next draw command
>  	 */
>  	si_pm4_reset_emitted(ctx);
> 
>  	/* The CS initialization should be emitted before everything else. */
>  	si_pm4_emit(ctx, ctx->init_config);
>  	if (ctx->init_config_gs_rings)
>  		si_pm4_emit(ctx, ctx->init_config_gs_rings);
> 
> @@ -273,22 +284,20 @@ void si_begin_new_gfx_cs(struct si_context *ctx)
>  	si_mark_atom_dirty(ctx, &ctx->atoms.s.db_render_state);
>  	if (ctx->chip_class >= GFX9)
>  		si_mark_atom_dirty(ctx, &ctx->atoms.s.dpbb_state);
>  	si_mark_atom_dirty(ctx, &ctx->atoms.s.stencil_ref);
>  	si_mark_atom_dirty(ctx, &ctx->atoms.s.spi_map);
>  	si_mark_atom_dirty(ctx, &ctx->atoms.s.streamout_enable);
>  	si_mark_atom_dirty(ctx, &ctx->atoms.s.render_cond);
>  	/* CLEAR_STATE disables all window rectangles. */
>  	if (!has_clear_state || ctx->num_window_rectangles > 0)
>  		si_mark_atom_dirty(ctx, &ctx->atoms.s.window_rectangles);
> -	si_all_descriptors_begin_new_cs(ctx);
> -	si_all_resident_buffers_begin_new_cs(ctx);
> 
>  	ctx->scissors.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
>  	ctx->viewports.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
>  	ctx->viewports.depth_range_dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
>  	si_mark_atom_dirty(ctx, &ctx->atoms.s.guardband);
>  	si_mark_atom_dirty(ctx, &ctx->atoms.s.scissors);
>  	si_mark_atom_dirty(ctx, &ctx->atoms.s.viewports);
> 
>  	si_mark_atom_dirty(ctx, &ctx->atoms.s.scratch_state);
>  	if (ctx->scratch_buffer) {
> @@ -316,22 +325,20 @@ void si_begin_new_gfx_cs(struct si_context *ctx)
>  	ctx->last_multi_vgt_param = -1;
>  	ctx->last_rast_prim = -1;
>  	ctx->last_sc_line_stipple = ~0;
>  	ctx->last_vs_state = ~0;
>  	ctx->last_ls = NULL;
>  	ctx->last_tcs = NULL;
>  	ctx->last_tes_sh_base = -1;
>  	ctx->last_num_tcs_input_cp = -1;
>  	ctx->last_ls_hs_config = -1; /* impossible value */
> 
> -	ctx->cs_shader_state.initialized = false;
> -
>  	if (has_clear_state) {
>  		ctx->tracked_regs.reg_value[SI_TRACKED_DB_RENDER_CONTROL] = 
> 0x00000000;
>  		ctx->tracked_regs.reg_value[SI_TRACKED_DB_COUNT_CONTROL] = 
> 0x00000000;
>  		ctx->tracked_regs.reg_value[SI_TRACKED_DB_RENDER_OVERRIDE2] = 
> 0x00000000;
>  		ctx->tracked_regs.reg_value[SI_TRACKED_DB_SHADER_CONTROL] = 
> 0x00000000;
>  		ctx->tracked_regs.reg_value[SI_TRACKED_CB_TARGET_MASK] = 0xffffffff;
>  		ctx->tracked_regs.reg_value[SI_TRACKED_CB_DCC_CONTROL] = 0x00000000;
>  		ctx->tracked_regs.reg_value[SI_TRACKED_SX_PS_DOWNCONVERT] = 
> 0x00000000;
>  		ctx->tracked_regs.reg_value[SI_TRACKED_SX_BLEND_OPT_EPSILON] = 
> 0x00000000;
>  		ctx->tracked_regs.reg_value[SI_TRACKED_SX_BLEND_OPT_CONTROL] = 
> 0x00000000;
> diff --git a/src/gallium/drivers/radeonsi/si_pipe.c
> b/src/gallium/drivers/radeonsi/si_pipe.c
> index 20767c806d2..c2ec664d5a4 100644
> --- a/src/gallium/drivers/radeonsi/si_pipe.c
> +++ b/src/gallium/drivers/radeonsi/si_pipe.c
> @@ -381,61 +381,56 @@ static struct pipe_context
> *si_create_context(struct pipe_screen *screen,
>  {
>  	struct si_context *sctx = CALLOC_STRUCT(si_context);
>  	struct si_screen* sscreen = (struct si_screen *)screen;
>  	struct radeon_winsys *ws = sscreen->ws;
>  	int shader, i;
>  	bool stop_exec_on_failure = (flags & 
> PIPE_CONTEXT_LOSE_CONTEXT_ON_RESET) != 0;
> 
>  	if (!sctx)
>  		return NULL;
> 
> +	sctx->has_graphics = sscreen->info.chip_class == SI ||
> +			     !(flags & PIPE_CONTEXT_COMPUTE_ONLY);
> +
>  	if (flags & PIPE_CONTEXT_DEBUG)
>  		sscreen->record_llvm_ir = true; /* racy but not critical */
> 
>  	sctx->b.screen = screen; /* this must be set first */
>  	sctx->b.priv = NULL;
>  	sctx->b.destroy = si_destroy_context;
> -	sctx->b.emit_string_marker = si_emit_string_marker;
> -	sctx->b.set_debug_callback = si_set_debug_callback;
> -	sctx->b.set_log_context = si_set_log_context;
> -	sctx->b.set_context_param = si_set_context_param;
>  	sctx->screen = sscreen; /* Easy accessing of screen/winsys. */
>  	sctx->is_debug = (flags & PIPE_CONTEXT_DEBUG) != 0;
> 
>  	slab_create_child(&sctx->pool_transfers, &sscreen->pool_transfers);
>  	slab_create_child(&sctx->pool_transfers_unsync, 
> &sscreen->pool_transfers);
> 
>  	sctx->ws = sscreen->ws;
>  	sctx->family = sscreen->info.family;
>  	sctx->chip_class = sscreen->info.chip_class;
> 
>  	if (sscreen->info.has_gpu_reset_counter_query) {
>  		sctx->gpu_reset_counter =
>  			sctx->ws->query_value(sctx->ws, RADEON_GPU_RESET_COUNTER);
>  	}
> 
> -	sctx->b.get_device_reset_status = si_get_reset_status;
> -	sctx->b.set_device_reset_callback = si_set_device_reset_callback;
> -
> -	si_init_context_texture_functions(sctx);
> -	si_init_query_functions(sctx);
> 
>  	if (sctx->chip_class == CIK ||
>  	    sctx->chip_class == VI ||
>  	    sctx->chip_class == GFX9) {
>  		sctx->eop_bug_scratch = si_resource(
>  			pipe_buffer_create(&sscreen->b, 0, PIPE_USAGE_DEFAULT,
>  					   16 * sscreen->info.num_render_backends));
>  		if (!sctx->eop_bug_scratch)
>  			goto fail;
>  	}
> 
> +	/* Initialize context allocators. */
>  	sctx->allocator_zeroed_memory =
>  		u_suballocator_create(&sctx->b, 128 * 1024,
>  				      0, PIPE_USAGE_DEFAULT,
>  				      SI_RESOURCE_FLAG_UNMAPPABLE |
>  				      SI_RESOURCE_FLAG_CLEAR, false);
>  	if (!sctx->allocator_zeroed_memory)
>  		goto fail;
> 
>  	sctx->b.stream_uploader = u_upload_create(&sctx->b, 1024 * 1024,
>  						    0, PIPE_USAGE_STREAM,
> @@ -459,38 +454,22 @@ static struct pipe_context
> *si_create_context(struct pipe_screen *screen,
>  	sctx->ctx = sctx->ws->ctx_create(sctx->ws);
>  	if (!sctx->ctx)
>  		goto fail;
> 
>  	if (sscreen->info.num_sdma_rings && !(sscreen->debug_flags &
> DBG(NO_ASYNC_DMA))) {
>  		sctx->dma_cs = sctx->ws->cs_create(sctx->ctx, RING_DMA,
>  						   (void*)si_flush_dma_cs,
>  						   sctx, stop_exec_on_failure);
>  	}
> 
> -	si_init_buffer_functions(sctx);
> -	si_init_clear_functions(sctx);
> -	si_init_blit_functions(sctx);
> -	si_init_compute_functions(sctx);
> -	si_init_compute_blit_functions(sctx);
> -	si_init_debug_functions(sctx);
> -	si_init_msaa_functions(sctx);
> -	si_init_streamout_functions(sctx);
> -
> -	if (sscreen->info.has_hw_decode) {
> -		sctx->b.create_video_codec = si_uvd_create_decoder;
> -		sctx->b.create_video_buffer = si_video_buffer_create;
> -	} else {
> -		sctx->b.create_video_codec = vl_create_decoder;
> -		sctx->b.create_video_buffer = vl_video_buffer_create;
> -	}
> -
> -	sctx->gfx_cs = ws->cs_create(sctx->ctx, RING_GFX,
> +	sctx->gfx_cs = ws->cs_create(sctx->ctx,
> +				     sctx->has_graphics ? RING_GFX : RING_COMPUTE,
>  				     (void*)si_flush_gfx_cs, sctx, stop_exec_on_failure);
> 
>  	/* Border colors. */
>  	sctx->border_color_table = malloc(SI_MAX_BORDER_COLORS *
>  					  sizeof(*sctx->border_color_table));
>  	if (!sctx->border_color_table)
>  		goto fail;
> 
>  	sctx->border_color_buffer = si_resource(
>  		pipe_buffer_create(screen, 0, PIPE_USAGE_DEFAULT,
> @@ -498,43 +477,76 @@ static struct pipe_context
> *si_create_context(struct pipe_screen *screen,
>  				   sizeof(*sctx->border_color_table)));
>  	if (!sctx->border_color_buffer)
>  		goto fail;
> 
>  	sctx->border_color_map =
>  		ws->buffer_map(sctx->border_color_buffer->buf,
>  			       NULL, PIPE_TRANSFER_WRITE);
>  	if (!sctx->border_color_map)
>  		goto fail;
> 
> +	/* Initialize context functions used by graphics and compute. */
> +	sctx->b.emit_string_marker = si_emit_string_marker;
> +	sctx->b.set_debug_callback = si_set_debug_callback;
> +	sctx->b.set_log_context = si_set_log_context;
> +	sctx->b.set_context_param = si_set_context_param;
> +	sctx->b.get_device_reset_status = si_get_reset_status;
> +	sctx->b.set_device_reset_callback = si_set_device_reset_callback;
> +	sctx->b.memory_barrier = si_memory_barrier;
> +
>  	si_init_all_descriptors(sctx);
> +	si_init_buffer_functions(sctx);
> +	si_init_clear_functions(sctx);
> +	si_init_blit_functions(sctx);
> +	si_init_compute_functions(sctx);
> +	si_init_compute_blit_functions(sctx);
> +	si_init_debug_functions(sctx);
>  	si_init_fence_functions(sctx);
> -	si_init_state_functions(sctx);
> -	si_init_shader_functions(sctx);
> -	si_init_viewport_functions(sctx);
> -
> -	if (sctx->chip_class >= CIK)
> -		cik_init_sdma_functions(sctx);
> -	else
> -		si_init_dma_functions(sctx);
> 
>  	if (sscreen->debug_flags & DBG(FORCE_DMA))
>  		sctx->b.resource_copy_region = sctx->dma_copy;
> 
> -	sctx->blitter = util_blitter_create(&sctx->b);
> -	if (sctx->blitter == NULL)
> -		goto fail;
> -	sctx->blitter->skip_viewport_restore = true;
> +	/* Initialize graphics-only context functions. */
> +	if (sctx->has_graphics) {
> +		si_init_context_texture_functions(sctx);
> +		si_init_query_functions(sctx);
> +		si_init_msaa_functions(sctx);
> +		si_init_shader_functions(sctx);
> +		si_init_state_functions(sctx);
> +		si_init_streamout_functions(sctx);
> +		si_init_viewport_functions(sctx);
> +
> +		sctx->blitter = util_blitter_create(&sctx->b);
> +		if (sctx->blitter == NULL)
> +			goto fail;
> +		sctx->blitter->skip_viewport_restore = true;
> 
> -	si_init_draw_functions(sctx);
> +		si_init_draw_functions(sctx);
> +	}
> +
> +	/* Initialize SDMA functions. */
> +	if (sctx->chip_class >= CIK)
> +		cik_init_sdma_functions(sctx);
> +	else
> +		si_init_dma_functions(sctx);
> 
>  	sctx->sample_mask = 0xffff;
> 
> +	/* Initialize multimedia functions. */
> +	if (sscreen->info.has_hw_decode) {
> +		sctx->b.create_video_codec = si_uvd_create_decoder;
> +		sctx->b.create_video_buffer = si_video_buffer_create;
> +	} else {
> +		sctx->b.create_video_codec = vl_create_decoder;
> +		sctx->b.create_video_buffer = vl_video_buffer_create;
> +	}
> +
>  	if (sctx->chip_class >= GFX9) {
>  		sctx->wait_mem_scratch = si_resource(
>  			pipe_buffer_create(screen, 0, PIPE_USAGE_DEFAULT, 4));
>  		if (!sctx->wait_mem_scratch)
>  			goto fail;
> 
>  		/* Initialize the memory. */
>  		si_cp_write_data(sctx, sctx->wait_mem_scratch, 0, 4,
>  				 V_370_MEM, V_370_ME, &sctx->wait_mem_number);
>  	}
> @@ -544,21 +556,22 @@ static struct pipe_context
> *si_create_context(struct pipe_screen *screen,
>  	if (sctx->chip_class == CIK) {
>  		sctx->null_const_buf.buffer =
>  			pipe_aligned_buffer_create(screen,
>  						   SI_RESOURCE_FLAG_32BIT,
>  						   PIPE_USAGE_DEFAULT, 16,
>  						   sctx->screen->info.tcc_cache_line_size);
>  		if (!sctx->null_const_buf.buffer)
>  			goto fail;
>  		sctx->null_const_buf.buffer_size = 
> sctx->null_const_buf.buffer->width0;
> 
> -		for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
> +		unsigned start_shader = sctx->has_graphics ? 0 :  
> PIPE_SHADER_COMPUTE;
> +		for (shader = start_shader; shader < SI_NUM_SHADERS; shader++) {
>  			for (i = 0; i < SI_NUM_CONST_BUFFERS; i++) {
>  				sctx->b.set_constant_buffer(&sctx->b, shader, i,
>  							      &sctx->null_const_buf);
>  			}
>  		}
> 
>  		si_set_rw_buffer(sctx, SI_HS_CONST_DEFAULT_TESS_LEVELS,
>  				 &sctx->null_const_buf);
>  		si_set_rw_buffer(sctx, SI_VS_CONST_INSTANCE_DIVISORS,
>  				 &sctx->null_const_buf);
> diff --git a/src/gallium/drivers/radeonsi/si_pipe.h
> b/src/gallium/drivers/radeonsi/si_pipe.h
> index b01d5744752..348e8e5bd26 100644
> --- a/src/gallium/drivers/radeonsi/si_pipe.h
> +++ b/src/gallium/drivers/radeonsi/si_pipe.h
> @@ -777,21 +777,21 @@ struct si_saved_cs {
>  };
> 
>  struct si_context {
>  	struct pipe_context		b; /* base class */
> 
>  	enum radeon_family		family;
>  	enum chip_class			chip_class;
> 
>  	struct radeon_winsys		*ws;
>  	struct radeon_winsys_ctx	*ctx;
> -	struct radeon_cmdbuf		*gfx_cs;
> +	struct radeon_cmdbuf		*gfx_cs; /* compute IB if graphics is disabled 
> */
>  	struct radeon_cmdbuf		*dma_cs;
>  	struct pipe_fence_handle	*last_gfx_fence;
>  	struct pipe_fence_handle	*last_sdma_fence;
>  	struct si_resource		*eop_bug_scratch;
>  	struct u_upload_mgr		*cached_gtt_allocator;
>  	struct threaded_context		*tc;
>  	struct u_suballocator		*allocator_zeroed_memory;
>  	struct slab_child_pool		pool_transfers;
>  	struct slab_child_pool		pool_transfers_unsync; /* for 
> threaded_context */
>  	struct pipe_device_reset_callback device_reset_callback;
> @@ -815,20 +815,21 @@ struct si_context {
>  	void				*cs_clear_render_target;
>  	void				*cs_clear_render_target_1d_array;
>  	struct si_screen		*screen;
>  	struct pipe_debug_callback	debug;
>  	struct ac_llvm_compiler		compiler; /* only non-threaded compilation 
> */
>  	struct si_shader_ctx_state	fixed_func_tcs_shader;
>  	struct si_resource		*wait_mem_scratch;
>  	unsigned			wait_mem_number;
>  	uint16_t			prefetch_L2_mask;
> 
> +	bool				has_graphics;
>  	bool				gfx_flush_in_progress:1;
>  	bool				gfx_last_ib_is_busy:1;
>  	bool				compute_is_busy:1;
> 
>  	unsigned			num_gfx_cs_flushes;
>  	unsigned			initial_gfx_cs_size;
>  	unsigned			gpu_reset_counter;
>  	unsigned			last_dirty_tex_counter;
>  	unsigned			last_compressed_colortex_counter;
>  	unsigned			last_num_draw_calls;
> diff --git a/src/gallium/drivers/radeonsi/si_state.c
> b/src/gallium/drivers/radeonsi/si_state.c
> index b49a1b3695e..458b108a7e3 100644
> --- a/src/gallium/drivers/radeonsi/si_state.c
> +++ b/src/gallium/drivers/radeonsi/si_state.c
> @@ -4699,21 +4699,21 @@ static void si_texture_barrier(struct
> pipe_context *ctx, unsigned flags)
> 
>  	si_update_fb_dirtiness_after_rendering(sctx);
> 
>  	/* Multisample surfaces are flushed in si_decompress_textures. */
>  	if (sctx->framebuffer.uncompressed_cb_mask)
>  		si_make_CB_shader_coherent(sctx, sctx->framebuffer.nr_samples,
>  					   sctx->framebuffer.CB_has_shader_readable_metadata);
>  }
> 
>  /* This only ensures coherency for shader image/buffer stores. */
> -static void si_memory_barrier(struct pipe_context *ctx, unsigned 
> flags)
> +void si_memory_barrier(struct pipe_context *ctx, unsigned flags)
>  {
>  	struct si_context *sctx = (struct si_context *)ctx;
> 
>  	/* Subsequent commands must wait for all shader invocations to
>  	 * complete. */
>  	sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
>  	                 SI_CONTEXT_CS_PARTIAL_FLUSH;
> 
>  	if (flags & PIPE_BARRIER_CONSTANT_BUFFER)
>  		sctx->flags |= SI_CONTEXT_INV_SMEM_L1 |
> @@ -4813,21 +4813,20 @@ void si_init_state_functions(struct si_context 
> *sctx)
>  	sctx->b.sampler_view_destroy = si_sampler_view_destroy;
> 
>  	sctx->b.set_sample_mask = si_set_sample_mask;
> 
>  	sctx->b.create_vertex_elements_state = si_create_vertex_elements;
>  	sctx->b.bind_vertex_elements_state = si_bind_vertex_elements;
>  	sctx->b.delete_vertex_elements_state = si_delete_vertex_element;
>  	sctx->b.set_vertex_buffers = si_set_vertex_buffers;
> 
>  	sctx->b.texture_barrier = si_texture_barrier;
> -	sctx->b.memory_barrier = si_memory_barrier;
>  	sctx->b.set_min_samples = si_set_min_samples;
>  	sctx->b.set_tess_state = si_set_tess_state;
> 
>  	sctx->b.set_active_query_state = si_set_active_query_state;
> 
>  	si_init_config(sctx);
>  }
> 
>  void si_init_screen_state_functions(struct si_screen *sscreen)
>  {
> diff --git a/src/gallium/drivers/radeonsi/si_state.h
> b/src/gallium/drivers/radeonsi/si_state.h
> index 767e789276a..6faa4c511b1 100644
> --- a/src/gallium/drivers/radeonsi/si_state.h
> +++ b/src/gallium/drivers/radeonsi/si_state.h
> @@ -482,20 +482,21 @@ void si_set_active_descriptors_for_shader(struct
> si_context *sctx,
>  					  struct si_shader_selector *sel);
>  bool si_bindless_descriptor_can_reclaim_slab(void *priv,
>  					     struct pb_slab_entry *entry);
>  struct pb_slab *si_bindless_descriptor_slab_alloc(void *priv, unsigned 
> heap,
>  						  unsigned entry_size,
>  						  unsigned group_index);
>  void si_bindless_descriptor_slab_free(void *priv, struct pb_slab 
> *pslab);
>  void si_rebind_buffer(struct si_context *sctx, struct pipe_resource 
> *buf,
>  		      uint64_t old_va);
>  /* si_state.c */
> +void si_memory_barrier(struct pipe_context *ctx, unsigned flags);
>  void si_init_state_functions(struct si_context *sctx);
>  void si_init_screen_state_functions(struct si_screen *sscreen);
>  void
>  si_make_buffer_descriptor(struct si_screen *screen, struct si_resource 
> *buf,
>  			  enum pipe_format format,
>  			  unsigned offset, unsigned size,
>  			  uint32_t *state);
>  void
>  si_make_texture_descriptor(struct si_screen *screen,
>  			   struct si_texture *tex,
> diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c
> b/src/gallium/drivers/radeonsi/si_state_draw.c
> index 9c968e39c2c..2a514f144b9 100644
> --- a/src/gallium/drivers/radeonsi/si_state_draw.c
> +++ b/src/gallium/drivers/radeonsi/si_state_draw.c
> @@ -872,21 +872,21 @@ static void si_emit_draw_packets(struct 
> si_context *sctx,
>  				        S_0287F0_USE_OPAQUE(!!info->count_from_stream_output));
>  		}
>  	}
>  }
> 
>  static void si_emit_surface_sync(struct si_context *sctx,
>  				 unsigned cp_coher_cntl)
>  {
>  	struct radeon_cmdbuf *cs = sctx->gfx_cs;
> 
> -	if (sctx->chip_class >= GFX9) {
> +	if (sctx->chip_class >= GFX9 || !sctx->has_graphics) {
>  		/* Flush caches and wait for the caches to assert idle. */
>  		radeon_emit(cs, PKT3(PKT3_ACQUIRE_MEM, 5, 0));
>  		radeon_emit(cs, cp_coher_cntl);	/* CP_COHER_CNTL */
>  		radeon_emit(cs, 0xffffffff);	/* CP_COHER_SIZE */
>  		radeon_emit(cs, 0xffffff);	/* CP_COHER_SIZE_HI */
>  		radeon_emit(cs, 0);		/* CP_COHER_BASE */
>  		radeon_emit(cs, 0);		/* CP_COHER_BASE_HI */
>  		radeon_emit(cs, 0x0000000A);	/* POLL_INTERVAL */
>  	} else {
>  		/* ACQUIRE_MEM is only required on a compute ring. */
> @@ -895,20 +895,32 @@ static void si_emit_surface_sync(struct 
> si_context *sctx,
>  		radeon_emit(cs, 0xffffffff);      /* CP_COHER_SIZE */
>  		radeon_emit(cs, 0);               /* CP_COHER_BASE */
>  		radeon_emit(cs, 0x0000000A);      /* POLL_INTERVAL */
>  	}
>  }
> 
>  void si_emit_cache_flush(struct si_context *sctx)
>  {
>  	struct radeon_cmdbuf *cs = sctx->gfx_cs;
>  	uint32_t flags = sctx->flags;
> +
> +	if (!sctx->has_graphics) {
> +		/* Only process compute flags. */
> +		flags &= SI_CONTEXT_INV_ICACHE |
> +			 SI_CONTEXT_INV_SMEM_L1 |
> +			 SI_CONTEXT_INV_VMEM_L1 |
> +			 SI_CONTEXT_INV_GLOBAL_L2 |
> +			 SI_CONTEXT_WRITEBACK_GLOBAL_L2 |
> +			 SI_CONTEXT_INV_L2_METADATA |
> +			 SI_CONTEXT_CS_PARTIAL_FLUSH;
> +	}
> +
>  	uint32_t cp_coher_cntl = 0;
>  	uint32_t flush_cb_db = flags & (SI_CONTEXT_FLUSH_AND_INV_CB |
>  					SI_CONTEXT_FLUSH_AND_INV_DB);
> 
>  	if (flags & SI_CONTEXT_FLUSH_AND_INV_CB)
>  		sctx->num_cb_cache_flushes++;
>  	if (flags & SI_CONTEXT_FLUSH_AND_INV_DB)
>  		sctx->num_db_cache_flushes++;
> 
>  	/* SI has a bug that it always flushes ICACHE and KCACHE if either
> @@ -1061,25 +1073,26 @@ void si_emit_cache_flush(struct si_context 
> *sctx)
>  				  EOP_DATA_SEL_VALUE_32BIT,
>  				  sctx->wait_mem_scratch, va,
>  				  sctx->wait_mem_number, SI_NOT_QUERY);
>  		si_cp_wait_mem(sctx, cs, va, sctx->wait_mem_number, 0xffffffff,
>  			       WAIT_REG_MEM_EQUAL);
>  	}
> 
>  	/* Make sure ME is idle (it executes most packets) before continuing.
>  	 * This prevents read-after-write hazards between PFP and ME.
>  	 */
> -	if (cp_coher_cntl ||
> -	    (flags & (SI_CONTEXT_CS_PARTIAL_FLUSH |
> -			    SI_CONTEXT_INV_VMEM_L1 |
> -			    SI_CONTEXT_INV_GLOBAL_L2 |
> -			    SI_CONTEXT_WRITEBACK_GLOBAL_L2))) {
> +	if (sctx->has_graphics &&
> +	    (cp_coher_cntl ||
> +	     (flags & (SI_CONTEXT_CS_PARTIAL_FLUSH |
> +		       SI_CONTEXT_INV_VMEM_L1 |
> +		       SI_CONTEXT_INV_GLOBAL_L2 |
> +		       SI_CONTEXT_WRITEBACK_GLOBAL_L2)))) {
>  		radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
>  		radeon_emit(cs, 0);
>  	}
> 
>  	/* SI-CI-VI only:
>  	 *   When one of the CP_COHER_CNTL.DEST_BASE flags is set, 
> SURFACE_SYNC
>  	 *   waits for idle, so it should be last. SURFACE_SYNC is done in 
> PFP.
>  	 *
>  	 * cp_coher_cntl should contain all necessary flags except TC flags
>  	 * at this point.
> diff --git a/src/gallium/drivers/radeonsi/si_texture.c
> b/src/gallium/drivers/radeonsi/si_texture.c
> index a50088d2d8f..581f90a7b2f 100644
> --- a/src/gallium/drivers/radeonsi/si_texture.c
> +++ b/src/gallium/drivers/radeonsi/si_texture.c
> @@ -457,20 +457,23 @@ static bool si_texture_discard_dcc(struct
> si_screen *sscreen,
>   *   compressed tiled
>   *
>   * \param sctx  the current context if you have one, or 
> sscreen->aux_context
>   *              if you don't.
>   */
>  bool si_texture_disable_dcc(struct si_context *sctx,
>  			    struct si_texture *tex)
>  {
>  	struct si_screen *sscreen = sctx->screen;
> 
> +	if (!sctx->has_graphics)
> +		return si_texture_discard_dcc(sscreen, tex);
> +
>  	if (!si_can_disable_dcc(tex))
>  		return false;
> 
>  	if (&sctx->b == sscreen->aux_context)
>  		mtx_lock(&sscreen->aux_context_lock);
> 
>  	/* Decompress DCC. */
>  	si_decompress_dcc(sctx, tex);
>  	sctx->b.flush(&sctx->b, NULL, 0);
I'll just push it.

Marek

On Mon, Feb 25, 2019 at 9:37 PM Dieter Nützel <Dieter@nuetzel-hh.de> wrote:

> Hello Marek,
>
> this series needs a rebase (if you have some time).
>
> Dieter
>
> Am 12.02.2019 19:12, schrieb Marek Olšák:
> > From: Marek Olšák <marek.olsak@amd.com>
> >
> > initialize all non-compute context functions to NULL.
> >
> > v2: fix SI
> > ---
> >  src/gallium/drivers/radeonsi/si_blit.c        | 14 ++-
> >  src/gallium/drivers/radeonsi/si_clear.c       |  7 +-
> >  src/gallium/drivers/radeonsi/si_compute.c     | 15 +--
> >  src/gallium/drivers/radeonsi/si_descriptors.c | 10 +-
> >  src/gallium/drivers/radeonsi/si_gfx_cs.c      | 29 +++---
> >  src/gallium/drivers/radeonsi/si_pipe.c        | 95 +++++++++++--------
> >  src/gallium/drivers/radeonsi/si_pipe.h        |  3 +-
> >  src/gallium/drivers/radeonsi/si_state.c       |  3 +-
> >  src/gallium/drivers/radeonsi/si_state.h       |  1 +
> >  src/gallium/drivers/radeonsi/si_state_draw.c  | 25 +++--
> >  src/gallium/drivers/radeonsi/si_texture.c     |  3 +
> >  11 files changed, 130 insertions(+), 75 deletions(-)
> >
> > diff --git a/src/gallium/drivers/radeonsi/si_blit.c
> > b/src/gallium/drivers/radeonsi/si_blit.c
> > index bb8d1cbd12d..f39cb5d143f 100644
> > --- a/src/gallium/drivers/radeonsi/si_blit.c
> > +++ b/src/gallium/drivers/radeonsi/si_blit.c
> > @@ -1345,25 +1345,31 @@ static void si_flush_resource(struct
> > pipe_context *ctx,
> >
> >               if (separate_dcc_dirty) {
> >                       tex->separate_dcc_dirty = false;
> >                       vi_separate_dcc_process_and_reset_stats(ctx, tex);
> >               }
> >       }
> >  }
> >
> >  void si_decompress_dcc(struct si_context *sctx, struct si_texture
> > *tex)
> >  {
> > -     if (!tex->dcc_offset)
> > +     /* If graphics is disabled, we can't decompress DCC, but it shouldn't
> > +      * be compressed either. The caller should simply discard it.
> > +      */
> > +     if (!tex->dcc_offset || !sctx->has_graphics)
> >               return;
> >
> >       si_blit_decompress_color(sctx, tex, 0, tex->buffer.b.b.last_level,
> >                                0, util_max_layer(&tex->buffer.b.b, 0),
> >                                true);
> >  }
> >
> >  void si_init_blit_functions(struct si_context *sctx)
> >  {
> >       sctx->b.resource_copy_region = si_resource_copy_region;
> > -     sctx->b.blit = si_blit;
> > -     sctx->b.flush_resource = si_flush_resource;
> > -     sctx->b.generate_mipmap = si_generate_mipmap;
> > +
> > +     if (sctx->has_graphics) {
> > +             sctx->b.blit = si_blit;
> > +             sctx->b.flush_resource = si_flush_resource;
> > +             sctx->b.generate_mipmap = si_generate_mipmap;
> > +     }
> >  }
> > diff --git a/src/gallium/drivers/radeonsi/si_clear.c
> > b/src/gallium/drivers/radeonsi/si_clear.c
> > index 9a00bb73b94..e1805f2a1c9 100644
> > --- a/src/gallium/drivers/radeonsi/si_clear.c
> > +++ b/src/gallium/drivers/radeonsi/si_clear.c
> > @@ -764,15 +764,18 @@ static void si_clear_texture(struct pipe_context
> > *pipe,
> >                       util_clear_render_target(pipe, sf, &color,
> >                                                box->x, box->y,
> >                                                box->width, box->height);
> >               }
> >       }
> >       pipe_surface_reference(&sf, NULL);
> >  }
> >
> >  void si_init_clear_functions(struct si_context *sctx)
> >  {
> > -     sctx->b.clear = si_clear;
> >       sctx->b.clear_render_target = si_clear_render_target;
> > -     sctx->b.clear_depth_stencil = si_clear_depth_stencil;
> >       sctx->b.clear_texture = si_clear_texture;
> > +
> > +     if (sctx->has_graphics) {
> > +             sctx->b.clear = si_clear;
> > +             sctx->b.clear_depth_stencil = si_clear_depth_stencil;
> > +     }
> >  }
> > diff --git a/src/gallium/drivers/radeonsi/si_compute.c
> > b/src/gallium/drivers/radeonsi/si_compute.c
> > index 1a62b3e0844..87addd53976 100644
> > --- a/src/gallium/drivers/radeonsi/si_compute.c
> > +++ b/src/gallium/drivers/radeonsi/si_compute.c
> > @@ -880,26 +880,28 @@ static void si_launch_grid(
> >               info->block[0] * info->block[1] * info->block[2] > 256;
> >
> >       if (cs_regalloc_hang)
> >               sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
> >                                SI_CONTEXT_CS_PARTIAL_FLUSH;
> >
> >       if (program->ir_type != PIPE_SHADER_IR_NATIVE &&
> >           program->shader.compilation_failed)
> >               return;
> >
> > -     if (sctx->last_num_draw_calls != sctx->num_draw_calls) {
> > -             si_update_fb_dirtiness_after_rendering(sctx);
> > -             sctx->last_num_draw_calls = sctx->num_draw_calls;
> > -     }
> > +     if (sctx->has_graphics) {
> > +             if (sctx->last_num_draw_calls != sctx->num_draw_calls) {
> > +                     si_update_fb_dirtiness_after_rendering(sctx);
> > +                     sctx->last_num_draw_calls = sctx->num_draw_calls;
> > +             }
> >
> > -     si_decompress_textures(sctx, 1 << PIPE_SHADER_COMPUTE);
> > +             si_decompress_textures(sctx, 1 << PIPE_SHADER_COMPUTE);
> > +     }
> >
> >       /* Add buffer sizes for memory checking in need_cs_space. */
> >       si_context_add_resource_size(sctx, &program->shader.bo->b.b);
> >       /* TODO: add the scratch buffer */
> >
> >       if (info->indirect) {
> >               si_context_add_resource_size(sctx, info->indirect);
> >
> >               /* Indirect buffers use TC L2 on GFX9, but not older hw. */
> >               if (sctx->chip_class <= VI &&
> > @@ -917,21 +919,22 @@ static void si_launch_grid(
> >       if (sctx->flags)
> >               si_emit_cache_flush(sctx);
> >
> >       if (!si_switch_compute_shader(sctx, program, &program->shader,
> >                                       code_object, info->pc))
> >               return;
> >
> >       si_upload_compute_shader_descriptors(sctx);
> >       si_emit_compute_shader_pointers(sctx);
> >
> > -     if (si_is_atom_dirty(sctx, &sctx->atoms.s.render_cond)) {
> > +     if (sctx->has_graphics &&
> > +         si_is_atom_dirty(sctx, &sctx->atoms.s.render_cond)) {
> >               sctx->atoms.s.render_cond.emit(sctx);
> >               si_set_atom_dirty(sctx, &sctx->atoms.s.render_cond, false);
> >       }
> >
> >       if ((program->input_size ||
> >              program->ir_type == PIPE_SHADER_IR_NATIVE) &&
> >             unlikely(!si_upload_compute_input(sctx, code_object,
> > info))) {
> >               return;
> >       }
> >
> > diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c
> > b/src/gallium/drivers/radeonsi/si_descriptors.c
> > index 21d4ca946d3..0f22c55723c 100644
> > --- a/src/gallium/drivers/radeonsi/si_descriptors.c
> > +++ b/src/gallium/drivers/radeonsi/si_descriptors.c
> > @@ -2640,22 +2640,24 @@ void
> > si_all_resident_buffers_begin_new_cs(struct si_context *sctx)
> >
> >       sctx->num_resident_handles += num_resident_tex_handles +
> >                                       num_resident_img_handles;
> >  }
> >
> >  /* INIT/DEINIT/UPLOAD */
> >
> >  void si_init_all_descriptors(struct si_context *sctx)
> >  {
> >       int i;
> > +     unsigned first_shader =
> > +             sctx->has_graphics ? 0 : PIPE_SHADER_COMPUTE;
> >
> > -     for (i = 0; i < SI_NUM_SHADERS; i++) {
> > +     for (i = first_shader; i < SI_NUM_SHADERS; i++) {
> >               bool is_2nd = sctx->chip_class >= GFX9 &&
> >                                    (i == PIPE_SHADER_TESS_CTRL ||
> >                                     i == PIPE_SHADER_GEOMETRY);
> >               unsigned num_sampler_slots = SI_NUM_IMAGES / 2 +
> SI_NUM_SAMPLERS;
> >               unsigned num_buffer_slots = SI_NUM_SHADER_BUFFERS +
> > SI_NUM_CONST_BUFFERS;
> >               int rel_dw_offset;
> >               struct si_descriptors *desc;
> >
> >               if (is_2nd) {
> >                       if (i == PIPE_SHADER_TESS_CTRL) {
> > @@ -2714,30 +2716,34 @@ void si_init_all_descriptors(struct si_context
> > *sctx)
> >       si_init_bindless_descriptors(sctx, &sctx->bindless_descriptors,
> >                                    SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES,
> >                                    1024);
> >
> >       sctx->descriptors_dirty = u_bit_consecutive(0, SI_NUM_DESCS);
> >
> >       /* Set pipe_context functions. */
> >       sctx->b.bind_sampler_states = si_bind_sampler_states;
> >       sctx->b.set_shader_images = si_set_shader_images;
> >       sctx->b.set_constant_buffer = si_pipe_set_constant_buffer;
> > -     sctx->b.set_polygon_stipple = si_set_polygon_stipple;
> >       sctx->b.set_shader_buffers = si_set_shader_buffers;
> >       sctx->b.set_sampler_views = si_set_sampler_views;
> >       sctx->b.create_texture_handle = si_create_texture_handle;
> >       sctx->b.delete_texture_handle = si_delete_texture_handle;
> >       sctx->b.make_texture_handle_resident =
> > si_make_texture_handle_resident;
> >       sctx->b.create_image_handle = si_create_image_handle;
> >       sctx->b.delete_image_handle = si_delete_image_handle;
> >       sctx->b.make_image_handle_resident = si_make_image_handle_resident;
> >
> > +     if (!sctx->has_graphics)
> > +             return;
> > +
> > +     sctx->b.set_polygon_stipple = si_set_polygon_stipple;
> > +
> >       /* Shader user data. */
> >       sctx->atoms.s.shader_pointers.emit =
> > si_emit_graphics_shader_pointers;
> >
> >       /* Set default and immutable mappings. */
> >       si_set_user_data_base(sctx, PIPE_SHADER_VERTEX,
> > R_00B130_SPI_SHADER_USER_DATA_VS_0);
> >
> >       if (sctx->chip_class >= GFX9) {
> >               si_set_user_data_base(sctx, PIPE_SHADER_TESS_CTRL,
> >                                     R_00B430_SPI_SHADER_USER_DATA_LS_0);
> >               si_set_user_data_base(sctx, PIPE_SHADER_GEOMETRY,
> > diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c
> > b/src/gallium/drivers/radeonsi/si_gfx_cs.c
> > index 3d64587fa2b..d0e7cf20b4c 100644
> > --- a/src/gallium/drivers/radeonsi/si_gfx_cs.c
> > +++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c
> > @@ -103,27 +103,29 @@ void si_flush_gfx_cs(struct si_context *ctx,
> > unsigned flags,
> >        * This code is only needed when the driver flushes the GFX IB
> >        * internally, and it never asks for a fence handle.
> >        */
> >       if (radeon_emitted(ctx->dma_cs, 0)) {
> >               assert(fence == NULL); /* internal flushes only */
> >               si_flush_dma_cs(ctx, flags, NULL);
> >       }
> >
> >       ctx->gfx_flush_in_progress = true;
> >
> > -     if (!LIST_IS_EMPTY(&ctx->active_queries))
> > -             si_suspend_queries(ctx);
> > -
> > -     ctx->streamout.suspended = false;
> > -     if (ctx->streamout.begin_emitted) {
> > -             si_emit_streamout_end(ctx);
> > -             ctx->streamout.suspended = true;
> > +     if (ctx->has_graphics) {
> > +             if (!LIST_IS_EMPTY(&ctx->active_queries))
> > +                     si_suspend_queries(ctx);
> > +
> > +             ctx->streamout.suspended = false;
> > +             if (ctx->streamout.begin_emitted) {
> > +                     si_emit_streamout_end(ctx);
> > +                     ctx->streamout.suspended = true;
> > +             }
> >       }
> >
> >       /* Make sure CP DMA is idle at the end of IBs after L2 prefetches
> >        * because the kernel doesn't wait for it. */
> >       if (ctx->chip_class >= CIK)
> >               si_cp_dma_wait_for_idle(ctx);
> >
> >       /* Wait for draw calls to finish if needed. */
> >       if (wait_flags) {
> >               ctx->flags |= wait_flags;
> > @@ -209,20 +211,29 @@ void si_begin_new_gfx_cs(struct si_context *ctx)
> >        * IB starts drawing.
> >        *
> >        * TODO: Do we also need to invalidate CB & DB caches?
> >        */
> >       ctx->flags |= SI_CONTEXT_INV_ICACHE |
> >                     SI_CONTEXT_INV_SMEM_L1 |
> >                     SI_CONTEXT_INV_VMEM_L1 |
> >                     SI_CONTEXT_INV_GLOBAL_L2 |
> >                     SI_CONTEXT_START_PIPELINE_STATS;
> >
> > +     ctx->cs_shader_state.initialized = false;
> > +     si_all_descriptors_begin_new_cs(ctx);
> > +     si_all_resident_buffers_begin_new_cs(ctx);
> > +
> > +     if (!ctx->has_graphics) {
> > +             ctx->initial_gfx_cs_size = ctx->gfx_cs->current.cdw;
> > +             return;
> > +     }
> > +
> >       /* set all valid group as dirty so they get reemited on
> >        * next draw command
> >        */
> >       si_pm4_reset_emitted(ctx);
> >
> >       /* The CS initialization should be emitted before everything else.
> */
> >       si_pm4_emit(ctx, ctx->init_config);
> >       if (ctx->init_config_gs_rings)
> >               si_pm4_emit(ctx, ctx->init_config_gs_rings);
> >
> > @@ -273,22 +284,20 @@ void si_begin_new_gfx_cs(struct si_context *ctx)
> >       si_mark_atom_dirty(ctx, &ctx->atoms.s.db_render_state);
> >       if (ctx->chip_class >= GFX9)
> >               si_mark_atom_dirty(ctx, &ctx->atoms.s.dpbb_state);
> >       si_mark_atom_dirty(ctx, &ctx->atoms.s.stencil_ref);
> >       si_mark_atom_dirty(ctx, &ctx->atoms.s.spi_map);
> >       si_mark_atom_dirty(ctx, &ctx->atoms.s.streamout_enable);
> >       si_mark_atom_dirty(ctx, &ctx->atoms.s.render_cond);
> >       /* CLEAR_STATE disables all window rectangles. */
> >       if (!has_clear_state || ctx->num_window_rectangles > 0)
> >               si_mark_atom_dirty(ctx, &ctx->atoms.s.window_rectangles);
> > -     si_all_descriptors_begin_new_cs(ctx);
> > -     si_all_resident_buffers_begin_new_cs(ctx);
> >
> >       ctx->scissors.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
> >       ctx->viewports.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
> >       ctx->viewports.depth_range_dirty_mask = (1 << SI_MAX_VIEWPORTS) -
> 1;
> >       si_mark_atom_dirty(ctx, &ctx->atoms.s.guardband);
> >       si_mark_atom_dirty(ctx, &ctx->atoms.s.scissors);
> >       si_mark_atom_dirty(ctx, &ctx->atoms.s.viewports);
> >
> >       si_mark_atom_dirty(ctx, &ctx->atoms.s.scratch_state);
> >       if (ctx->scratch_buffer) {
> > @@ -316,22 +325,20 @@ void si_begin_new_gfx_cs(struct si_context *ctx)
> >       ctx->last_multi_vgt_param = -1;
> >       ctx->last_rast_prim = -1;
> >       ctx->last_sc_line_stipple = ~0;
> >       ctx->last_vs_state = ~0;
> >       ctx->last_ls = NULL;
> >       ctx->last_tcs = NULL;
> >       ctx->last_tes_sh_base = -1;
> >       ctx->last_num_tcs_input_cp = -1;
> >       ctx->last_ls_hs_config = -1; /* impossible value */
> >
> > -     ctx->cs_shader_state.initialized = false;
> > -
> >       if (has_clear_state) {
> >               ctx->tracked_regs.reg_value[SI_TRACKED_DB_RENDER_CONTROL]
> =
> > 0x00000000;
> >               ctx->tracked_regs.reg_value[SI_TRACKED_DB_COUNT_CONTROL] =
> > 0x00000000;
> >
>  ctx->tracked_regs.reg_value[SI_TRACKED_DB_RENDER_OVERRIDE2] =
> > 0x00000000;
> >               ctx->tracked_regs.reg_value[SI_TRACKED_DB_SHADER_CONTROL]
> =
> > 0x00000000;
> >               ctx->tracked_regs.reg_value[SI_TRACKED_CB_TARGET_MASK] =
> 0xffffffff;
> >               ctx->tracked_regs.reg_value[SI_TRACKED_CB_DCC_CONTROL] =
> 0x00000000;
> >               ctx->tracked_regs.reg_value[SI_TRACKED_SX_PS_DOWNCONVERT]
> =
> > 0x00000000;
> >
>  ctx->tracked_regs.reg_value[SI_TRACKED_SX_BLEND_OPT_EPSILON] =
> > 0x00000000;
> >
>  ctx->tracked_regs.reg_value[SI_TRACKED_SX_BLEND_OPT_CONTROL] =
> > 0x00000000;
> > diff --git a/src/gallium/drivers/radeonsi/si_pipe.c
> > b/src/gallium/drivers/radeonsi/si_pipe.c
> > index 20767c806d2..c2ec664d5a4 100644
> > --- a/src/gallium/drivers/radeonsi/si_pipe.c
> > +++ b/src/gallium/drivers/radeonsi/si_pipe.c
> > @@ -381,61 +381,56 @@ static struct pipe_context
> > *si_create_context(struct pipe_screen *screen,
> >  {
> >       struct si_context *sctx = CALLOC_STRUCT(si_context);
> >       struct si_screen* sscreen = (struct si_screen *)screen;
> >       struct radeon_winsys *ws = sscreen->ws;
> >       int shader, i;
> >       bool stop_exec_on_failure = (flags &
> > PIPE_CONTEXT_LOSE_CONTEXT_ON_RESET) != 0;
> >
> >       if (!sctx)
> >               return NULL;
> >
> > +     sctx->has_graphics = sscreen->info.chip_class == SI ||
> > +                          !(flags & PIPE_CONTEXT_COMPUTE_ONLY);
> > +
> >       if (flags & PIPE_CONTEXT_DEBUG)
> >               sscreen->record_llvm_ir = true; /* racy but not critical */
> >
> >       sctx->b.screen = screen; /* this must be set first */
> >       sctx->b.priv = NULL;
> >       sctx->b.destroy = si_destroy_context;
> > -     sctx->b.emit_string_marker = si_emit_string_marker;
> > -     sctx->b.set_debug_callback = si_set_debug_callback;
> > -     sctx->b.set_log_context = si_set_log_context;
> > -     sctx->b.set_context_param = si_set_context_param;
> >       sctx->screen = sscreen; /* Easy accessing of screen/winsys. */
> >       sctx->is_debug = (flags & PIPE_CONTEXT_DEBUG) != 0;
> >
> >       slab_create_child(&sctx->pool_transfers, &sscreen->pool_transfers);
> >       slab_create_child(&sctx->pool_transfers_unsync,
> > &sscreen->pool_transfers);
> >
> >       sctx->ws = sscreen->ws;
> >       sctx->family = sscreen->info.family;
> >       sctx->chip_class = sscreen->info.chip_class;
> >
> >       if (sscreen->info.has_gpu_reset_counter_query) {
> >               sctx->gpu_reset_counter =
> >                       sctx->ws->query_value(sctx->ws,
> RADEON_GPU_RESET_COUNTER);
> >       }
> >
> > -     sctx->b.get_device_reset_status = si_get_reset_status;
> > -     sctx->b.set_device_reset_callback = si_set_device_reset_callback;
> > -
> > -     si_init_context_texture_functions(sctx);
> > -     si_init_query_functions(sctx);
> >
> >       if (sctx->chip_class == CIK ||
> >           sctx->chip_class == VI ||
> >           sctx->chip_class == GFX9) {
> >               sctx->eop_bug_scratch = si_resource(
> >                       pipe_buffer_create(&sscreen->b, 0,
> PIPE_USAGE_DEFAULT,
> >                                          16 *
> sscreen->info.num_render_backends));
> >               if (!sctx->eop_bug_scratch)
> >                       goto fail;
> >       }
> >
> > +     /* Initialize context allocators. */
> >       sctx->allocator_zeroed_memory =
> >               u_suballocator_create(&sctx->b, 128 * 1024,
> >                                     0, PIPE_USAGE_DEFAULT,
> >                                     SI_RESOURCE_FLAG_UNMAPPABLE |
> >                                     SI_RESOURCE_FLAG_CLEAR, false);
> >       if (!sctx->allocator_zeroed_memory)
> >               goto fail;
> >
> >       sctx->b.stream_uploader = u_upload_create(&sctx->b, 1024 * 1024,
> >                                                   0, PIPE_USAGE_STREAM,
> > @@ -459,38 +454,22 @@ static struct pipe_context
> > *si_create_context(struct pipe_screen *screen,
> >       sctx->ctx = sctx->ws->ctx_create(sctx->ws);
> >       if (!sctx->ctx)
> >               goto fail;
> >
> >       if (sscreen->info.num_sdma_rings && !(sscreen->debug_flags &
> > DBG(NO_ASYNC_DMA))) {
> >               sctx->dma_cs = sctx->ws->cs_create(sctx->ctx, RING_DMA,
> >                                                  (void*)si_flush_dma_cs,
> >                                                  sctx,
> stop_exec_on_failure);
> >       }
> >
> > -     si_init_buffer_functions(sctx);
> > -     si_init_clear_functions(sctx);
> > -     si_init_blit_functions(sctx);
> > -     si_init_compute_functions(sctx);
> > -     si_init_compute_blit_functions(sctx);
> > -     si_init_debug_functions(sctx);
> > -     si_init_msaa_functions(sctx);
> > -     si_init_streamout_functions(sctx);
> > -
> > -     if (sscreen->info.has_hw_decode) {
> > -             sctx->b.create_video_codec = si_uvd_create_decoder;
> > -             sctx->b.create_video_buffer = si_video_buffer_create;
> > -     } else {
> > -             sctx->b.create_video_codec = vl_create_decoder;
> > -             sctx->b.create_video_buffer = vl_video_buffer_create;
> > -     }
> > -
> > -     sctx->gfx_cs = ws->cs_create(sctx->ctx, RING_GFX,
> > +     sctx->gfx_cs = ws->cs_create(sctx->ctx,
> > +                                  sctx->has_graphics ? RING_GFX :
> RING_COMPUTE,
> >                                    (void*)si_flush_gfx_cs, sctx,
> stop_exec_on_failure);
> >
> >       /* Border colors. */
> >       sctx->border_color_table = malloc(SI_MAX_BORDER_COLORS *
> >
>  sizeof(*sctx->border_color_table));
> >       if (!sctx->border_color_table)
> >               goto fail;
> >
> >       sctx->border_color_buffer = si_resource(
> >               pipe_buffer_create(screen, 0, PIPE_USAGE_DEFAULT,
> > @@ -498,43 +477,76 @@ static struct pipe_context
> > *si_create_context(struct pipe_screen *screen,
> >                                  sizeof(*sctx->border_color_table)));
> >       if (!sctx->border_color_buffer)
> >               goto fail;
> >
> >       sctx->border_color_map =
> >               ws->buffer_map(sctx->border_color_buffer->buf,
> >                              NULL, PIPE_TRANSFER_WRITE);
> >       if (!sctx->border_color_map)
> >               goto fail;
> >
> > +     /* Initialize context functions used by graphics and compute. */
> > +     sctx->b.emit_string_marker = si_emit_string_marker;
> > +     sctx->b.set_debug_callback = si_set_debug_callback;
> > +     sctx->b.set_log_context = si_set_log_context;
> > +     sctx->b.set_context_param = si_set_context_param;
> > +     sctx->b.get_device_reset_status = si_get_reset_status;
> > +     sctx->b.set_device_reset_callback = si_set_device_reset_callback;
> > +     sctx->b.memory_barrier = si_memory_barrier;
> > +
> >       si_init_all_descriptors(sctx);
> > +     si_init_buffer_functions(sctx);
> > +     si_init_clear_functions(sctx);
> > +     si_init_blit_functions(sctx);
> > +     si_init_compute_functions(sctx);
> > +     si_init_compute_blit_functions(sctx);
> > +     si_init_debug_functions(sctx);
> >       si_init_fence_functions(sctx);
> > -     si_init_state_functions(sctx);
> > -     si_init_shader_functions(sctx);
> > -     si_init_viewport_functions(sctx);
> > -
> > -     if (sctx->chip_class >= CIK)
> > -             cik_init_sdma_functions(sctx);
> > -     else
> > -             si_init_dma_functions(sctx);
> >
> >       if (sscreen->debug_flags & DBG(FORCE_DMA))
> >               sctx->b.resource_copy_region = sctx->dma_copy;
> >
> > -     sctx->blitter = util_blitter_create(&sctx->b);
> > -     if (sctx->blitter == NULL)
> > -             goto fail;
> > -     sctx->blitter->skip_viewport_restore = true;
> > +     /* Initialize graphics-only context functions. */
> > +     if (sctx->has_graphics) {
> > +             si_init_context_texture_functions(sctx);
> > +             si_init_query_functions(sctx);
> > +             si_init_msaa_functions(sctx);
> > +             si_init_shader_functions(sctx);
> > +             si_init_state_functions(sctx);
> > +             si_init_streamout_functions(sctx);
> > +             si_init_viewport_functions(sctx);
> > +
> > +             sctx->blitter = util_blitter_create(&sctx->b);
> > +             if (sctx->blitter == NULL)
> > +                     goto fail;
> > +             sctx->blitter->skip_viewport_restore = true;
> >
> > -     si_init_draw_functions(sctx);
> > +             si_init_draw_functions(sctx);
> > +     }
> > +
> > +     /* Initialize SDMA functions. */
> > +     if (sctx->chip_class >= CIK)
> > +             cik_init_sdma_functions(sctx);
> > +     else
> > +             si_init_dma_functions(sctx);
> >
> >       sctx->sample_mask = 0xffff;
> >
> > +     /* Initialize multimedia functions. */
> > +     if (sscreen->info.has_hw_decode) {
> > +             sctx->b.create_video_codec = si_uvd_create_decoder;
> > +             sctx->b.create_video_buffer = si_video_buffer_create;
> > +     } else {
> > +             sctx->b.create_video_codec = vl_create_decoder;
> > +             sctx->b.create_video_buffer = vl_video_buffer_create;
> > +     }
> > +
> >       if (sctx->chip_class >= GFX9) {
> >               sctx->wait_mem_scratch = si_resource(
> >                       pipe_buffer_create(screen, 0, PIPE_USAGE_DEFAULT,
> 4));
> >               if (!sctx->wait_mem_scratch)
> >                       goto fail;
> >
> >               /* Initialize the memory. */
> >               si_cp_write_data(sctx, sctx->wait_mem_scratch, 0, 4,
> >                                V_370_MEM, V_370_ME,
> &sctx->wait_mem_number);
> >       }
> > @@ -544,21 +556,22 @@ static struct pipe_context
> > *si_create_context(struct pipe_screen *screen,
> >       if (sctx->chip_class == CIK) {
> >               sctx->null_const_buf.buffer =
> >                       pipe_aligned_buffer_create(screen,
> >                                                  SI_RESOURCE_FLAG_32BIT,
> >                                                  PIPE_USAGE_DEFAULT, 16,
> >
> sctx->screen->info.tcc_cache_line_size);
> >               if (!sctx->null_const_buf.buffer)
> >                       goto fail;
> >               sctx->null_const_buf.buffer_size =
> > sctx->null_const_buf.buffer->width0;
> >
> > -             for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
> > +             unsigned start_shader = sctx->has_graphics ? 0 :
> > PIPE_SHADER_COMPUTE;
> > +             for (shader = start_shader; shader < SI_NUM_SHADERS;
> shader++) {
> >                       for (i = 0; i < SI_NUM_CONST_BUFFERS; i++) {
> >                               sctx->b.set_constant_buffer(&sctx->b,
> shader, i,
> >
>  &sctx->null_const_buf);
> >                       }
> >               }
> >
> >               si_set_rw_buffer(sctx, SI_HS_CONST_DEFAULT_TESS_LEVELS,
> >                                &sctx->null_const_buf);
> >               si_set_rw_buffer(sctx, SI_VS_CONST_INSTANCE_DIVISORS,
> >                                &sctx->null_const_buf);
> > diff --git a/src/gallium/drivers/radeonsi/si_pipe.h
> > b/src/gallium/drivers/radeonsi/si_pipe.h
> > index b01d5744752..348e8e5bd26 100644
> > --- a/src/gallium/drivers/radeonsi/si_pipe.h
> > +++ b/src/gallium/drivers/radeonsi/si_pipe.h
> > @@ -777,21 +777,21 @@ struct si_saved_cs {
> >  };
> >
> >  struct si_context {
> >       struct pipe_context             b; /* base class */
> >
> >       enum radeon_family              family;
> >       enum chip_class                 chip_class;
> >
> >       struct radeon_winsys            *ws;
> >       struct radeon_winsys_ctx        *ctx;
> > -     struct radeon_cmdbuf            *gfx_cs;
> > +     struct radeon_cmdbuf            *gfx_cs; /* compute IB if graphics
> is disabled
> > */
> >       struct radeon_cmdbuf            *dma_cs;
> >       struct pipe_fence_handle        *last_gfx_fence;
> >       struct pipe_fence_handle        *last_sdma_fence;
> >       struct si_resource              *eop_bug_scratch;
> >       struct u_upload_mgr             *cached_gtt_allocator;
> >       struct threaded_context         *tc;
> >       struct u_suballocator           *allocator_zeroed_memory;
> >       struct slab_child_pool          pool_transfers;
> >       struct slab_child_pool          pool_transfers_unsync; /* for
> > threaded_context */
> >       struct pipe_device_reset_callback device_reset_callback;
> > @@ -815,20 +815,21 @@ struct si_context {
> >       void                            *cs_clear_render_target;
> >       void                            *cs_clear_render_target_1d_array;
> >       struct si_screen                *screen;
> >       struct pipe_debug_callback      debug;
> >       struct ac_llvm_compiler         compiler; /* only non-threaded
> compilation
> > */
> >       struct si_shader_ctx_state      fixed_func_tcs_shader;
> >       struct si_resource              *wait_mem_scratch;
> >       unsigned                        wait_mem_number;
> >       uint16_t                        prefetch_L2_mask;
> >
> > +     bool                            has_graphics;
> >       bool                            gfx_flush_in_progress:1;
> >       bool                            gfx_last_ib_is_busy:1;
> >       bool                            compute_is_busy:1;
> >
> >       unsigned                        num_gfx_cs_flushes;
> >       unsigned                        initial_gfx_cs_size;
> >       unsigned                        gpu_reset_counter;
> >       unsigned                        last_dirty_tex_counter;
> >       unsigned                        last_compressed_colortex_counter;
> >       unsigned                        last_num_draw_calls;
> > diff --git a/src/gallium/drivers/radeonsi/si_state.c
> > b/src/gallium/drivers/radeonsi/si_state.c
> > index b49a1b3695e..458b108a7e3 100644
> > --- a/src/gallium/drivers/radeonsi/si_state.c
> > +++ b/src/gallium/drivers/radeonsi/si_state.c
> > @@ -4699,21 +4699,21 @@ static void si_texture_barrier(struct
> > pipe_context *ctx, unsigned flags)
> >
> >       si_update_fb_dirtiness_after_rendering(sctx);
> >
> >       /* Multisample surfaces are flushed in si_decompress_textures. */
> >       if (sctx->framebuffer.uncompressed_cb_mask)
> >               si_make_CB_shader_coherent(sctx,
> sctx->framebuffer.nr_samples,
> >
> sctx->framebuffer.CB_has_shader_readable_metadata);
> >  }
> >
> >  /* This only ensures coherency for shader image/buffer stores. */
> > -static void si_memory_barrier(struct pipe_context *ctx, unsigned
> > flags)
> > +void si_memory_barrier(struct pipe_context *ctx, unsigned flags)
> >  {
> >       struct si_context *sctx = (struct si_context *)ctx;
> >
> >       /* Subsequent commands must wait for all shader invocations to
> >        * complete. */
> >       sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
> >                        SI_CONTEXT_CS_PARTIAL_FLUSH;
> >
> >       if (flags & PIPE_BARRIER_CONSTANT_BUFFER)
> >               sctx->flags |= SI_CONTEXT_INV_SMEM_L1 |
> > @@ -4813,21 +4813,20 @@ void si_init_state_functions(struct si_context
> > *sctx)
> >       sctx->b.sampler_view_destroy = si_sampler_view_destroy;
> >
> >       sctx->b.set_sample_mask = si_set_sample_mask;
> >
> >       sctx->b.create_vertex_elements_state = si_create_vertex_elements;
> >       sctx->b.bind_vertex_elements_state = si_bind_vertex_elements;
> >       sctx->b.delete_vertex_elements_state = si_delete_vertex_element;
> >       sctx->b.set_vertex_buffers = si_set_vertex_buffers;
> >
> >       sctx->b.texture_barrier = si_texture_barrier;
> > -     sctx->b.memory_barrier = si_memory_barrier;
> >       sctx->b.set_min_samples = si_set_min_samples;
> >       sctx->b.set_tess_state = si_set_tess_state;
> >
> >       sctx->b.set_active_query_state = si_set_active_query_state;
> >
> >       si_init_config(sctx);
> >  }
> >
> >  void si_init_screen_state_functions(struct si_screen *sscreen)
> >  {
> > diff --git a/src/gallium/drivers/radeonsi/si_state.h
> > b/src/gallium/drivers/radeonsi/si_state.h
> > index 767e789276a..6faa4c511b1 100644
> > --- a/src/gallium/drivers/radeonsi/si_state.h
> > +++ b/src/gallium/drivers/radeonsi/si_state.h
> > @@ -482,20 +482,21 @@ void si_set_active_descriptors_for_shader(struct
> > si_context *sctx,
> >                                         struct si_shader_selector *sel);
> >  bool si_bindless_descriptor_can_reclaim_slab(void *priv,
> >                                            struct pb_slab_entry *entry);
> >  struct pb_slab *si_bindless_descriptor_slab_alloc(void *priv, unsigned
> > heap,
> >                                                 unsigned entry_size,
> >                                                 unsigned group_index);
> >  void si_bindless_descriptor_slab_free(void *priv, struct pb_slab
> > *pslab);
> >  void si_rebind_buffer(struct si_context *sctx, struct pipe_resource
> > *buf,
> >                     uint64_t old_va);
> >  /* si_state.c */
> > +void si_memory_barrier(struct pipe_context *ctx, unsigned flags);
> >  void si_init_state_functions(struct si_context *sctx);
> >  void si_init_screen_state_functions(struct si_screen *sscreen);
> >  void
> >  si_make_buffer_descriptor(struct si_screen *screen, struct si_resource
> > *buf,
> >                         enum pipe_format format,
> >                         unsigned offset, unsigned size,
> >                         uint32_t *state);
> >  void
> >  si_make_texture_descriptor(struct si_screen *screen,
> >                          struct si_texture *tex,
> > diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c
> > b/src/gallium/drivers/radeonsi/si_state_draw.c
> > index 9c968e39c2c..2a514f144b9 100644
> > --- a/src/gallium/drivers/radeonsi/si_state_draw.c
> > +++ b/src/gallium/drivers/radeonsi/si_state_draw.c
> > @@ -872,21 +872,21 @@ static void si_emit_draw_packets(struct
> > si_context *sctx,
> >
>  S_0287F0_USE_OPAQUE(!!info->count_from_stream_output));
> >               }
> >       }
> >  }
> >
> >  static void si_emit_surface_sync(struct si_context *sctx,
> >                                unsigned cp_coher_cntl)
> >  {
> >       struct radeon_cmdbuf *cs = sctx->gfx_cs;
> >
> > -     if (sctx->chip_class >= GFX9) {
> > +     if (sctx->chip_class >= GFX9 || !sctx->has_graphics) {
> >               /* Flush caches and wait for the caches to assert idle. */
> >               radeon_emit(cs, PKT3(PKT3_ACQUIRE_MEM, 5, 0));
> >               radeon_emit(cs, cp_coher_cntl); /* CP_COHER_CNTL */
> >               radeon_emit(cs, 0xffffffff);    /* CP_COHER_SIZE */
> >               radeon_emit(cs, 0xffffff);      /* CP_COHER_SIZE_HI */
> >               radeon_emit(cs, 0);             /* CP_COHER_BASE */
> >               radeon_emit(cs, 0);             /* CP_COHER_BASE_HI */
> >               radeon_emit(cs, 0x0000000A);    /* POLL_INTERVAL */
> >       } else {
> >               /* ACQUIRE_MEM is only required on a compute ring. */
> > @@ -895,20 +895,32 @@ static void si_emit_surface_sync(struct
> > si_context *sctx,
> >               radeon_emit(cs, 0xffffffff);      /* CP_COHER_SIZE */
> >               radeon_emit(cs, 0);               /* CP_COHER_BASE */
> >               radeon_emit(cs, 0x0000000A);      /* POLL_INTERVAL */
> >       }
> >  }
> >
> >  void si_emit_cache_flush(struct si_context *sctx)
> >  {
> >       struct radeon_cmdbuf *cs = sctx->gfx_cs;
> >       uint32_t flags = sctx->flags;
> > +
> > +     if (!sctx->has_graphics) {
> > +             /* Only process compute flags. */
> > +             flags &= SI_CONTEXT_INV_ICACHE |
> > +                      SI_CONTEXT_INV_SMEM_L1 |
> > +                      SI_CONTEXT_INV_VMEM_L1 |
> > +                      SI_CONTEXT_INV_GLOBAL_L2 |
> > +                      SI_CONTEXT_WRITEBACK_GLOBAL_L2 |
> > +                      SI_CONTEXT_INV_L2_METADATA |
> > +                      SI_CONTEXT_CS_PARTIAL_FLUSH;
> > +     }
> > +
> >       uint32_t cp_coher_cntl = 0;
> >       uint32_t flush_cb_db = flags & (SI_CONTEXT_FLUSH_AND_INV_CB |
> >                                       SI_CONTEXT_FLUSH_AND_INV_DB);
> >
> >       if (flags & SI_CONTEXT_FLUSH_AND_INV_CB)
> >               sctx->num_cb_cache_flushes++;
> >       if (flags & SI_CONTEXT_FLUSH_AND_INV_DB)
> >               sctx->num_db_cache_flushes++;
> >
> >       /* SI has a bug that it always flushes ICACHE and KCACHE if either
> > @@ -1061,25 +1073,26 @@ void si_emit_cache_flush(struct si_context
> > *sctx)
> >                                 EOP_DATA_SEL_VALUE_32BIT,
> >                                 sctx->wait_mem_scratch, va,
> >                                 sctx->wait_mem_number, SI_NOT_QUERY);
> >               si_cp_wait_mem(sctx, cs, va, sctx->wait_mem_number, 0xffffffff,
> >                              WAIT_REG_MEM_EQUAL);
> >       }
> >
> >       /* Make sure ME is idle (it executes most packets) before continuing.
> >        * This prevents read-after-write hazards between PFP and ME.
> >        */
> > -     if (cp_coher_cntl ||
> > -         (flags & (SI_CONTEXT_CS_PARTIAL_FLUSH |
> > -                         SI_CONTEXT_INV_VMEM_L1 |
> > -                         SI_CONTEXT_INV_GLOBAL_L2 |
> > -                         SI_CONTEXT_WRITEBACK_GLOBAL_L2))) {
> > +     if (sctx->has_graphics &&
> > +         (cp_coher_cntl ||
> > +          (flags & (SI_CONTEXT_CS_PARTIAL_FLUSH |
> > +                    SI_CONTEXT_INV_VMEM_L1 |
> > +                    SI_CONTEXT_INV_GLOBAL_L2 |
> > +                    SI_CONTEXT_WRITEBACK_GLOBAL_L2)))) {
> >               radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
> >               radeon_emit(cs, 0);
> >       }
> >
> >       /* SI-CI-VI only:
> >        *   When one of the CP_COHER_CNTL.DEST_BASE flags is set,
> > SURFACE_SYNC
> >        *   waits for idle, so it should be last. SURFACE_SYNC is done in
> > PFP.
> >        *
> >        * cp_coher_cntl should contain all necessary flags except TC flags
> >        * at this point.
> > diff --git a/src/gallium/drivers/radeonsi/si_texture.c
> > b/src/gallium/drivers/radeonsi/si_texture.c
> > index a50088d2d8f..581f90a7b2f 100644
> > --- a/src/gallium/drivers/radeonsi/si_texture.c
> > +++ b/src/gallium/drivers/radeonsi/si_texture.c
> > @@ -457,20 +457,23 @@ static bool si_texture_discard_dcc(struct
> > si_screen *sscreen,
> >   *   compressed tiled
> >   *
> >   * \param sctx  the current context if you have one, or
> > sscreen->aux_context
> >   *              if you don't.
> >   */
> >  bool si_texture_disable_dcc(struct si_context *sctx,
> >                           struct si_texture *tex)
> >  {
> >       struct si_screen *sscreen = sctx->screen;
> >
> > +     if (!sctx->has_graphics)
> > +             return si_texture_discard_dcc(sscreen, tex);
> > +
> >       if (!si_can_disable_dcc(tex))
> >               return false;
> >
> >       if (&sctx->b == sscreen->aux_context)
> >               mtx_lock(&sscreen->aux_context_lock);
> >
> >       /* Decompress DCC. */
> >       si_decompress_dcc(sctx, tex);
> >       sctx->b.flush(&sctx->b, NULL, 0);
>
Can you add a bit of background on why clover should or should not use
other rings?

I planned to test this, but my Raven system has been unable to run clover
since the kernel 4.20 release (BZ 109649), so I need to bisect that first.
Can this patch help address the soft lockup issue on CIK (BZ 108879)?
Presumably, it was tested using clover on CIK, right?

Jan

On Tue, Feb 26, 2019 at 3:00 PM Marek Olšák <maraeo@gmail.com> wrote:

> I'll just push it.
>
> Marek
>
> On Mon, Feb 25, 2019 at 9:37 PM Dieter Nützel <Dieter@nuetzel-hh.de>
> wrote:
>
>> Hello Marek,
>>
>> this series needs a rebase (if you have some time).
>>
>> Dieter
>>
>> Am 12.02.2019 19:12, schrieb Marek Olšák:
>> > From: Marek Olšák <marek.olsak@amd.com>
>> >
>> > initialize all non-compute context functions to NULL.
>> >
>> > v2: fix SI
>> > ---
>> >  src/gallium/drivers/radeonsi/si_blit.c        | 14 ++-
>> >  src/gallium/drivers/radeonsi/si_clear.c       |  7 +-
>> >  src/gallium/drivers/radeonsi/si_compute.c     | 15 +--
>> >  src/gallium/drivers/radeonsi/si_descriptors.c | 10 +-
>> >  src/gallium/drivers/radeonsi/si_gfx_cs.c      | 29 +++---
>> >  src/gallium/drivers/radeonsi/si_pipe.c        | 95 +++++++++++--------
>> >  src/gallium/drivers/radeonsi/si_pipe.h        |  3 +-
>> >  src/gallium/drivers/radeonsi/si_state.c       |  3 +-
>> >  src/gallium/drivers/radeonsi/si_state.h       |  1 +
>> >  src/gallium/drivers/radeonsi/si_state_draw.c  | 25 +++--
>> >  src/gallium/drivers/radeonsi/si_texture.c     |  3 +
>> >  11 files changed, 130 insertions(+), 75 deletions(-)
>> >
>> > diff --git a/src/gallium/drivers/radeonsi/si_blit.c
>> > b/src/gallium/drivers/radeonsi/si_blit.c
>> > index bb8d1cbd12d..f39cb5d143f 100644
>> > --- a/src/gallium/drivers/radeonsi/si_blit.c
>> > +++ b/src/gallium/drivers/radeonsi/si_blit.c
>> > @@ -1345,25 +1345,31 @@ static void si_flush_resource(struct
>> > pipe_context *ctx,
>> >
>> >               if (separate_dcc_dirty) {
>> >                       tex->separate_dcc_dirty = false;
>> >                       vi_separate_dcc_process_and_reset_stats(ctx, tex);
>> >               }
>> >       }
>> >  }
>> >
>> >  void si_decompress_dcc(struct si_context *sctx, struct si_texture
>> > *tex)
>> >  {
>> > -     if (!tex->dcc_offset)
>> > +     /* If graphics is disabled, we can't decompress DCC, but it
>> shouldn't
>> > +      * be compressed either. The caller should simply discard it.
>> > +      */
>> > +     if (!tex->dcc_offset || !sctx->has_graphics)
>> >               return;
>> >
>> >       si_blit_decompress_color(sctx, tex, 0, tex->buffer.b.b.last_level,
>> >                                0, util_max_layer(&tex->buffer.b.b, 0),
>> >                                true);
>> >  }
>> >
>> >  void si_init_blit_functions(struct si_context *sctx)
>> >  {
>> >       sctx->b.resource_copy_region = si_resource_copy_region;
>> > -     sctx->b.blit = si_blit;
>> > -     sctx->b.flush_resource = si_flush_resource;
>> > -     sctx->b.generate_mipmap = si_generate_mipmap;
>> > +
>> > +     if (sctx->has_graphics) {
>> > +             sctx->b.blit = si_blit;
>> > +             sctx->b.flush_resource = si_flush_resource;
>> > +             sctx->b.generate_mipmap = si_generate_mipmap;
>> > +     }
>> >  }
>> > diff --git a/src/gallium/drivers/radeonsi/si_clear.c
>> > b/src/gallium/drivers/radeonsi/si_clear.c
>> > index 9a00bb73b94..e1805f2a1c9 100644
>> > --- a/src/gallium/drivers/radeonsi/si_clear.c
>> > +++ b/src/gallium/drivers/radeonsi/si_clear.c
>> > @@ -764,15 +764,18 @@ static void si_clear_texture(struct pipe_context
>> > *pipe,
>> >                       util_clear_render_target(pipe, sf, &color,
>> >                                                box->x, box->y,
>> >                                                box->width, box->height);
>> >               }
>> >       }
>> >       pipe_surface_reference(&sf, NULL);
>> >  }
>> >
>> >  void si_init_clear_functions(struct si_context *sctx)
>> >  {
>> > -     sctx->b.clear = si_clear;
>> >       sctx->b.clear_render_target = si_clear_render_target;
>> > -     sctx->b.clear_depth_stencil = si_clear_depth_stencil;
>> >       sctx->b.clear_texture = si_clear_texture;
>> > +
>> > +     if (sctx->has_graphics) {
>> > +             sctx->b.clear = si_clear;
>> > +             sctx->b.clear_depth_stencil = si_clear_depth_stencil;
>> > +     }
>> >  }
>> > diff --git a/src/gallium/drivers/radeonsi/si_compute.c
>> > b/src/gallium/drivers/radeonsi/si_compute.c
>> > index 1a62b3e0844..87addd53976 100644
>> > --- a/src/gallium/drivers/radeonsi/si_compute.c
>> > +++ b/src/gallium/drivers/radeonsi/si_compute.c
>> > @@ -880,26 +880,28 @@ static void si_launch_grid(
>> >               info->block[0] * info->block[1] * info->block[2] > 256;
>> >
>> >       if (cs_regalloc_hang)
>> >               sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
>> >                                SI_CONTEXT_CS_PARTIAL_FLUSH;
>> >
>> >       if (program->ir_type != PIPE_SHADER_IR_NATIVE &&
>> >           program->shader.compilation_failed)
>> >               return;
>> >
>> > -     if (sctx->last_num_draw_calls != sctx->num_draw_calls) {
>> > -             si_update_fb_dirtiness_after_rendering(sctx);
>> > -             sctx->last_num_draw_calls = sctx->num_draw_calls;
>> > -     }
>> > +     if (sctx->has_graphics) {
>> > +             if (sctx->last_num_draw_calls != sctx->num_draw_calls) {
>> > +                     si_update_fb_dirtiness_after_rendering(sctx);
>> > +                     sctx->last_num_draw_calls = sctx->num_draw_calls;
>> > +             }
>> >
>> > -     si_decompress_textures(sctx, 1 << PIPE_SHADER_COMPUTE);
>> > +             si_decompress_textures(sctx, 1 << PIPE_SHADER_COMPUTE);
>> > +     }
>> >
>> >       /* Add buffer sizes for memory checking in need_cs_space. */
>> >       si_context_add_resource_size(sctx, &program->shader.bo->b.b);
>> >       /* TODO: add the scratch buffer */
>> >
>> >       if (info->indirect) {
>> >               si_context_add_resource_size(sctx, info->indirect);
>> >
>> >               /* Indirect buffers use TC L2 on GFX9, but not older hw.
>> */
>> >               if (sctx->chip_class <= VI &&
>> > @@ -917,21 +919,22 @@ static void si_launch_grid(
>> >       if (sctx->flags)
>> >               si_emit_cache_flush(sctx);
>> >
>> >       if (!si_switch_compute_shader(sctx, program, &program->shader,
>> >                                       code_object, info->pc))
>> >               return;
>> >
>> >       si_upload_compute_shader_descriptors(sctx);
>> >       si_emit_compute_shader_pointers(sctx);
>> >
>> > -     if (si_is_atom_dirty(sctx, &sctx->atoms.s.render_cond)) {
>> > +     if (sctx->has_graphics &&
>> > +         si_is_atom_dirty(sctx, &sctx->atoms.s.render_cond)) {
>> >               sctx->atoms.s.render_cond.emit(sctx);
>> >               si_set_atom_dirty(sctx, &sctx->atoms.s.render_cond,
>> false);
>> >       }
>> >
>> >       if ((program->input_size ||
>> >              program->ir_type == PIPE_SHADER_IR_NATIVE) &&
>> >             unlikely(!si_upload_compute_input(sctx, code_object,
>> > info))) {
>> >               return;
>> >       }
>> >
>> > diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c
>> > b/src/gallium/drivers/radeonsi/si_descriptors.c
>> > index 21d4ca946d3..0f22c55723c 100644
>> > --- a/src/gallium/drivers/radeonsi/si_descriptors.c
>> > +++ b/src/gallium/drivers/radeonsi/si_descriptors.c
>> > @@ -2640,22 +2640,24 @@ void
>> > si_all_resident_buffers_begin_new_cs(struct si_context *sctx)
>> >
>> >       sctx->num_resident_handles += num_resident_tex_handles +
>> >                                       num_resident_img_handles;
>> >  }
>> >
>> >  /* INIT/DEINIT/UPLOAD */
>> >
>> >  void si_init_all_descriptors(struct si_context *sctx)
>> >  {
>> >       int i;
>> > +     unsigned first_shader =
>> > +             sctx->has_graphics ? 0 : PIPE_SHADER_COMPUTE;
>> >
>> > -     for (i = 0; i < SI_NUM_SHADERS; i++) {
>> > +     for (i = first_shader; i < SI_NUM_SHADERS; i++) {
>> >               bool is_2nd = sctx->chip_class >= GFX9 &&
>> >                                    (i == PIPE_SHADER_TESS_CTRL ||
>> >                                     i == PIPE_SHADER_GEOMETRY);
>> >               unsigned num_sampler_slots = SI_NUM_IMAGES / 2 +
>> SI_NUM_SAMPLERS;
>> >               unsigned num_buffer_slots = SI_NUM_SHADER_BUFFERS +
>> > SI_NUM_CONST_BUFFERS;
>> >               int rel_dw_offset;
>> >               struct si_descriptors *desc;
>> >
>> >               if (is_2nd) {
>> >                       if (i == PIPE_SHADER_TESS_CTRL) {
>> > @@ -2714,30 +2716,34 @@ void si_init_all_descriptors(struct si_context
>> > *sctx)
>> >       si_init_bindless_descriptors(sctx, &sctx->bindless_descriptors,
>> >                                    SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES,
>> >                                    1024);
>> >
>> >       sctx->descriptors_dirty = u_bit_consecutive(0, SI_NUM_DESCS);
>> >
>> >       /* Set pipe_context functions. */
>> >       sctx->b.bind_sampler_states = si_bind_sampler_states;
>> >       sctx->b.set_shader_images = si_set_shader_images;
>> >       sctx->b.set_constant_buffer = si_pipe_set_constant_buffer;
>> > -     sctx->b.set_polygon_stipple = si_set_polygon_stipple;
>> >       sctx->b.set_shader_buffers = si_set_shader_buffers;
>> >       sctx->b.set_sampler_views = si_set_sampler_views;
>> >       sctx->b.create_texture_handle = si_create_texture_handle;
>> >       sctx->b.delete_texture_handle = si_delete_texture_handle;
>> >       sctx->b.make_texture_handle_resident =
>> > si_make_texture_handle_resident;
>> >       sctx->b.create_image_handle = si_create_image_handle;
>> >       sctx->b.delete_image_handle = si_delete_image_handle;
>> >       sctx->b.make_image_handle_resident =
>> si_make_image_handle_resident;
>> >
>> > +     if (!sctx->has_graphics)
>> > +             return;
>> > +
>> > +     sctx->b.set_polygon_stipple = si_set_polygon_stipple;
>> > +
>> >       /* Shader user data. */
>> >       sctx->atoms.s.shader_pointers.emit =
>> > si_emit_graphics_shader_pointers;
>> >
>> >       /* Set default and immutable mappings. */
>> >       si_set_user_data_base(sctx, PIPE_SHADER_VERTEX,
>> > R_00B130_SPI_SHADER_USER_DATA_VS_0);
>> >
>> >       if (sctx->chip_class >= GFX9) {
>> >               si_set_user_data_base(sctx, PIPE_SHADER_TESS_CTRL,
>> >                                     R_00B430_SPI_SHADER_USER_DATA_LS_0);
>> >               si_set_user_data_base(sctx, PIPE_SHADER_GEOMETRY,
>> > diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c
>> > b/src/gallium/drivers/radeonsi/si_gfx_cs.c
>> > index 3d64587fa2b..d0e7cf20b4c 100644
>> > --- a/src/gallium/drivers/radeonsi/si_gfx_cs.c
>> > +++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c
>> > @@ -103,27 +103,29 @@ void si_flush_gfx_cs(struct si_context *ctx,
>> > unsigned flags,
>> >        * This code is only needed when the driver flushes the GFX IB
>> >        * internally, and it never asks for a fence handle.
>> >        */
>> >       if (radeon_emitted(ctx->dma_cs, 0)) {
>> >               assert(fence == NULL); /* internal flushes only */
>> >               si_flush_dma_cs(ctx, flags, NULL);
>> >       }
>> >
>> >       ctx->gfx_flush_in_progress = true;
>> >
>> > -     if (!LIST_IS_EMPTY(&ctx->active_queries))
>> > -             si_suspend_queries(ctx);
>> > -
>> > -     ctx->streamout.suspended = false;
>> > -     if (ctx->streamout.begin_emitted) {
>> > -             si_emit_streamout_end(ctx);
>> > -             ctx->streamout.suspended = true;
>> > +     if (ctx->has_graphics) {
>> > +             if (!LIST_IS_EMPTY(&ctx->active_queries))
>> > +                     si_suspend_queries(ctx);
>> > +
>> > +             ctx->streamout.suspended = false;
>> > +             if (ctx->streamout.begin_emitted) {
>> > +                     si_emit_streamout_end(ctx);
>> > +                     ctx->streamout.suspended = true;
>> > +             }
>> >       }
>> >
>> >       /* Make sure CP DMA is idle at the end of IBs after L2 prefetches
>> >        * because the kernel doesn't wait for it. */
>> >       if (ctx->chip_class >= CIK)
>> >               si_cp_dma_wait_for_idle(ctx);
>> >
>> >       /* Wait for draw calls to finish if needed. */
>> >       if (wait_flags) {
>> >               ctx->flags |= wait_flags;
>> > @@ -209,20 +211,29 @@ void si_begin_new_gfx_cs(struct si_context *ctx)
>> >        * IB starts drawing.
>> >        *
>> >        * TODO: Do we also need to invalidate CB & DB caches?
>> >        */
>> >       ctx->flags |= SI_CONTEXT_INV_ICACHE |
>> >                     SI_CONTEXT_INV_SMEM_L1 |
>> >                     SI_CONTEXT_INV_VMEM_L1 |
>> >                     SI_CONTEXT_INV_GLOBAL_L2 |
>> >                     SI_CONTEXT_START_PIPELINE_STATS;
>> >
>> > +     ctx->cs_shader_state.initialized = false;
>> > +     si_all_descriptors_begin_new_cs(ctx);
>> > +     si_all_resident_buffers_begin_new_cs(ctx);
>> > +
>> > +     if (!ctx->has_graphics) {
>> > +             ctx->initial_gfx_cs_size = ctx->gfx_cs->current.cdw;
>> > +             return;
>> > +     }
>> > +
>> >       /* set all valid group as dirty so they get reemited on
>> >        * next draw command
>> >        */
>> >       si_pm4_reset_emitted(ctx);
>> >
>> >       /* The CS initialization should be emitted before everything
>> else. */
>> >       si_pm4_emit(ctx, ctx->init_config);
>> >       if (ctx->init_config_gs_rings)
>> >               si_pm4_emit(ctx, ctx->init_config_gs_rings);
>> >
>> > @@ -273,22 +284,20 @@ void si_begin_new_gfx_cs(struct si_context *ctx)
>> >       si_mark_atom_dirty(ctx, &ctx->atoms.s.db_render_state);
>> >       if (ctx->chip_class >= GFX9)
>> >               si_mark_atom_dirty(ctx, &ctx->atoms.s.dpbb_state);
>> >       si_mark_atom_dirty(ctx, &ctx->atoms.s.stencil_ref);
>> >       si_mark_atom_dirty(ctx, &ctx->atoms.s.spi_map);
>> >       si_mark_atom_dirty(ctx, &ctx->atoms.s.streamout_enable);
>> >       si_mark_atom_dirty(ctx, &ctx->atoms.s.render_cond);
>> >       /* CLEAR_STATE disables all window rectangles. */
>> >       if (!has_clear_state || ctx->num_window_rectangles > 0)
>> >               si_mark_atom_dirty(ctx, &ctx->atoms.s.window_rectangles);
>> > -     si_all_descriptors_begin_new_cs(ctx);
>> > -     si_all_resident_buffers_begin_new_cs(ctx);
>> >
>> >       ctx->scissors.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
>> >       ctx->viewports.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
>> >       ctx->viewports.depth_range_dirty_mask = (1 << SI_MAX_VIEWPORTS) -
>> 1;
>> >       si_mark_atom_dirty(ctx, &ctx->atoms.s.guardband);
>> >       si_mark_atom_dirty(ctx, &ctx->atoms.s.scissors);
>> >       si_mark_atom_dirty(ctx, &ctx->atoms.s.viewports);
>> >
>> >       si_mark_atom_dirty(ctx, &ctx->atoms.s.scratch_state);
>> >       if (ctx->scratch_buffer) {
>> > @@ -316,22 +325,20 @@ void si_begin_new_gfx_cs(struct si_context *ctx)
>> >       ctx->last_multi_vgt_param = -1;
>> >       ctx->last_rast_prim = -1;
>> >       ctx->last_sc_line_stipple = ~0;
>> >       ctx->last_vs_state = ~0;
>> >       ctx->last_ls = NULL;
>> >       ctx->last_tcs = NULL;
>> >       ctx->last_tes_sh_base = -1;
>> >       ctx->last_num_tcs_input_cp = -1;
>> >       ctx->last_ls_hs_config = -1; /* impossible value */
>> >
>> > -     ctx->cs_shader_state.initialized = false;
>> > -
>> >       if (has_clear_state) {
>> >               ctx->tracked_regs.reg_value[SI_TRACKED_DB_RENDER_CONTROL]
>> =
>> > 0x00000000;
>> >               ctx->tracked_regs.reg_value[SI_TRACKED_DB_COUNT_CONTROL]
>> =
>> > 0x00000000;
>> >
>>  ctx->tracked_regs.reg_value[SI_TRACKED_DB_RENDER_OVERRIDE2] =
>> > 0x00000000;
>> >               ctx->tracked_regs.reg_value[SI_TRACKED_DB_SHADER_CONTROL]
>> =
>> > 0x00000000;
>> >               ctx->tracked_regs.reg_value[SI_TRACKED_CB_TARGET_MASK] =
>> 0xffffffff;
>> >               ctx->tracked_regs.reg_value[SI_TRACKED_CB_DCC_CONTROL] =
>> 0x00000000;
>> >               ctx->tracked_regs.reg_value[SI_TRACKED_SX_PS_DOWNCONVERT]
>> =
>> > 0x00000000;
>> >
>>  ctx->tracked_regs.reg_value[SI_TRACKED_SX_BLEND_OPT_EPSILON] =
>> > 0x00000000;
>> >
>>  ctx->tracked_regs.reg_value[SI_TRACKED_SX_BLEND_OPT_CONTROL] =
>> > 0x00000000;
>> > diff --git a/src/gallium/drivers/radeonsi/si_pipe.c
>> > b/src/gallium/drivers/radeonsi/si_pipe.c
>> > index 20767c806d2..c2ec664d5a4 100644
>> > --- a/src/gallium/drivers/radeonsi/si_pipe.c
>> > +++ b/src/gallium/drivers/radeonsi/si_pipe.c
>> > @@ -381,61 +381,56 @@ static struct pipe_context
>> > *si_create_context(struct pipe_screen *screen,
>> >  {
>> >       struct si_context *sctx = CALLOC_STRUCT(si_context);
>> >       struct si_screen* sscreen = (struct si_screen *)screen;
>> >       struct radeon_winsys *ws = sscreen->ws;
>> >       int shader, i;
>> >       bool stop_exec_on_failure = (flags &
>> > PIPE_CONTEXT_LOSE_CONTEXT_ON_RESET) != 0;
>> >
>> >       if (!sctx)
>> >               return NULL;
>> >
>> > +     sctx->has_graphics = sscreen->info.chip_class == SI ||
>> > +                          !(flags & PIPE_CONTEXT_COMPUTE_ONLY);
>> > +
>> >       if (flags & PIPE_CONTEXT_DEBUG)
>> >               sscreen->record_llvm_ir = true; /* racy but not critical
>> */
>> >
>> >       sctx->b.screen = screen; /* this must be set first */
>> >       sctx->b.priv = NULL;
>> >       sctx->b.destroy = si_destroy_context;
>> > -     sctx->b.emit_string_marker = si_emit_string_marker;
>> > -     sctx->b.set_debug_callback = si_set_debug_callback;
>> > -     sctx->b.set_log_context = si_set_log_context;
>> > -     sctx->b.set_context_param = si_set_context_param;
>> >       sctx->screen = sscreen; /* Easy accessing of screen/winsys. */
>> >       sctx->is_debug = (flags & PIPE_CONTEXT_DEBUG) != 0;
>> >
>> >       slab_create_child(&sctx->pool_transfers,
>> &sscreen->pool_transfers);
>> >       slab_create_child(&sctx->pool_transfers_unsync,
>> > &sscreen->pool_transfers);
>> >
>> >       sctx->ws = sscreen->ws;
>> >       sctx->family = sscreen->info.family;
>> >       sctx->chip_class = sscreen->info.chip_class;
>> >
>> >       if (sscreen->info.has_gpu_reset_counter_query) {
>> >               sctx->gpu_reset_counter =
>> >                       sctx->ws->query_value(sctx->ws,
>> RADEON_GPU_RESET_COUNTER);
>> >       }
>> >
>> > -     sctx->b.get_device_reset_status = si_get_reset_status;
>> > -     sctx->b.set_device_reset_callback = si_set_device_reset_callback;
>> > -
>> > -     si_init_context_texture_functions(sctx);
>> > -     si_init_query_functions(sctx);
>> >
>> >       if (sctx->chip_class == CIK ||
>> >           sctx->chip_class == VI ||
>> >           sctx->chip_class == GFX9) {
>> >               sctx->eop_bug_scratch = si_resource(
>> >                       pipe_buffer_create(&sscreen->b, 0,
>> PIPE_USAGE_DEFAULT,
>> >                                          16 *
>> sscreen->info.num_render_backends));
>> >               if (!sctx->eop_bug_scratch)
>> >                       goto fail;
>> >       }
>> >
>> > +     /* Initialize context allocators. */
>> >       sctx->allocator_zeroed_memory =
>> >               u_suballocator_create(&sctx->b, 128 * 1024,
>> >                                     0, PIPE_USAGE_DEFAULT,
>> >                                     SI_RESOURCE_FLAG_UNMAPPABLE |
>> >                                     SI_RESOURCE_FLAG_CLEAR, false);
>> >       if (!sctx->allocator_zeroed_memory)
>> >               goto fail;
>> >
>> >       sctx->b.stream_uploader = u_upload_create(&sctx->b, 1024 * 1024,
>> >                                                   0, PIPE_USAGE_STREAM,
>> > @@ -459,38 +454,22 @@ static struct pipe_context
>> > *si_create_context(struct pipe_screen *screen,
>> >       sctx->ctx = sctx->ws->ctx_create(sctx->ws);
>> >       if (!sctx->ctx)
>> >               goto fail;
>> >
>> >       if (sscreen->info.num_sdma_rings && !(sscreen->debug_flags &
>> > DBG(NO_ASYNC_DMA))) {
>> >               sctx->dma_cs = sctx->ws->cs_create(sctx->ctx, RING_DMA,
>> >                                                  (void*)si_flush_dma_cs,
>> >                                                  sctx,
>> stop_exec_on_failure);
>> >       }
>> >
>> > -     si_init_buffer_functions(sctx);
>> > -     si_init_clear_functions(sctx);
>> > -     si_init_blit_functions(sctx);
>> > -     si_init_compute_functions(sctx);
>> > -     si_init_compute_blit_functions(sctx);
>> > -     si_init_debug_functions(sctx);
>> > -     si_init_msaa_functions(sctx);
>> > -     si_init_streamout_functions(sctx);
>> > -
>> > -     if (sscreen->info.has_hw_decode) {
>> > -             sctx->b.create_video_codec = si_uvd_create_decoder;
>> > -             sctx->b.create_video_buffer = si_video_buffer_create;
>> > -     } else {
>> > -             sctx->b.create_video_codec = vl_create_decoder;
>> > -             sctx->b.create_video_buffer = vl_video_buffer_create;
>> > -     }
>> > -
>> > -     sctx->gfx_cs = ws->cs_create(sctx->ctx, RING_GFX,
>> > +     sctx->gfx_cs = ws->cs_create(sctx->ctx,
>> > +                                  sctx->has_graphics ? RING_GFX :
>> RING_COMPUTE,
>> >                                    (void*)si_flush_gfx_cs, sctx,
>> stop_exec_on_failure);
>> >
>> >       /* Border colors. */
>> >       sctx->border_color_table = malloc(SI_MAX_BORDER_COLORS *
>> >
>>  sizeof(*sctx->border_color_table));
>> >       if (!sctx->border_color_table)
>> >               goto fail;
>> >
>> >       sctx->border_color_buffer = si_resource(
>> >               pipe_buffer_create(screen, 0, PIPE_USAGE_DEFAULT,
>> > @@ -498,43 +477,76 @@ static struct pipe_context
>> > *si_create_context(struct pipe_screen *screen,
>> >                                  sizeof(*sctx->border_color_table)));
>> >       if (!sctx->border_color_buffer)
>> >               goto fail;
>> >
>> >       sctx->border_color_map =
>> >               ws->buffer_map(sctx->border_color_buffer->buf,
>> >                              NULL, PIPE_TRANSFER_WRITE);
>> >       if (!sctx->border_color_map)
>> >               goto fail;
>> >
>> > +     /* Initialize context functions used by graphics and compute. */
>> > +     sctx->b.emit_string_marker = si_emit_string_marker;
>> > +     sctx->b.set_debug_callback = si_set_debug_callback;
>> > +     sctx->b.set_log_context = si_set_log_context;
>> > +     sctx->b.set_context_param = si_set_context_param;
>> > +     sctx->b.get_device_reset_status = si_get_reset_status;
>> > +     sctx->b.set_device_reset_callback = si_set_device_reset_callback;
>> > +     sctx->b.memory_barrier = si_memory_barrier;
>> > +
>> >       si_init_all_descriptors(sctx);
>> > +     si_init_buffer_functions(sctx);
>> > +     si_init_clear_functions(sctx);
>> > +     si_init_blit_functions(sctx);
>> > +     si_init_compute_functions(sctx);
>> > +     si_init_compute_blit_functions(sctx);
>> > +     si_init_debug_functions(sctx);
>> >       si_init_fence_functions(sctx);
>> > -     si_init_state_functions(sctx);
>> > -     si_init_shader_functions(sctx);
>> > -     si_init_viewport_functions(sctx);
>> > -
>> > -     if (sctx->chip_class >= CIK)
>> > -             cik_init_sdma_functions(sctx);
>> > -     else
>> > -             si_init_dma_functions(sctx);
>> >
>> >       if (sscreen->debug_flags & DBG(FORCE_DMA))
>> >               sctx->b.resource_copy_region = sctx->dma_copy;
>> >
>> > -     sctx->blitter = util_blitter_create(&sctx->b);
>> > -     if (sctx->blitter == NULL)
>> > -             goto fail;
>> > -     sctx->blitter->skip_viewport_restore = true;
>> > +     /* Initialize graphics-only context functions. */
>> > +     if (sctx->has_graphics) {
>> > +             si_init_context_texture_functions(sctx);
>> > +             si_init_query_functions(sctx);
>> > +             si_init_msaa_functions(sctx);
>> > +             si_init_shader_functions(sctx);
>> > +             si_init_state_functions(sctx);
>> > +             si_init_streamout_functions(sctx);
>> > +             si_init_viewport_functions(sctx);
>> > +
>> > +             sctx->blitter = util_blitter_create(&sctx->b);
>> > +             if (sctx->blitter == NULL)
>> > +                     goto fail;
>> > +             sctx->blitter->skip_viewport_restore = true;
>> >
>> > -     si_init_draw_functions(sctx);
>> > +             si_init_draw_functions(sctx);
>> > +     }
>> > +
>> > +     /* Initialize SDMA functions. */
>> > +     if (sctx->chip_class >= CIK)
>> > +             cik_init_sdma_functions(sctx);
>> > +     else
>> > +             si_init_dma_functions(sctx);
>> >
>> >       sctx->sample_mask = 0xffff;
>> >
>> > +     /* Initialize multimedia functions. */
>> > +     if (sscreen->info.has_hw_decode) {
>> > +             sctx->b.create_video_codec = si_uvd_create_decoder;
>> > +             sctx->b.create_video_buffer = si_video_buffer_create;
>> > +     } else {
>> > +             sctx->b.create_video_codec = vl_create_decoder;
>> > +             sctx->b.create_video_buffer = vl_video_buffer_create;
>> > +     }
>> > +
>> >       if (sctx->chip_class >= GFX9) {
>> >               sctx->wait_mem_scratch = si_resource(
>> >                       pipe_buffer_create(screen, 0, PIPE_USAGE_DEFAULT,
>> 4));
>> >               if (!sctx->wait_mem_scratch)
>> >                       goto fail;
>> >
>> >               /* Initialize the memory. */
>> >               si_cp_write_data(sctx, sctx->wait_mem_scratch, 0, 4,
>> >                                V_370_MEM, V_370_ME,
>> &sctx->wait_mem_number);
>> >       }
>> > @@ -544,21 +556,22 @@ static struct pipe_context
>> > *si_create_context(struct pipe_screen *screen,
>> >       if (sctx->chip_class == CIK) {
>> >               sctx->null_const_buf.buffer =
>> >                       pipe_aligned_buffer_create(screen,
>> >                                                  SI_RESOURCE_FLAG_32BIT,
>> >                                                  PIPE_USAGE_DEFAULT, 16,
>> >
>> sctx->screen->info.tcc_cache_line_size);
>> >               if (!sctx->null_const_buf.buffer)
>> >                       goto fail;
>> >               sctx->null_const_buf.buffer_size =
>> > sctx->null_const_buf.buffer->width0;
>> >
>> > -             for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
>> > +             unsigned start_shader = sctx->has_graphics ? 0 :
>> > PIPE_SHADER_COMPUTE;
>> > +             for (shader = start_shader; shader < SI_NUM_SHADERS;
>> shader++) {
>> >                       for (i = 0; i < SI_NUM_CONST_BUFFERS; i++) {
>> >                               sctx->b.set_constant_buffer(&sctx->b,
>> shader, i,
>> >
>>  &sctx->null_const_buf);
>> >                       }
>> >               }
>> >
>> >               si_set_rw_buffer(sctx, SI_HS_CONST_DEFAULT_TESS_LEVELS,
>> >                                &sctx->null_const_buf);
>> >               si_set_rw_buffer(sctx, SI_VS_CONST_INSTANCE_DIVISORS,
>> >                                &sctx->null_const_buf);
>> > diff --git a/src/gallium/drivers/radeonsi/si_pipe.h
>> > b/src/gallium/drivers/radeonsi/si_pipe.h
>> > index b01d5744752..348e8e5bd26 100644
>> > --- a/src/gallium/drivers/radeonsi/si_pipe.h
>> > +++ b/src/gallium/drivers/radeonsi/si_pipe.h
>> > @@ -777,21 +777,21 @@ struct si_saved_cs {
>> >  };
>> >
>> >  struct si_context {
>> >       struct pipe_context             b; /* base class */
>> >
>> >       enum radeon_family              family;
>> >       enum chip_class                 chip_class;
>> >
>> >       struct radeon_winsys            *ws;
>> >       struct radeon_winsys_ctx        *ctx;
>> > -     struct radeon_cmdbuf            *gfx_cs;
>> > +     struct radeon_cmdbuf            *gfx_cs; /* compute IB if
>> graphics is disabled
>> > */
>> >       struct radeon_cmdbuf            *dma_cs;
>> >       struct pipe_fence_handle        *last_gfx_fence;
>> >       struct pipe_fence_handle        *last_sdma_fence;
>> >       struct si_resource              *eop_bug_scratch;
>> >       struct u_upload_mgr             *cached_gtt_allocator;
>> >       struct threaded_context         *tc;
>> >       struct u_suballocator           *allocator_zeroed_memory;
>> >       struct slab_child_pool          pool_transfers;
>> >       struct slab_child_pool          pool_transfers_unsync; /* for
>> > threaded_context */
>> >       struct pipe_device_reset_callback device_reset_callback;
>> > @@ -815,20 +815,21 @@ struct si_context {
>> >       void                            *cs_clear_render_target;
>> >       void                            *cs_clear_render_target_1d_array;
>> >       struct si_screen                *screen;
>> >       struct pipe_debug_callback      debug;
>> >       struct ac_llvm_compiler         compiler; /* only non-threaded
>> compilation
>> > */
>> >       struct si_shader_ctx_state      fixed_func_tcs_shader;
>> >       struct si_resource              *wait_mem_scratch;
>> >       unsigned                        wait_mem_number;
>> >       uint16_t                        prefetch_L2_mask;
>> >
>> > +     bool                            has_graphics;
>> >       bool                            gfx_flush_in_progress:1;
>> >       bool                            gfx_last_ib_is_busy:1;
>> >       bool                            compute_is_busy:1;
>> >
>> >       unsigned                        num_gfx_cs_flushes;
>> >       unsigned                        initial_gfx_cs_size;
>> >       unsigned                        gpu_reset_counter;
>> >       unsigned                        last_dirty_tex_counter;
>> >       unsigned                        last_compressed_colortex_counter;
>> >       unsigned                        last_num_draw_calls;
>> > diff --git a/src/gallium/drivers/radeonsi/si_state.c
>> > b/src/gallium/drivers/radeonsi/si_state.c
>> > index b49a1b3695e..458b108a7e3 100644
>> > --- a/src/gallium/drivers/radeonsi/si_state.c
>> > +++ b/src/gallium/drivers/radeonsi/si_state.c
>> > @@ -4699,21 +4699,21 @@ static void si_texture_barrier(struct
>> > pipe_context *ctx, unsigned flags)
>> >
>> >       si_update_fb_dirtiness_after_rendering(sctx);
>> >
>> >       /* Multisample surfaces are flushed in si_decompress_textures. */
>> >       if (sctx->framebuffer.uncompressed_cb_mask)
>> >               si_make_CB_shader_coherent(sctx,
>> sctx->framebuffer.nr_samples,
>> >
>> sctx->framebuffer.CB_has_shader_readable_metadata);
>> >  }
>> >
>> >  /* This only ensures coherency for shader image/buffer stores. */
>> > -static void si_memory_barrier(struct pipe_context *ctx, unsigned
>> > flags)
>> > +void si_memory_barrier(struct pipe_context *ctx, unsigned flags)
>> >  {
>> >       struct si_context *sctx = (struct si_context *)ctx;
>> >
>> >       /* Subsequent commands must wait for all shader invocations to
>> >        * complete. */
>> >       sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
>> >                        SI_CONTEXT_CS_PARTIAL_FLUSH;
>> >
>> >       if (flags & PIPE_BARRIER_CONSTANT_BUFFER)
>> >               sctx->flags |= SI_CONTEXT_INV_SMEM_L1 |
>> > @@ -4813,21 +4813,20 @@ void si_init_state_functions(struct si_context
>> > *sctx)
>> >       sctx->b.sampler_view_destroy = si_sampler_view_destroy;
>> >
>> >       sctx->b.set_sample_mask = si_set_sample_mask;
>> >
>> >       sctx->b.create_vertex_elements_state = si_create_vertex_elements;
>> >       sctx->b.bind_vertex_elements_state = si_bind_vertex_elements;
>> >       sctx->b.delete_vertex_elements_state = si_delete_vertex_element;
>> >       sctx->b.set_vertex_buffers = si_set_vertex_buffers;
>> >
>> >       sctx->b.texture_barrier = si_texture_barrier;
>> > -     sctx->b.memory_barrier = si_memory_barrier;
>> >       sctx->b.set_min_samples = si_set_min_samples;
>> >       sctx->b.set_tess_state = si_set_tess_state;
>> >
>> >       sctx->b.set_active_query_state = si_set_active_query_state;
>> >
>> >       si_init_config(sctx);
>> >  }
>> >
>> >  void si_init_screen_state_functions(struct si_screen *sscreen)
>> >  {
>> > diff --git a/src/gallium/drivers/radeonsi/si_state.h
>> > b/src/gallium/drivers/radeonsi/si_state.h
>> > index 767e789276a..6faa4c511b1 100644
>> > --- a/src/gallium/drivers/radeonsi/si_state.h
>> > +++ b/src/gallium/drivers/radeonsi/si_state.h
>> > @@ -482,20 +482,21 @@ void si_set_active_descriptors_for_shader(struct
>> > si_context *sctx,
>> >                                         struct si_shader_selector *sel);
>> >  bool si_bindless_descriptor_can_reclaim_slab(void *priv,
>> >                                            struct pb_slab_entry *entry);
>> >  struct pb_slab *si_bindless_descriptor_slab_alloc(void *priv, unsigned
>> > heap,
>> >                                                 unsigned entry_size,
>> >                                                 unsigned group_index);
>> >  void si_bindless_descriptor_slab_free(void *priv, struct pb_slab
>> > *pslab);
>> >  void si_rebind_buffer(struct si_context *sctx, struct pipe_resource
>> > *buf,
>> >                     uint64_t old_va);
>> >  /* si_state.c */
>> > +void si_memory_barrier(struct pipe_context *ctx, unsigned flags);
>> >  void si_init_state_functions(struct si_context *sctx);
>> >  void si_init_screen_state_functions(struct si_screen *sscreen);
>> >  void
>> >  si_make_buffer_descriptor(struct si_screen *screen, struct si_resource
>> > *buf,
>> >                         enum pipe_format format,
>> >                         unsigned offset, unsigned size,
>> >                         uint32_t *state);
>> >  void
>> >  si_make_texture_descriptor(struct si_screen *screen,
>> >                          struct si_texture *tex,
>> > diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c
>> > b/src/gallium/drivers/radeonsi/si_state_draw.c
>> > index 9c968e39c2c..2a514f144b9 100644
>> > --- a/src/gallium/drivers/radeonsi/si_state_draw.c
>> > +++ b/src/gallium/drivers/radeonsi/si_state_draw.c
>> > @@ -872,21 +872,21 @@ static void si_emit_draw_packets(struct
>> > si_context *sctx,
>> >
>>  S_0287F0_USE_OPAQUE(!!info->count_from_stream_output));
>> >               }
>> >       }
>> >  }
>> >
>> >  static void si_emit_surface_sync(struct si_context *sctx,
>> >                                unsigned cp_coher_cntl)
>> >  {
>> >       struct radeon_cmdbuf *cs = sctx->gfx_cs;
>> >
>> > -     if (sctx->chip_class >= GFX9) {
>> > +     if (sctx->chip_class >= GFX9 || !sctx->has_graphics) {
>> >               /* Flush caches and wait for the caches to assert idle. */
>> >               radeon_emit(cs, PKT3(PKT3_ACQUIRE_MEM, 5, 0));
>> >               radeon_emit(cs, cp_coher_cntl); /* CP_COHER_CNTL */
>> >               radeon_emit(cs, 0xffffffff);    /* CP_COHER_SIZE */
>> >               radeon_emit(cs, 0xffffff);      /* CP_COHER_SIZE_HI */
>> >               radeon_emit(cs, 0);             /* CP_COHER_BASE */
>> >               radeon_emit(cs, 0);             /* CP_COHER_BASE_HI */
>> >               radeon_emit(cs, 0x0000000A);    /* POLL_INTERVAL */
>> >       } else {
>> >               /* ACQUIRE_MEM is only required on a compute ring. */
>> > @@ -895,20 +895,32 @@ static void si_emit_surface_sync(struct
>> > si_context *sctx,
>> >               radeon_emit(cs, 0xffffffff);      /* CP_COHER_SIZE */
>> >               radeon_emit(cs, 0);               /* CP_COHER_BASE */
>> >               radeon_emit(cs, 0x0000000A);      /* POLL_INTERVAL */
>> >       }
>> >  }
>> >
>> >  void si_emit_cache_flush(struct si_context *sctx)
>> >  {
>> >       struct radeon_cmdbuf *cs = sctx->gfx_cs;
>> >       uint32_t flags = sctx->flags;
>> > +
>> > +     if (!sctx->has_graphics) {
>> > +             /* Only process compute flags. */
>> > +             flags &= SI_CONTEXT_INV_ICACHE |
>> > +                      SI_CONTEXT_INV_SMEM_L1 |
>> > +                      SI_CONTEXT_INV_VMEM_L1 |
>> > +                      SI_CONTEXT_INV_GLOBAL_L2 |
>> > +                      SI_CONTEXT_WRITEBACK_GLOBAL_L2 |
>> > +                      SI_CONTEXT_INV_L2_METADATA |
>> > +                      SI_CONTEXT_CS_PARTIAL_FLUSH;
>> > +     }
>> > +
>> >       uint32_t cp_coher_cntl = 0;
>> >       uint32_t flush_cb_db = flags & (SI_CONTEXT_FLUSH_AND_INV_CB |
>> >                                       SI_CONTEXT_FLUSH_AND_INV_DB);
>> >
>> >       if (flags & SI_CONTEXT_FLUSH_AND_INV_CB)
>> >               sctx->num_cb_cache_flushes++;
>> >       if (flags & SI_CONTEXT_FLUSH_AND_INV_DB)
>> >               sctx->num_db_cache_flushes++;
>> >
>> >       /* SI has a bug that it always flushes ICACHE and KCACHE if either
>> > @@ -1061,25 +1073,26 @@ void si_emit_cache_flush(struct si_context
>> > *sctx)
>> >                                 EOP_DATA_SEL_VALUE_32BIT,
>> >                                 sctx->wait_mem_scratch, va,
>> >                                 sctx->wait_mem_number, SI_NOT_QUERY);
>> >               si_cp_wait_mem(sctx, cs, va, sctx->wait_mem_number,
>> 0xffffffff,
>> >                              WAIT_REG_MEM_EQUAL);
>> >       }
>> >
>> >       /* Make sure ME is idle (it executes most packets) before
>> continuing.
>> >        * This prevents read-after-write hazards between PFP and ME.
>> >        */
>> > -     if (cp_coher_cntl ||
>> > -         (flags & (SI_CONTEXT_CS_PARTIAL_FLUSH |
>> > -                         SI_CONTEXT_INV_VMEM_L1 |
>> > -                         SI_CONTEXT_INV_GLOBAL_L2 |
>> > -                         SI_CONTEXT_WRITEBACK_GLOBAL_L2))) {
>> > +     if (sctx->has_graphics &&
>> > +         (cp_coher_cntl ||
>> > +          (flags & (SI_CONTEXT_CS_PARTIAL_FLUSH |
>> > +                    SI_CONTEXT_INV_VMEM_L1 |
>> > +                    SI_CONTEXT_INV_GLOBAL_L2 |
>> > +                    SI_CONTEXT_WRITEBACK_GLOBAL_L2)))) {
>> >               radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
>> >               radeon_emit(cs, 0);
>> >       }
>> >
>> >       /* SI-CI-VI only:
>> >        *   When one of the CP_COHER_CNTL.DEST_BASE flags is set,
>> > SURFACE_SYNC
>> >        *   waits for idle, so it should be last. SURFACE_SYNC is done
>> in
>> > PFP.
>> >        *
>> >        * cp_coher_cntl should contain all necessary flags except TC
>> flags
>> >        * at this point.
>> > diff --git a/src/gallium/drivers/radeonsi/si_texture.c
>> > b/src/gallium/drivers/radeonsi/si_texture.c
>> > index a50088d2d8f..581f90a7b2f 100644
>> > --- a/src/gallium/drivers/radeonsi/si_texture.c
>> > +++ b/src/gallium/drivers/radeonsi/si_texture.c
>> > @@ -457,20 +457,23 @@ static bool si_texture_discard_dcc(struct
>> > si_screen *sscreen,
>> >   *   compressed tiled
>> >   *
>> >   * \param sctx  the current context if you have one, or
>> > sscreen->aux_context
>> >   *              if you don't.
>> >   */
>> >  bool si_texture_disable_dcc(struct si_context *sctx,
>> >                           struct si_texture *tex)
>> >  {
>> >       struct si_screen *sscreen = sctx->screen;
>> >
>> > +     if (!sctx->has_graphics)
>> > +             return si_texture_discard_dcc(sscreen, tex);
>> > +
>> >       if (!si_can_disable_dcc(tex))
>> >               return false;
>> >
>> >       if (&sctx->b == sscreen->aux_context)
>> >               mtx_lock(&sscreen->aux_context_lock);
>> >
>> >       /* Decompress DCC. */
>> >       si_decompress_dcc(sctx, tex);
>> >       sctx->b.flush(&sctx->b, NULL, 0);
>>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
I ran a simple test verifying that compute is working properly on the
compute ring.

When clover is using compute rings, it doesn't stall/block graphics
operations.

Marek

On Tue, Feb 26, 2019 at 4:10 PM Jan Vesely <jan.vesely@rutgers.edu> wrote:

> Can you add a bit of background on why clover should/should not use other
> rings?
>
> I planned to test this, but my raven system can't run clover since kernel
> 4.20 release (BZ 109649), so I need to bisect that first.
> Can this patch help address the soft lockup issue on CIK (BZ 108879)?
> Presumably, it was tested using clover on CIK, right?
>
> Jan
>
> On Tue, Feb 26, 2019 at 3:00 PM Marek Olšák <maraeo@gmail.com> wrote:
>
>> I'll just push it.
>>
>> Marek
>>
>> On Mon, Feb 25, 2019 at 9:37 PM Dieter Nützel <Dieter@nuetzel-hh.de>
>> wrote:
>>
>>> Hello Marek,
>>>
>>> this series needs a rebase (if you have some time).
>>>
>>> Dieter
>>>
>>> Am 12.02.2019 19:12, schrieb Marek Olšák:
>>> > From: Marek Olšák <marek.olsak@amd.com>
>>> >
>>> > initialize all non-compute context functions to NULL.
>>> >
>>> > v2: fix SI
>>> > ---
>>> >  src/gallium/drivers/radeonsi/si_blit.c        | 14 ++-
>>> >  src/gallium/drivers/radeonsi/si_clear.c       |  7 +-
>>> >  src/gallium/drivers/radeonsi/si_compute.c     | 15 +--
>>> >  src/gallium/drivers/radeonsi/si_descriptors.c | 10 +-
>>> >  src/gallium/drivers/radeonsi/si_gfx_cs.c      | 29 +++---
>>> >  src/gallium/drivers/radeonsi/si_pipe.c        | 95 +++++++++++--------
>>> >  src/gallium/drivers/radeonsi/si_pipe.h        |  3 +-
>>> >  src/gallium/drivers/radeonsi/si_state.c       |  3 +-
>>> >  src/gallium/drivers/radeonsi/si_state.h       |  1 +
>>> >  src/gallium/drivers/radeonsi/si_state_draw.c  | 25 +++--
>>> >  src/gallium/drivers/radeonsi/si_texture.c     |  3 +
>>> >  11 files changed, 130 insertions(+), 75 deletions(-)
>>> >
>>> > diff --git a/src/gallium/drivers/radeonsi/si_blit.c
>>> > b/src/gallium/drivers/radeonsi/si_blit.c
>>> > index bb8d1cbd12d..f39cb5d143f 100644
>>> > --- a/src/gallium/drivers/radeonsi/si_blit.c
>>> > +++ b/src/gallium/drivers/radeonsi/si_blit.c
>>> > @@ -1345,25 +1345,31 @@ static void si_flush_resource(struct
>>> > pipe_context *ctx,
>>> >
>>> >               if (separate_dcc_dirty) {
>>> >                       tex->separate_dcc_dirty = false;
>>> >                       vi_separate_dcc_process_and_reset_stats(ctx,
>>> tex);
>>> >               }
>>> >       }
>>> >  }
>>> >
>>> >  void si_decompress_dcc(struct si_context *sctx, struct si_texture
>>> > *tex)
>>> >  {
>>> > -     if (!tex->dcc_offset)
>>> > +     /* If graphics is disabled, we can't decompress DCC, but it
>>> shouldn't
>>> > +      * be compressed either. The caller should simply discard it.
>>> > +      */
>>> > +     if (!tex->dcc_offset || !sctx->has_graphics)
>>> >               return;
>>> >
>>> >       si_blit_decompress_color(sctx, tex, 0,
>>> tex->buffer.b.b.last_level,
>>> >                                0, util_max_layer(&tex->buffer.b.b, 0),
>>> >                                true);
>>> >  }
>>> >
>>> >  void si_init_blit_functions(struct si_context *sctx)
>>> >  {
>>> >       sctx->b.resource_copy_region = si_resource_copy_region;
>>> > -     sctx->b.blit = si_blit;
>>> > -     sctx->b.flush_resource = si_flush_resource;
>>> > -     sctx->b.generate_mipmap = si_generate_mipmap;
>>> > +
>>> > +     if (sctx->has_graphics) {
>>> > +             sctx->b.blit = si_blit;
>>> > +             sctx->b.flush_resource = si_flush_resource;
>>> > +             sctx->b.generate_mipmap = si_generate_mipmap;
>>> > +     }
>>> >  }
>>> > diff --git a/src/gallium/drivers/radeonsi/si_clear.c
>>> > b/src/gallium/drivers/radeonsi/si_clear.c
>>> > index 9a00bb73b94..e1805f2a1c9 100644
>>> > --- a/src/gallium/drivers/radeonsi/si_clear.c
>>> > +++ b/src/gallium/drivers/radeonsi/si_clear.c
>>> > @@ -764,15 +764,18 @@ static void si_clear_texture(struct pipe_context
>>> > *pipe,
>>> >                       util_clear_render_target(pipe, sf, &color,
>>> >                                                box->x, box->y,
>>> >                                                box->width,
>>> box->height);
>>> >               }
>>> >       }
>>> >       pipe_surface_reference(&sf, NULL);
>>> >  }
>>> >
>>> >  void si_init_clear_functions(struct si_context *sctx)
>>> >  {
>>> > -     sctx->b.clear = si_clear;
>>> >       sctx->b.clear_render_target = si_clear_render_target;
>>> > -     sctx->b.clear_depth_stencil = si_clear_depth_stencil;
>>> >       sctx->b.clear_texture = si_clear_texture;
>>> > +
>>> > +     if (sctx->has_graphics) {
>>> > +             sctx->b.clear = si_clear;
>>> > +             sctx->b.clear_depth_stencil = si_clear_depth_stencil;
>>> > +     }
>>> >  }
>>> > diff --git a/src/gallium/drivers/radeonsi/si_compute.c
>>> > b/src/gallium/drivers/radeonsi/si_compute.c
>>> > index 1a62b3e0844..87addd53976 100644
>>> > --- a/src/gallium/drivers/radeonsi/si_compute.c
>>> > +++ b/src/gallium/drivers/radeonsi/si_compute.c
>>> > @@ -880,26 +880,28 @@ static void si_launch_grid(
>>> >               info->block[0] * info->block[1] * info->block[2] > 256;
>>> >
>>> >       if (cs_regalloc_hang)
>>> >               sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
>>> >                                SI_CONTEXT_CS_PARTIAL_FLUSH;
>>> >
>>> >       if (program->ir_type != PIPE_SHADER_IR_NATIVE &&
>>> >           program->shader.compilation_failed)
>>> >               return;
>>> >
>>> > -     if (sctx->last_num_draw_calls != sctx->num_draw_calls) {
>>> > -             si_update_fb_dirtiness_after_rendering(sctx);
>>> > -             sctx->last_num_draw_calls = sctx->num_draw_calls;
>>> > -     }
>>> > +     if (sctx->has_graphics) {
>>> > +             if (sctx->last_num_draw_calls != sctx->num_draw_calls) {
>>> > +                     si_update_fb_dirtiness_after_rendering(sctx);
>>> > +                     sctx->last_num_draw_calls = sctx->num_draw_calls;
>>> > +             }
>>> >
>>> > -     si_decompress_textures(sctx, 1 << PIPE_SHADER_COMPUTE);
>>> > +             si_decompress_textures(sctx, 1 << PIPE_SHADER_COMPUTE);
>>> > +     }
>>> >
>>> >       /* Add buffer sizes for memory checking in need_cs_space. */
>>> >       si_context_add_resource_size(sctx, &program->shader.bo->b.b);
>>> >       /* TODO: add the scratch buffer */
>>> >
>>> >       if (info->indirect) {
>>> >               si_context_add_resource_size(sctx, info->indirect);
>>> >
>>> >               /* Indirect buffers use TC L2 on GFX9, but not older hw.
>>> */
>>> >               if (sctx->chip_class <= VI &&
>>> > @@ -917,21 +919,22 @@ static void si_launch_grid(
>>> >       if (sctx->flags)
>>> >               si_emit_cache_flush(sctx);
>>> >
>>> >       if (!si_switch_compute_shader(sctx, program, &program->shader,
>>> >                                       code_object, info->pc))
>>> >               return;
>>> >
>>> >       si_upload_compute_shader_descriptors(sctx);
>>> >       si_emit_compute_shader_pointers(sctx);
>>> >
>>> > -     if (si_is_atom_dirty(sctx, &sctx->atoms.s.render_cond)) {
>>> > +     if (sctx->has_graphics &&
>>> > +         si_is_atom_dirty(sctx, &sctx->atoms.s.render_cond)) {
>>> >               sctx->atoms.s.render_cond.emit(sctx);
>>> >               si_set_atom_dirty(sctx, &sctx->atoms.s.render_cond,
>>> false);
>>> >       }
>>> >
>>> >       if ((program->input_size ||
>>> >              program->ir_type == PIPE_SHADER_IR_NATIVE) &&
>>> >             unlikely(!si_upload_compute_input(sctx, code_object,
>>> > info))) {
>>> >               return;
>>> >       }
>>> >
>>> > diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c
>>> > b/src/gallium/drivers/radeonsi/si_descriptors.c
>>> > index 21d4ca946d3..0f22c55723c 100644
>>> > --- a/src/gallium/drivers/radeonsi/si_descriptors.c
>>> > +++ b/src/gallium/drivers/radeonsi/si_descriptors.c
>>> > @@ -2640,22 +2640,24 @@ void
>>> > si_all_resident_buffers_begin_new_cs(struct si_context *sctx)
>>> >
>>> >       sctx->num_resident_handles += num_resident_tex_handles +
>>> >                                       num_resident_img_handles;
>>> >  }
>>> >
>>> >  /* INIT/DEINIT/UPLOAD */
>>> >
>>> >  void si_init_all_descriptors(struct si_context *sctx)
>>> >  {
>>> >       int i;
>>> > +     unsigned first_shader =
>>> > +             sctx->has_graphics ? 0 : PIPE_SHADER_COMPUTE;
>>> >
>>> > -     for (i = 0; i < SI_NUM_SHADERS; i++) {
>>> > +     for (i = first_shader; i < SI_NUM_SHADERS; i++) {
>>> >               bool is_2nd = sctx->chip_class >= GFX9 &&
>>> >                                    (i == PIPE_SHADER_TESS_CTRL ||
>>> >                                     i == PIPE_SHADER_GEOMETRY);
>>> >               unsigned num_sampler_slots = SI_NUM_IMAGES / 2 +
>>> SI_NUM_SAMPLERS;
>>> >               unsigned num_buffer_slots = SI_NUM_SHADER_BUFFERS +
>>> > SI_NUM_CONST_BUFFERS;
>>> >               int rel_dw_offset;
>>> >               struct si_descriptors *desc;
>>> >
>>> >               if (is_2nd) {
>>> >                       if (i == PIPE_SHADER_TESS_CTRL) {
>>> > @@ -2714,30 +2716,34 @@ void si_init_all_descriptors(struct si_context
>>> > *sctx)
>>> >       si_init_bindless_descriptors(sctx, &sctx->bindless_descriptors,
>>> >
>>> SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES,
>>> >                                    1024);
>>> >
>>> >       sctx->descriptors_dirty = u_bit_consecutive(0, SI_NUM_DESCS);
>>> >
>>> >       /* Set pipe_context functions. */
>>> >       sctx->b.bind_sampler_states = si_bind_sampler_states;
>>> >       sctx->b.set_shader_images = si_set_shader_images;
>>> >       sctx->b.set_constant_buffer = si_pipe_set_constant_buffer;
>>> > -     sctx->b.set_polygon_stipple = si_set_polygon_stipple;
>>> >       sctx->b.set_shader_buffers = si_set_shader_buffers;
>>> >       sctx->b.set_sampler_views = si_set_sampler_views;
>>> >       sctx->b.create_texture_handle = si_create_texture_handle;
>>> >       sctx->b.delete_texture_handle = si_delete_texture_handle;
>>> >       sctx->b.make_texture_handle_resident =
>>> > si_make_texture_handle_resident;
>>> >       sctx->b.create_image_handle = si_create_image_handle;
>>> >       sctx->b.delete_image_handle = si_delete_image_handle;
>>> >       sctx->b.make_image_handle_resident =
>>> si_make_image_handle_resident;
>>> >
>>> > +     if (!sctx->has_graphics)
>>> > +             return;
>>> > +
>>> > +     sctx->b.set_polygon_stipple = si_set_polygon_stipple;
>>> > +
>>> >       /* Shader user data. */
>>> >       sctx->atoms.s.shader_pointers.emit =
>>> > si_emit_graphics_shader_pointers;
>>> >
>>> >       /* Set default and immutable mappings. */
>>> >       si_set_user_data_base(sctx, PIPE_SHADER_VERTEX,
>>> > R_00B130_SPI_SHADER_USER_DATA_VS_0);
>>> >
>>> >       if (sctx->chip_class >= GFX9) {
>>> >               si_set_user_data_base(sctx, PIPE_SHADER_TESS_CTRL,
>>> >
>>>  R_00B430_SPI_SHADER_USER_DATA_LS_0);
>>> >               si_set_user_data_base(sctx, PIPE_SHADER_GEOMETRY,
>>> > diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c
>>> > b/src/gallium/drivers/radeonsi/si_gfx_cs.c
>>> > index 3d64587fa2b..d0e7cf20b4c 100644
>>> > --- a/src/gallium/drivers/radeonsi/si_gfx_cs.c
>>> > +++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c
>>> > @@ -103,27 +103,29 @@ void si_flush_gfx_cs(struct si_context *ctx,
>>> > unsigned flags,
>>> >        * This code is only needed when the driver flushes the GFX IB
>>> >        * internally, and it never asks for a fence handle.
>>> >        */
>>> >       if (radeon_emitted(ctx->dma_cs, 0)) {
>>> >               assert(fence == NULL); /* internal flushes only */
>>> >               si_flush_dma_cs(ctx, flags, NULL);
>>> >       }
>>> >
>>> >       ctx->gfx_flush_in_progress = true;
>>> >
>>> > -     if (!LIST_IS_EMPTY(&ctx->active_queries))
>>> > -             si_suspend_queries(ctx);
>>> > -
>>> > -     ctx->streamout.suspended = false;
>>> > -     if (ctx->streamout.begin_emitted) {
>>> > -             si_emit_streamout_end(ctx);
>>> > -             ctx->streamout.suspended = true;
>>> > +     if (ctx->has_graphics) {
>>> > +             if (!LIST_IS_EMPTY(&ctx->active_queries))
>>> > +                     si_suspend_queries(ctx);
>>> > +
>>> > +             ctx->streamout.suspended = false;
>>> > +             if (ctx->streamout.begin_emitted) {
>>> > +                     si_emit_streamout_end(ctx);
>>> > +                     ctx->streamout.suspended = true;
>>> > +             }
>>> >       }
>>> >
>>> >       /* Make sure CP DMA is idle at the end of IBs after L2 prefetches
>>> >        * because the kernel doesn't wait for it. */
>>> >       if (ctx->chip_class >= CIK)
>>> >               si_cp_dma_wait_for_idle(ctx);
>>> >
>>> >       /* Wait for draw calls to finish if needed. */
>>> >       if (wait_flags) {
>>> >               ctx->flags |= wait_flags;
>>> > @@ -209,20 +211,29 @@ void si_begin_new_gfx_cs(struct si_context *ctx)
>>> >        * IB starts drawing.
>>> >        *
>>> >        * TODO: Do we also need to invalidate CB & DB caches?
>>> >        */
>>> >       ctx->flags |= SI_CONTEXT_INV_ICACHE |
>>> >                     SI_CONTEXT_INV_SMEM_L1 |
>>> >                     SI_CONTEXT_INV_VMEM_L1 |
>>> >                     SI_CONTEXT_INV_GLOBAL_L2 |
>>> >                     SI_CONTEXT_START_PIPELINE_STATS;
>>> >
>>> > +     ctx->cs_shader_state.initialized = false;
>>> > +     si_all_descriptors_begin_new_cs(ctx);
>>> > +     si_all_resident_buffers_begin_new_cs(ctx);
>>> > +
>>> > +     if (!ctx->has_graphics) {
>>> > +             ctx->initial_gfx_cs_size = ctx->gfx_cs->current.cdw;
>>> > +             return;
>>> > +     }
>>> > +
>>> >       /* set all valid group as dirty so they get reemited on
>>> >        * next draw command
>>> >        */
>>> >       si_pm4_reset_emitted(ctx);
>>> >
>>> >       /* The CS initialization should be emitted before everything
>>> else. */
>>> >       si_pm4_emit(ctx, ctx->init_config);
>>> >       if (ctx->init_config_gs_rings)
>>> >               si_pm4_emit(ctx, ctx->init_config_gs_rings);
>>> >
>>> > @@ -273,22 +284,20 @@ void si_begin_new_gfx_cs(struct si_context *ctx)
>>> >       si_mark_atom_dirty(ctx, &ctx->atoms.s.db_render_state);
>>> >       if (ctx->chip_class >= GFX9)
>>> >               si_mark_atom_dirty(ctx, &ctx->atoms.s.dpbb_state);
>>> >       si_mark_atom_dirty(ctx, &ctx->atoms.s.stencil_ref);
>>> >       si_mark_atom_dirty(ctx, &ctx->atoms.s.spi_map);
>>> >       si_mark_atom_dirty(ctx, &ctx->atoms.s.streamout_enable);
>>> >       si_mark_atom_dirty(ctx, &ctx->atoms.s.render_cond);
>>> >       /* CLEAR_STATE disables all window rectangles. */
>>> >       if (!has_clear_state || ctx->num_window_rectangles > 0)
>>> >               si_mark_atom_dirty(ctx, &ctx->atoms.s.window_rectangles);
>>> > -     si_all_descriptors_begin_new_cs(ctx);
>>> > -     si_all_resident_buffers_begin_new_cs(ctx);
>>> >
>>> >       ctx->scissors.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
>>> >       ctx->viewports.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
>>> >       ctx->viewports.depth_range_dirty_mask = (1 << SI_MAX_VIEWPORTS)
>>> - 1;
>>> >       si_mark_atom_dirty(ctx, &ctx->atoms.s.guardband);
>>> >       si_mark_atom_dirty(ctx, &ctx->atoms.s.scissors);
>>> >       si_mark_atom_dirty(ctx, &ctx->atoms.s.viewports);
>>> >
>>> >       si_mark_atom_dirty(ctx, &ctx->atoms.s.scratch_state);
>>> >       if (ctx->scratch_buffer) {
>>> > @@ -316,22 +325,20 @@ void si_begin_new_gfx_cs(struct si_context *ctx)
>>> >       ctx->last_multi_vgt_param = -1;
>>> >       ctx->last_rast_prim = -1;
>>> >       ctx->last_sc_line_stipple = ~0;
>>> >       ctx->last_vs_state = ~0;
>>> >       ctx->last_ls = NULL;
>>> >       ctx->last_tcs = NULL;
>>> >       ctx->last_tes_sh_base = -1;
>>> >       ctx->last_num_tcs_input_cp = -1;
>>> >       ctx->last_ls_hs_config = -1; /* impossible value */
>>> >
>>> > -     ctx->cs_shader_state.initialized = false;
>>> > -
>>> >       if (has_clear_state) {
>>> >
>>>  ctx->tracked_regs.reg_value[SI_TRACKED_DB_RENDER_CONTROL] =
>>> > 0x00000000;
>>> >               ctx->tracked_regs.reg_value[SI_TRACKED_DB_COUNT_CONTROL]
>>> =
>>> > 0x00000000;
>>> >
>>>  ctx->tracked_regs.reg_value[SI_TRACKED_DB_RENDER_OVERRIDE2] =
>>> > 0x00000000;
>>> >
>>>  ctx->tracked_regs.reg_value[SI_TRACKED_DB_SHADER_CONTROL] =
>>> > 0x00000000;
>>> >               ctx->tracked_regs.reg_value[SI_TRACKED_CB_TARGET_MASK] =
>>> 0xffffffff;
>>> >               ctx->tracked_regs.reg_value[SI_TRACKED_CB_DCC_CONTROL] =
>>> 0x00000000;
>>> >
>>>  ctx->tracked_regs.reg_value[SI_TRACKED_SX_PS_DOWNCONVERT] =
>>> > 0x00000000;
>>> >
>>>  ctx->tracked_regs.reg_value[SI_TRACKED_SX_BLEND_OPT_EPSILON] =
>>> > 0x00000000;
>>> >
>>>  ctx->tracked_regs.reg_value[SI_TRACKED_SX_BLEND_OPT_CONTROL] =
>>> > 0x00000000;
>>> > diff --git a/src/gallium/drivers/radeonsi/si_pipe.c
>>> > b/src/gallium/drivers/radeonsi/si_pipe.c
>>> > index 20767c806d2..c2ec664d5a4 100644
>>> > --- a/src/gallium/drivers/radeonsi/si_pipe.c
>>> > +++ b/src/gallium/drivers/radeonsi/si_pipe.c
>>> > @@ -381,61 +381,56 @@ static struct pipe_context
>>> > *si_create_context(struct pipe_screen *screen,
>>> >  {
>>> >       struct si_context *sctx = CALLOC_STRUCT(si_context);
>>> >       struct si_screen* sscreen = (struct si_screen *)screen;
>>> >       struct radeon_winsys *ws = sscreen->ws;
>>> >       int shader, i;
>>> >       bool stop_exec_on_failure = (flags &
>>> > PIPE_CONTEXT_LOSE_CONTEXT_ON_RESET) != 0;
>>> >
>>> >       if (!sctx)
>>> >               return NULL;
>>> >
>>> > +     sctx->has_graphics = sscreen->info.chip_class == SI ||
>>> > +                          !(flags & PIPE_CONTEXT_COMPUTE_ONLY);
>>> > +
>>> >       if (flags & PIPE_CONTEXT_DEBUG)
>>> >               sscreen->record_llvm_ir = true; /* racy but not critical
>>> */
>>> >
>>> >       sctx->b.screen = screen; /* this must be set first */
>>> >       sctx->b.priv = NULL;
>>> >       sctx->b.destroy = si_destroy_context;
>>> > -     sctx->b.emit_string_marker = si_emit_string_marker;
>>> > -     sctx->b.set_debug_callback = si_set_debug_callback;
>>> > -     sctx->b.set_log_context = si_set_log_context;
>>> > -     sctx->b.set_context_param = si_set_context_param;
>>> >       sctx->screen = sscreen; /* Easy accessing of screen/winsys. */
>>> >       sctx->is_debug = (flags & PIPE_CONTEXT_DEBUG) != 0;
>>> >
>>> >       slab_create_child(&sctx->pool_transfers,
>>> &sscreen->pool_transfers);
>>> >       slab_create_child(&sctx->pool_transfers_unsync,
>>> > &sscreen->pool_transfers);
>>> >
>>> >       sctx->ws = sscreen->ws;
>>> >       sctx->family = sscreen->info.family;
>>> >       sctx->chip_class = sscreen->info.chip_class;
>>> >
>>> >       if (sscreen->info.has_gpu_reset_counter_query) {
>>> >               sctx->gpu_reset_counter =
>>> >                       sctx->ws->query_value(sctx->ws,
>>> RADEON_GPU_RESET_COUNTER);
>>> >       }
>>> >
>>> > -     sctx->b.get_device_reset_status = si_get_reset_status;
>>> > -     sctx->b.set_device_reset_callback = si_set_device_reset_callback;
>>> > -
>>> > -     si_init_context_texture_functions(sctx);
>>> > -     si_init_query_functions(sctx);
>>> >
>>> >       if (sctx->chip_class == CIK ||
>>> >           sctx->chip_class == VI ||
>>> >           sctx->chip_class == GFX9) {
>>> >               sctx->eop_bug_scratch = si_resource(
>>> >                       pipe_buffer_create(&sscreen->b, 0,
>>> PIPE_USAGE_DEFAULT,
>>> >                                          16 *
>>> sscreen->info.num_render_backends));
>>> >               if (!sctx->eop_bug_scratch)
>>> >                       goto fail;
>>> >       }
>>> >
>>> > +     /* Initialize context allocators. */
>>> >       sctx->allocator_zeroed_memory =
>>> >               u_suballocator_create(&sctx->b, 128 * 1024,
>>> >                                     0, PIPE_USAGE_DEFAULT,
>>> >                                     SI_RESOURCE_FLAG_UNMAPPABLE |
>>> >                                     SI_RESOURCE_FLAG_CLEAR, false);
>>> >       if (!sctx->allocator_zeroed_memory)
>>> >               goto fail;
>>> >
>>> >       sctx->b.stream_uploader = u_upload_create(&sctx->b, 1024 * 1024,
>>> >                                                   0, PIPE_USAGE_STREAM,
>>> > @@ -459,38 +454,22 @@ static struct pipe_context
>>> > *si_create_context(struct pipe_screen *screen,
>>> >       sctx->ctx = sctx->ws->ctx_create(sctx->ws);
>>> >       if (!sctx->ctx)
>>> >               goto fail;
>>> >
>>> >       if (sscreen->info.num_sdma_rings && !(sscreen->debug_flags &
>>> > DBG(NO_ASYNC_DMA))) {
>>> >               sctx->dma_cs = sctx->ws->cs_create(sctx->ctx, RING_DMA,
>>> >
>>> (void*)si_flush_dma_cs,
>>> >                                                  sctx,
>>> stop_exec_on_failure);
>>> >       }
>>> >
>>> > -     si_init_buffer_functions(sctx);
>>> > -     si_init_clear_functions(sctx);
>>> > -     si_init_blit_functions(sctx);
>>> > -     si_init_compute_functions(sctx);
>>> > -     si_init_compute_blit_functions(sctx);
>>> > -     si_init_debug_functions(sctx);
>>> > -     si_init_msaa_functions(sctx);
>>> > -     si_init_streamout_functions(sctx);
>>> > -
>>> > -     if (sscreen->info.has_hw_decode) {
>>> > -             sctx->b.create_video_codec = si_uvd_create_decoder;
>>> > -             sctx->b.create_video_buffer = si_video_buffer_create;
>>> > -     } else {
>>> > -             sctx->b.create_video_codec = vl_create_decoder;
>>> > -             sctx->b.create_video_buffer = vl_video_buffer_create;
>>> > -     }
>>> > -
>>> > -     sctx->gfx_cs = ws->cs_create(sctx->ctx, RING_GFX,
>>> > +     sctx->gfx_cs = ws->cs_create(sctx->ctx,
>>> > +                                  sctx->has_graphics ? RING_GFX :
>>> RING_COMPUTE,
>>> >                                    (void*)si_flush_gfx_cs, sctx,
>>> stop_exec_on_failure);
>>> >
>>> >       /* Border colors. */
>>> >       sctx->border_color_table = malloc(SI_MAX_BORDER_COLORS *
>>> >
>>>  sizeof(*sctx->border_color_table));
>>> >       if (!sctx->border_color_table)
>>> >               goto fail;
>>> >
>>> >       sctx->border_color_buffer = si_resource(
>>> >               pipe_buffer_create(screen, 0, PIPE_USAGE_DEFAULT,
>>> > @@ -498,43 +477,76 @@ static struct pipe_context
>>> > *si_create_context(struct pipe_screen *screen,
>>> >                                  sizeof(*sctx->border_color_table)));
>>> >       if (!sctx->border_color_buffer)
>>> >               goto fail;
>>> >
>>> >       sctx->border_color_map =
>>> >               ws->buffer_map(sctx->border_color_buffer->buf,
>>> >                              NULL, PIPE_TRANSFER_WRITE);
>>> >       if (!sctx->border_color_map)
>>> >               goto fail;
>>> >
>>> > +     /* Initialize context functions used by graphics and compute. */
>>> > +     sctx->b.emit_string_marker = si_emit_string_marker;
>>> > +     sctx->b.set_debug_callback = si_set_debug_callback;
>>> > +     sctx->b.set_log_context = si_set_log_context;
>>> > +     sctx->b.set_context_param = si_set_context_param;
>>> > +     sctx->b.get_device_reset_status = si_get_reset_status;
>>> > +     sctx->b.set_device_reset_callback = si_set_device_reset_callback;
>>> > +     sctx->b.memory_barrier = si_memory_barrier;
>>> > +
>>> >       si_init_all_descriptors(sctx);
>>> > +     si_init_buffer_functions(sctx);
>>> > +     si_init_clear_functions(sctx);
>>> > +     si_init_blit_functions(sctx);
>>> > +     si_init_compute_functions(sctx);
>>> > +     si_init_compute_blit_functions(sctx);
>>> > +     si_init_debug_functions(sctx);
>>> >       si_init_fence_functions(sctx);
>>> > -     si_init_state_functions(sctx);
>>> > -     si_init_shader_functions(sctx);
>>> > -     si_init_viewport_functions(sctx);
>>> > -
>>> > -     if (sctx->chip_class >= CIK)
>>> > -             cik_init_sdma_functions(sctx);
>>> > -     else
>>> > -             si_init_dma_functions(sctx);
>>> >
>>> >       if (sscreen->debug_flags & DBG(FORCE_DMA))
>>> >               sctx->b.resource_copy_region = sctx->dma_copy;
>>> >
>>> > -     sctx->blitter = util_blitter_create(&sctx->b);
>>> > -     if (sctx->blitter == NULL)
>>> > -             goto fail;
>>> > -     sctx->blitter->skip_viewport_restore = true;
>>> > +     /* Initialize graphics-only context functions. */
>>> > +     if (sctx->has_graphics) {
>>> > +             si_init_context_texture_functions(sctx);
>>> > +             si_init_query_functions(sctx);
>>> > +             si_init_msaa_functions(sctx);
>>> > +             si_init_shader_functions(sctx);
>>> > +             si_init_state_functions(sctx);
>>> > +             si_init_streamout_functions(sctx);
>>> > +             si_init_viewport_functions(sctx);
>>> > +
>>> > +             sctx->blitter = util_blitter_create(&sctx->b);
>>> > +             if (sctx->blitter == NULL)
>>> > +                     goto fail;
>>> > +             sctx->blitter->skip_viewport_restore = true;
>>> >
>>> > -     si_init_draw_functions(sctx);
>>> > +             si_init_draw_functions(sctx);
>>> > +     }
>>> > +
>>> > +     /* Initialize SDMA functions. */
>>> > +     if (sctx->chip_class >= CIK)
>>> > +             cik_init_sdma_functions(sctx);
>>> > +     else
>>> > +             si_init_dma_functions(sctx);
>>> >
>>> >       sctx->sample_mask = 0xffff;
>>> >
>>> > +     /* Initialize multimedia functions. */
>>> > +     if (sscreen->info.has_hw_decode) {
>>> > +             sctx->b.create_video_codec = si_uvd_create_decoder;
>>> > +             sctx->b.create_video_buffer = si_video_buffer_create;
>>> > +     } else {
>>> > +             sctx->b.create_video_codec = vl_create_decoder;
>>> > +             sctx->b.create_video_buffer = vl_video_buffer_create;
>>> > +     }
>>> > +
>>> >       if (sctx->chip_class >= GFX9) {
>>> >               sctx->wait_mem_scratch = si_resource(
>>> >                       pipe_buffer_create(screen, 0,
>>> PIPE_USAGE_DEFAULT, 4));
>>> >               if (!sctx->wait_mem_scratch)
>>> >                       goto fail;
>>> >
>>> >               /* Initialize the memory. */
>>> >               si_cp_write_data(sctx, sctx->wait_mem_scratch, 0, 4,
>>> >                                V_370_MEM, V_370_ME,
>>> &sctx->wait_mem_number);
>>> >       }
>>> > @@ -544,21 +556,22 @@ static struct pipe_context
>>> > *si_create_context(struct pipe_screen *screen,
>>> >       if (sctx->chip_class == CIK) {
>>> >               sctx->null_const_buf.buffer =
>>> >                       pipe_aligned_buffer_create(screen,
>>> >
>>> SI_RESOURCE_FLAG_32BIT,
>>> >                                                  PIPE_USAGE_DEFAULT,
>>> 16,
>>> >
>>> sctx->screen->info.tcc_cache_line_size);
>>> >               if (!sctx->null_const_buf.buffer)
>>> >                       goto fail;
>>> >               sctx->null_const_buf.buffer_size =
>>> > sctx->null_const_buf.buffer->width0;
>>> >
>>> > -             for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
>>> > +             unsigned start_shader = sctx->has_graphics ? 0 :
>>> > PIPE_SHADER_COMPUTE;
>>> > +             for (shader = start_shader; shader < SI_NUM_SHADERS;
>>> shader++) {
>>> >                       for (i = 0; i < SI_NUM_CONST_BUFFERS; i++) {
>>> >                               sctx->b.set_constant_buffer(&sctx->b,
>>> shader, i,
>>> >
>>>  &sctx->null_const_buf);
>>> >                       }
>>> >               }
>>> >
>>> >               si_set_rw_buffer(sctx, SI_HS_CONST_DEFAULT_TESS_LEVELS,
>>> >                                &sctx->null_const_buf);
>>> >               si_set_rw_buffer(sctx, SI_VS_CONST_INSTANCE_DIVISORS,
>>> >                                &sctx->null_const_buf);
>>> > diff --git a/src/gallium/drivers/radeonsi/si_pipe.h
>>> > b/src/gallium/drivers/radeonsi/si_pipe.h
>>> > index b01d5744752..348e8e5bd26 100644
>>> > --- a/src/gallium/drivers/radeonsi/si_pipe.h
>>> > +++ b/src/gallium/drivers/radeonsi/si_pipe.h
>>> > @@ -777,21 +777,21 @@ struct si_saved_cs {
>>> >  };
>>> >
>>> >  struct si_context {
>>> >       struct pipe_context             b; /* base class */
>>> >
>>> >       enum radeon_family              family;
>>> >       enum chip_class                 chip_class;
>>> >
>>> >       struct radeon_winsys            *ws;
>>> >       struct radeon_winsys_ctx        *ctx;
>>> > -     struct radeon_cmdbuf            *gfx_cs;
>>> > +     struct radeon_cmdbuf            *gfx_cs; /* compute IB if
>>> graphics is disabled
>>> > */
>>> >       struct radeon_cmdbuf            *dma_cs;
>>> >       struct pipe_fence_handle        *last_gfx_fence;
>>> >       struct pipe_fence_handle        *last_sdma_fence;
>>> >       struct si_resource              *eop_bug_scratch;
>>> >       struct u_upload_mgr             *cached_gtt_allocator;
>>> >       struct threaded_context         *tc;
>>> >       struct u_suballocator           *allocator_zeroed_memory;
>>> >       struct slab_child_pool          pool_transfers;
>>> >       struct slab_child_pool          pool_transfers_unsync; /* for
>>> > threaded_context */
>>> >       struct pipe_device_reset_callback device_reset_callback;
>>> > @@ -815,20 +815,21 @@ struct si_context {
>>> >       void                            *cs_clear_render_target;
>>> >       void                            *cs_clear_render_target_1d_array;
>>> >       struct si_screen                *screen;
>>> >       struct pipe_debug_callback      debug;
>>> >       struct ac_llvm_compiler         compiler; /* only non-threaded
>>> compilation
>>> > */
>>> >       struct si_shader_ctx_state      fixed_func_tcs_shader;
>>> >       struct si_resource              *wait_mem_scratch;
>>> >       unsigned                        wait_mem_number;
>>> >       uint16_t                        prefetch_L2_mask;
>>> >
>>> > +     bool                            has_graphics;
>>> >       bool                            gfx_flush_in_progress:1;
>>> >       bool                            gfx_last_ib_is_busy:1;
>>> >       bool                            compute_is_busy:1;
>>> >
>>> >       unsigned                        num_gfx_cs_flushes;
>>> >       unsigned                        initial_gfx_cs_size;
>>> >       unsigned                        gpu_reset_counter;
>>> >       unsigned                        last_dirty_tex_counter;
>>> >       unsigned                        last_compressed_colortex_counter;
>>> >       unsigned                        last_num_draw_calls;
>>> > diff --git a/src/gallium/drivers/radeonsi/si_state.c
>>> > b/src/gallium/drivers/radeonsi/si_state.c
>>> > index b49a1b3695e..458b108a7e3 100644
>>> > --- a/src/gallium/drivers/radeonsi/si_state.c
>>> > +++ b/src/gallium/drivers/radeonsi/si_state.c
>>> > @@ -4699,21 +4699,21 @@ static void si_texture_barrier(struct
>>> > pipe_context *ctx, unsigned flags)
>>> >
>>> >       si_update_fb_dirtiness_after_rendering(sctx);
>>> >
>>> >       /* Multisample surfaces are flushed in si_decompress_textures. */
>>> >       if (sctx->framebuffer.uncompressed_cb_mask)
>>> >               si_make_CB_shader_coherent(sctx,
>>> sctx->framebuffer.nr_samples,
>>> >
>>> sctx->framebuffer.CB_has_shader_readable_metadata);
>>> >  }
>>> >
>>> >  /* This only ensures coherency for shader image/buffer stores. */
>>> > -static void si_memory_barrier(struct pipe_context *ctx, unsigned
>>> > flags)
>>> > +void si_memory_barrier(struct pipe_context *ctx, unsigned flags)
>>> >  {
>>> >       struct si_context *sctx = (struct si_context *)ctx;
>>> >
>>> >       /* Subsequent commands must wait for all shader invocations to
>>> >        * complete. */
>>> >       sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
>>> >                        SI_CONTEXT_CS_PARTIAL_FLUSH;
>>> >
>>> >       if (flags & PIPE_BARRIER_CONSTANT_BUFFER)
>>> >               sctx->flags |= SI_CONTEXT_INV_SMEM_L1 |
>>> > @@ -4813,21 +4813,20 @@ void si_init_state_functions(struct si_context
>>> > *sctx)
>>> >       sctx->b.sampler_view_destroy = si_sampler_view_destroy;
>>> >
>>> >       sctx->b.set_sample_mask = si_set_sample_mask;
>>> >
>>> >       sctx->b.create_vertex_elements_state = si_create_vertex_elements;
>>> >       sctx->b.bind_vertex_elements_state = si_bind_vertex_elements;
>>> >       sctx->b.delete_vertex_elements_state = si_delete_vertex_element;
>>> >       sctx->b.set_vertex_buffers = si_set_vertex_buffers;
>>> >
>>> >       sctx->b.texture_barrier = si_texture_barrier;
>>> > -     sctx->b.memory_barrier = si_memory_barrier;
>>> >       sctx->b.set_min_samples = si_set_min_samples;
>>> >       sctx->b.set_tess_state = si_set_tess_state;
>>> >
>>> >       sctx->b.set_active_query_state = si_set_active_query_state;
>>> >
>>> >       si_init_config(sctx);
>>> >  }
>>> >
>>> >  void si_init_screen_state_functions(struct si_screen *sscreen)
>>> >  {
>>> > diff --git a/src/gallium/drivers/radeonsi/si_state.h
>>> > b/src/gallium/drivers/radeonsi/si_state.h
>>> > index 767e789276a..6faa4c511b1 100644
>>> > --- a/src/gallium/drivers/radeonsi/si_state.h
>>> > +++ b/src/gallium/drivers/radeonsi/si_state.h
>>> > @@ -482,20 +482,21 @@ void si_set_active_descriptors_for_shader(struct
>>> > si_context *sctx,
>>> >                                         struct si_shader_selector
>>> *sel);
>>> >  bool si_bindless_descriptor_can_reclaim_slab(void *priv,
>>> >                                            struct pb_slab_entry
>>> *entry);
>>> >  struct pb_slab *si_bindless_descriptor_slab_alloc(void *priv,
>>> unsigned
>>> > heap,
>>> >                                                 unsigned entry_size,
>>> >                                                 unsigned group_index);
>>> >  void si_bindless_descriptor_slab_free(void *priv, struct pb_slab
>>> > *pslab);
>>> >  void si_rebind_buffer(struct si_context *sctx, struct pipe_resource
>>> > *buf,
>>> >                     uint64_t old_va);
>>> >  /* si_state.c */
>>> > +void si_memory_barrier(struct pipe_context *ctx, unsigned flags);
>>> >  void si_init_state_functions(struct si_context *sctx);
>>> >  void si_init_screen_state_functions(struct si_screen *sscreen);
>>> >  void
>>> >  si_make_buffer_descriptor(struct si_screen *screen, struct
>>> si_resource
>>> > *buf,
>>> >                         enum pipe_format format,
>>> >                         unsigned offset, unsigned size,
>>> >                         uint32_t *state);
>>> >  void
>>> >  si_make_texture_descriptor(struct si_screen *screen,
>>> >                          struct si_texture *tex,
>>> > diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c
>>> > b/src/gallium/drivers/radeonsi/si_state_draw.c
>>> > index 9c968e39c2c..2a514f144b9 100644
>>> > --- a/src/gallium/drivers/radeonsi/si_state_draw.c
>>> > +++ b/src/gallium/drivers/radeonsi/si_state_draw.c
>>> > @@ -872,21 +872,21 @@ static void si_emit_draw_packets(struct
>>> > si_context *sctx,
>>> >
>>>  S_0287F0_USE_OPAQUE(!!info->count_from_stream_output));
>>> >               }
>>> >       }
>>> >  }
>>> >
>>> >  static void si_emit_surface_sync(struct si_context *sctx,
>>> >                                unsigned cp_coher_cntl)
>>> >  {
>>> >       struct radeon_cmdbuf *cs = sctx->gfx_cs;
>>> >
>>> > -     if (sctx->chip_class >= GFX9) {
>>> > +     if (sctx->chip_class >= GFX9 || !sctx->has_graphics) {
>>> >               /* Flush caches and wait for the caches to assert idle.
>>> */
>>> >               radeon_emit(cs, PKT3(PKT3_ACQUIRE_MEM, 5, 0));
>>> >               radeon_emit(cs, cp_coher_cntl); /* CP_COHER_CNTL */
>>> >               radeon_emit(cs, 0xffffffff);    /* CP_COHER_SIZE */
>>> >               radeon_emit(cs, 0xffffff);      /* CP_COHER_SIZE_HI */
>>> >               radeon_emit(cs, 0);             /* CP_COHER_BASE */
>>> >               radeon_emit(cs, 0);             /* CP_COHER_BASE_HI */
>>> >               radeon_emit(cs, 0x0000000A);    /* POLL_INTERVAL */
>>> >       } else {
>>> >               /* ACQUIRE_MEM is only required on a compute ring. */
>>> > @@ -895,20 +895,32 @@ static void si_emit_surface_sync(struct
>>> > si_context *sctx,
>>> >               radeon_emit(cs, 0xffffffff);      /* CP_COHER_SIZE */
>>> >               radeon_emit(cs, 0);               /* CP_COHER_BASE */
>>> >               radeon_emit(cs, 0x0000000A);      /* POLL_INTERVAL */
>>> >       }
>>> >  }
>>> >
>>> >  void si_emit_cache_flush(struct si_context *sctx)
>>> >  {
>>> >       struct radeon_cmdbuf *cs = sctx->gfx_cs;
>>> >       uint32_t flags = sctx->flags;
>>> > +
>>> > +     if (!sctx->has_graphics) {
>>> > +             /* Only process compute flags. */
>>> > +             flags &= SI_CONTEXT_INV_ICACHE |
>>> > +                      SI_CONTEXT_INV_SMEM_L1 |
>>> > +                      SI_CONTEXT_INV_VMEM_L1 |
>>> > +                      SI_CONTEXT_INV_GLOBAL_L2 |
>>> > +                      SI_CONTEXT_WRITEBACK_GLOBAL_L2 |
>>> > +                      SI_CONTEXT_INV_L2_METADATA |
>>> > +                      SI_CONTEXT_CS_PARTIAL_FLUSH;
>>> > +     }
>>> > +
>>> >       uint32_t cp_coher_cntl = 0;
>>> >       uint32_t flush_cb_db = flags & (SI_CONTEXT_FLUSH_AND_INV_CB |
>>> >                                       SI_CONTEXT_FLUSH_AND_INV_DB);
>>> >
>>> >       if (flags & SI_CONTEXT_FLUSH_AND_INV_CB)
>>> >               sctx->num_cb_cache_flushes++;
>>> >       if (flags & SI_CONTEXT_FLUSH_AND_INV_DB)
>>> >               sctx->num_db_cache_flushes++;
>>> >
>>> >       /* SI has a bug that it always flushes ICACHE and KCACHE if
>>> either
>>> > @@ -1061,25 +1073,26 @@ void si_emit_cache_flush(struct si_context
>>> > *sctx)
>>> >                                 EOP_DATA_SEL_VALUE_32BIT,
>>> >                                 sctx->wait_mem_scratch, va,
>>> >                                 sctx->wait_mem_number, SI_NOT_QUERY);
>>> >               si_cp_wait_mem(sctx, cs, va, sctx->wait_mem_number,
>>> 0xffffffff,
>>> >                              WAIT_REG_MEM_EQUAL);
>>> >       }
>>> >
>>> >       /* Make sure ME is idle (it executes most packets) before
>>> continuing.
>>> >        * This prevents read-after-write hazards between PFP and ME.
>>> >        */
>>> > -     if (cp_coher_cntl ||
>>> > -         (flags & (SI_CONTEXT_CS_PARTIAL_FLUSH |
>>> > -                         SI_CONTEXT_INV_VMEM_L1 |
>>> > -                         SI_CONTEXT_INV_GLOBAL_L2 |
>>> > -                         SI_CONTEXT_WRITEBACK_GLOBAL_L2))) {
>>> > +     if (sctx->has_graphics &&
>>> > +         (cp_coher_cntl ||
>>> > +          (flags & (SI_CONTEXT_CS_PARTIAL_FLUSH |
>>> > +                    SI_CONTEXT_INV_VMEM_L1 |
>>> > +                    SI_CONTEXT_INV_GLOBAL_L2 |
>>> > +                    SI_CONTEXT_WRITEBACK_GLOBAL_L2)))) {
>>> >               radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
>>> >               radeon_emit(cs, 0);
>>> >       }
>>> >
>>> >       /* SI-CI-VI only:
>>> >        *   When one of the CP_COHER_CNTL.DEST_BASE flags is set,
>>> > SURFACE_SYNC
>>> >        *   waits for idle, so it should be last. SURFACE_SYNC is done
>>> in
>>> > PFP.
>>> >        *
>>> >        * cp_coher_cntl should contain all necessary flags except TC
>>> flags
>>> >        * at this point.
>>> > diff --git a/src/gallium/drivers/radeonsi/si_texture.c
>>> > b/src/gallium/drivers/radeonsi/si_texture.c
>>> > index a50088d2d8f..581f90a7b2f 100644
>>> > --- a/src/gallium/drivers/radeonsi/si_texture.c
>>> > +++ b/src/gallium/drivers/radeonsi/si_texture.c
>>> > @@ -457,20 +457,23 @@ static bool si_texture_discard_dcc(struct
>>> > si_screen *sscreen,
>>> >   *   compressed tiled
>>> >   *
>>> >   * \param sctx  the current context if you have one, or
>>> > sscreen->aux_context
>>> >   *              if you don't.
>>> >   */
>>> >  bool si_texture_disable_dcc(struct si_context *sctx,
>>> >                           struct si_texture *tex)
>>> >  {
>>> >       struct si_screen *sscreen = sctx->screen;
>>> >
>>> > +     if (!sctx->has_graphics)
>>> > +             return si_texture_discard_dcc(sscreen, tex);
>>> > +
>>> >       if (!si_can_disable_dcc(tex))
>>> >               return false;
>>> >
>>> >       if (&sctx->b == sscreen->aux_context)
>>> >               mtx_lock(&sscreen->aux_context_lock);
>>> >
>>> >       /* Decompress DCC. */
>>> >       si_decompress_dcc(sctx, tex);
>>> >       sctx->b.flush(&sctx->b, NULL, 0);
>>>
>> _______________________________________________
>> mesa-dev mailing list
>> mesa-dev@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
>
On Tue, 2019-02-26 at 18:34 -0500, Marek Olšák wrote:
> I ran a simple test verifying that compute is working properly on the
> compute ring.

I guess this was not on raven? With this patch I no longer see the gfx
timeout, but the apps still hang. Anyway, that's a separate issue.

> 
> When clover is using compute rings, it doesn't stall/block graphics
> operations.

It'd be nice to include this information in the commit message.

Jan

> 
> Marek
> 
> On Tue, Feb 26, 2019 at 4:10 PM Jan Vesely <jan.vesely@rutgers.edu> wrote:
> 
> > Can you add a bit of background why clover should/should not use other
> > rings?
> > 
> > I planned to test this, but my raven system can't run clover since kernel
> > 4.20 release (BZ 109649), so I need to bisect that first.
> > Can this patch help address the soft lockup issue on CIK (BZ 108879)?
> > presumably, it was tested using clover on CIK, right?
> > 
> > Jan
> > 
> > On Tue, Feb 26, 2019 at 3:00 PM Marek Olšák <maraeo@gmail.com> wrote:
> > 
> > > I'll just push it.
> > > 
> > > Marek
> > > 
> > > On Mon, Feb 25, 2019 at 9:37 PM Dieter Nützel <Dieter@nuetzel-hh.de>
> > > wrote:
> > > 
> > > > Hello Marek,
> > > > 
> > > > This series needs a rebase (if you have some time).
> > > > 
> > > > Dieter
> > > > 
> > > > Am 12.02.2019 19:12, schrieb Marek Olšák:
> > > > > From: Marek Olšák <marek.olsak@amd.com>
> > > > > 
> > > > > initialize all non-compute context functions to NULL.
> > > > > 
> > > > > v2: fix SI
> > > > > ---
> > > > >  src/gallium/drivers/radeonsi/si_blit.c        | 14 ++-
> > > > >  src/gallium/drivers/radeonsi/si_clear.c       |  7 +-
> > > > >  src/gallium/drivers/radeonsi/si_compute.c     | 15 +--
> > > > >  src/gallium/drivers/radeonsi/si_descriptors.c | 10 +-
> > > > >  src/gallium/drivers/radeonsi/si_gfx_cs.c      | 29 +++---
> > > > >  src/gallium/drivers/radeonsi/si_pipe.c        | 95 +++++++++++--------
> > > > >  src/gallium/drivers/radeonsi/si_pipe.h        |  3 +-
> > > > >  src/gallium/drivers/radeonsi/si_state.c       |  3 +-
> > > > >  src/gallium/drivers/radeonsi/si_state.h       |  1 +
> > > > >  src/gallium/drivers/radeonsi/si_state_draw.c  | 25 +++--
> > > > >  src/gallium/drivers/radeonsi/si_texture.c     |  3 +
> > > > >  11 files changed, 130 insertions(+), 75 deletions(-)
> > > > > 
> > > > > diff --git a/src/gallium/drivers/radeonsi/si_blit.c
> > > > > b/src/gallium/drivers/radeonsi/si_blit.c
> > > > > index bb8d1cbd12d..f39cb5d143f 100644
> > > > > --- a/src/gallium/drivers/radeonsi/si_blit.c
> > > > > +++ b/src/gallium/drivers/radeonsi/si_blit.c
> > > > > @@ -1345,25 +1345,31 @@ static void si_flush_resource(struct
> > > > > pipe_context *ctx,
> > > > > 
> > > > >               if (separate_dcc_dirty) {
> > > > >                       tex->separate_dcc_dirty = false;
> > > > >                       vi_separate_dcc_process_and_reset_stats(ctx,
> > > > 
> > > > tex);
> > > > >               }
> > > > >       }
> > > > >  }
> > > > > 
> > > > >  void si_decompress_dcc(struct si_context *sctx, struct si_texture
> > > > > *tex)
> > > > >  {
> > > > > -     if (!tex->dcc_offset)
> > > > > +     /* If graphics is disabled, we can't decompress DCC, but it
> > > > 
> > > > shouldn't
> > > > > +      * be compressed either. The caller should simply discard it.
> > > > > +      */
> > > > > +     if (!tex->dcc_offset || !sctx->has_graphics)
> > > > >               return;
> > > > > 
> > > > >       si_blit_decompress_color(sctx, tex, 0,
> > > > 
> > > > tex->buffer.b.b.last_level,
> > > > >                                0, util_max_layer(&tex->buffer.b.b, 0),
> > > > >                                true);
> > > > >  }
> > > > > 
> > > > >  void si_init_blit_functions(struct si_context *sctx)
> > > > >  {
> > > > >       sctx->b.resource_copy_region = si_resource_copy_region;
> > > > > -     sctx->b.blit = si_blit;
> > > > > -     sctx->b.flush_resource = si_flush_resource;
> > > > > -     sctx->b.generate_mipmap = si_generate_mipmap;
> > > > > +
> > > > > +     if (sctx->has_graphics) {
> > > > > +             sctx->b.blit = si_blit;
> > > > > +             sctx->b.flush_resource = si_flush_resource;
> > > > > +             sctx->b.generate_mipmap = si_generate_mipmap;
> > > > > +     }
> > > > >  }
> > > > > diff --git a/src/gallium/drivers/radeonsi/si_clear.c
> > > > > b/src/gallium/drivers/radeonsi/si_clear.c
> > > > > index 9a00bb73b94..e1805f2a1c9 100644
> > > > > --- a/src/gallium/drivers/radeonsi/si_clear.c
> > > > > +++ b/src/gallium/drivers/radeonsi/si_clear.c
> > > > > @@ -764,15 +764,18 @@ static void si_clear_texture(struct pipe_context
> > > > > *pipe,
> > > > >                       util_clear_render_target(pipe, sf, &color,
> > > > >                                                box->x, box->y,
> > > > >                                                box->width,
> > > > 
> > > > box->height);
> > > > >               }
> > > > >       }
> > > > >       pipe_surface_reference(&sf, NULL);
> > > > >  }
> > > > > 
> > > > >  void si_init_clear_functions(struct si_context *sctx)
> > > > >  {
> > > > > -     sctx->b.clear = si_clear;
> > > > >       sctx->b.clear_render_target = si_clear_render_target;
> > > > > -     sctx->b.clear_depth_stencil = si_clear_depth_stencil;
> > > > >       sctx->b.clear_texture = si_clear_texture;
> > > > > +
> > > > > +     if (sctx->has_graphics) {
> > > > > +             sctx->b.clear = si_clear;
> > > > > +             sctx->b.clear_depth_stencil = si_clear_depth_stencil;
> > > > > +     }
> > > > >  }
> > > > > diff --git a/src/gallium/drivers/radeonsi/si_compute.c
> > > > > b/src/gallium/drivers/radeonsi/si_compute.c
> > > > > index 1a62b3e0844..87addd53976 100644
> > > > > --- a/src/gallium/drivers/radeonsi/si_compute.c
> > > > > +++ b/src/gallium/drivers/radeonsi/si_compute.c
> > > > > @@ -880,26 +880,28 @@ static void si_launch_grid(
> > > > >               info->block[0] * info->block[1] * info->block[2] > 256;
> > > > > 
> > > > >       if (cs_regalloc_hang)
> > > > >               sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
> > > > >                                SI_CONTEXT_CS_PARTIAL_FLUSH;
> > > > > 
> > > > >       if (program->ir_type != PIPE_SHADER_IR_NATIVE &&
> > > > >           program->shader.compilation_failed)
> > > > >               return;
> > > > > 
> > > > > -     if (sctx->last_num_draw_calls != sctx->num_draw_calls) {
> > > > > -             si_update_fb_dirtiness_after_rendering(sctx);
> > > > > -             sctx->last_num_draw_calls = sctx->num_draw_calls;
> > > > > -     }
> > > > > +     if (sctx->has_graphics) {
> > > > > +             if (sctx->last_num_draw_calls != sctx->num_draw_calls) {
> > > > > +                     si_update_fb_dirtiness_after_rendering(sctx);
> > > > > +                     sctx->last_num_draw_calls = sctx->num_draw_calls;
> > > > > +             }
> > > > > 
> > > > > -     si_decompress_textures(sctx, 1 << PIPE_SHADER_COMPUTE);
> > > > > +             si_decompress_textures(sctx, 1 << PIPE_SHADER_COMPUTE);
> > > > > +     }
> > > > > 
> > > > >       /* Add buffer sizes for memory checking in need_cs_space. */
> > > > >       si_context_add_resource_size(sctx, &program->shader.bo->b.b);
> > > > >       /* TODO: add the scratch buffer */
> > > > > 
> > > > >       if (info->indirect) {
> > > > >               si_context_add_resource_size(sctx, info->indirect);
> > > > > 
> > > > >               /* Indirect buffers use TC L2 on GFX9, but not older hw.
> > > > 
> > > > */
> > > > >               if (sctx->chip_class <= VI &&
> > > > > @@ -917,21 +919,22 @@ static void si_launch_grid(
> > > > >       if (sctx->flags)
> > > > >               si_emit_cache_flush(sctx);
> > > > > 
> > > > >       if (!si_switch_compute_shader(sctx, program, &program->shader,
> > > > >                                       code_object, info->pc))
> > > > >               return;
> > > > > 
> > > > >       si_upload_compute_shader_descriptors(sctx);
> > > > >       si_emit_compute_shader_pointers(sctx);
> > > > > 
> > > > > -     if (si_is_atom_dirty(sctx, &sctx->atoms.s.render_cond)) {
> > > > > +     if (sctx->has_graphics &&
> > > > > +         si_is_atom_dirty(sctx, &sctx->atoms.s.render_cond)) {
> > > > >               sctx->atoms.s.render_cond.emit(sctx);
> > > > >               si_set_atom_dirty(sctx, &sctx->atoms.s.render_cond,
> > > > 
> > > > false);
> > > > >       }
> > > > > 
> > > > >       if ((program->input_size ||
> > > > >              program->ir_type == PIPE_SHADER_IR_NATIVE) &&
> > > > >             unlikely(!si_upload_compute_input(sctx, code_object,
> > > > > info))) {
> > > > >               return;
> > > > >       }
> > > > > 
> > > > > diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c
> > > > > b/src/gallium/drivers/radeonsi/si_descriptors.c
> > > > > index 21d4ca946d3..0f22c55723c 100644
> > > > > --- a/src/gallium/drivers/radeonsi/si_descriptors.c
> > > > > +++ b/src/gallium/drivers/radeonsi/si_descriptors.c
> > > > > @@ -2640,22 +2640,24 @@ void
> > > > > si_all_resident_buffers_begin_new_cs(struct si_context *sctx)
> > > > > 
> > > > >       sctx->num_resident_handles += num_resident_tex_handles +
> > > > >                                       num_resident_img_handles;
> > > > >  }
> > > > > 
> > > > >  /* INIT/DEINIT/UPLOAD */
> > > > > 
> > > > >  void si_init_all_descriptors(struct si_context *sctx)
> > > > >  {
> > > > >       int i;
> > > > > +     unsigned first_shader =
> > > > > +             sctx->has_graphics ? 0 : PIPE_SHADER_COMPUTE;
> > > > > 
> > > > > -     for (i = 0; i < SI_NUM_SHADERS; i++) {
> > > > > +     for (i = first_shader; i < SI_NUM_SHADERS; i++) {
> > > > >               bool is_2nd = sctx->chip_class >= GFX9 &&
> > > > >                                    (i == PIPE_SHADER_TESS_CTRL ||
> > > > >                                     i == PIPE_SHADER_GEOMETRY);
> > > > >               unsigned num_sampler_slots = SI_NUM_IMAGES / 2 +
> > > > 
> > > > SI_NUM_SAMPLERS;
> > > > >               unsigned num_buffer_slots = SI_NUM_SHADER_BUFFERS +
> > > > > SI_NUM_CONST_BUFFERS;
> > > > >               int rel_dw_offset;
> > > > >               struct si_descriptors *desc;
> > > > > 
> > > > >               if (is_2nd) {
> > > > >                       if (i == PIPE_SHADER_TESS_CTRL) {
> > > > > @@ -2714,30 +2716,34 @@ void si_init_all_descriptors(struct si_context
> > > > > *sctx)
> > > > >       si_init_bindless_descriptors(sctx, &sctx->bindless_descriptors,
> > > > > 
> > > > 
> > > > SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES,
> > > > >                                    1024);
> > > > > 
> > > > >       sctx->descriptors_dirty = u_bit_consecutive(0, SI_NUM_DESCS);
> > > > > 
> > > > >       /* Set pipe_context functions. */
> > > > >       sctx->b.bind_sampler_states = si_bind_sampler_states;
> > > > >       sctx->b.set_shader_images = si_set_shader_images;
> > > > >       sctx->b.set_constant_buffer = si_pipe_set_constant_buffer;
> > > > > -     sctx->b.set_polygon_stipple = si_set_polygon_stipple;
> > > > >       sctx->b.set_shader_buffers = si_set_shader_buffers;
> > > > >       sctx->b.set_sampler_views = si_set_sampler_views;
> > > > >       sctx->b.create_texture_handle = si_create_texture_handle;
> > > > >       sctx->b.delete_texture_handle = si_delete_texture_handle;
> > > > > >       sctx->b.make_texture_handle_resident = si_make_texture_handle_resident;
> > > > > >       sctx->b.create_image_handle = si_create_image_handle;
> > > > > >       sctx->b.delete_image_handle = si_delete_image_handle;
> > > > > >       sctx->b.make_image_handle_resident = si_make_image_handle_resident;
> > > > > 
> > > > > +     if (!sctx->has_graphics)
> > > > > +             return;
> > > > > +
> > > > > +     sctx->b.set_polygon_stipple = si_set_polygon_stipple;
> > > > > +
> > > > >       /* Shader user data. */
> > > > > >       sctx->atoms.s.shader_pointers.emit = si_emit_graphics_shader_pointers;
> > > > > 
> > > > >       /* Set default and immutable mappings. */
> > > > > >       si_set_user_data_base(sctx, PIPE_SHADER_VERTEX, R_00B130_SPI_SHADER_USER_DATA_VS_0);
> > > > > 
> > > > >       if (sctx->chip_class >= GFX9) {
> > > > >               si_set_user_data_base(sctx, PIPE_SHADER_TESS_CTRL,
> > > > > 
> > > > 
> > > >  R_00B430_SPI_SHADER_USER_DATA_LS_0);
> > > > >               si_set_user_data_base(sctx, PIPE_SHADER_GEOMETRY,
> > > > > > diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c b/src/gallium/drivers/radeonsi/si_gfx_cs.c
> > > > > index 3d64587fa2b..d0e7cf20b4c 100644
> > > > > --- a/src/gallium/drivers/radeonsi/si_gfx_cs.c
> > > > > +++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c
> > > > > @@ -103,27 +103,29 @@ void si_flush_gfx_cs(struct si_context *ctx,
> > > > > unsigned flags,
> > > > >        * This code is only needed when the driver flushes the GFX IB
> > > > >        * internally, and it never asks for a fence handle.
> > > > >        */
> > > > >       if (radeon_emitted(ctx->dma_cs, 0)) {
> > > > >               assert(fence == NULL); /* internal flushes only */
> > > > >               si_flush_dma_cs(ctx, flags, NULL);
> > > > >       }
> > > > > 
> > > > >       ctx->gfx_flush_in_progress = true;
> > > > > 
> > > > > -     if (!LIST_IS_EMPTY(&ctx->active_queries))
> > > > > -             si_suspend_queries(ctx);
> > > > > -
> > > > > -     ctx->streamout.suspended = false;
> > > > > -     if (ctx->streamout.begin_emitted) {
> > > > > -             si_emit_streamout_end(ctx);
> > > > > -             ctx->streamout.suspended = true;
> > > > > +     if (ctx->has_graphics) {
> > > > > +             if (!LIST_IS_EMPTY(&ctx->active_queries))
> > > > > +                     si_suspend_queries(ctx);
> > > > > +
> > > > > +             ctx->streamout.suspended = false;
> > > > > +             if (ctx->streamout.begin_emitted) {
> > > > > +                     si_emit_streamout_end(ctx);
> > > > > +                     ctx->streamout.suspended = true;
> > > > > +             }
> > > > >       }
> > > > > 
> > > > >       /* Make sure CP DMA is idle at the end of IBs after L2 prefetches
> > > > >        * because the kernel doesn't wait for it. */
> > > > >       if (ctx->chip_class >= CIK)
> > > > >               si_cp_dma_wait_for_idle(ctx);
> > > > > 
> > > > >       /* Wait for draw calls to finish if needed. */
> > > > >       if (wait_flags) {
> > > > >               ctx->flags |= wait_flags;
> > > > > @@ -209,20 +211,29 @@ void si_begin_new_gfx_cs(struct si_context *ctx)
> > > > >        * IB starts drawing.
> > > > >        *
> > > > >        * TODO: Do we also need to invalidate CB & DB caches?
> > > > >        */
> > > > >       ctx->flags |= SI_CONTEXT_INV_ICACHE |
> > > > >                     SI_CONTEXT_INV_SMEM_L1 |
> > > > >                     SI_CONTEXT_INV_VMEM_L1 |
> > > > >                     SI_CONTEXT_INV_GLOBAL_L2 |
> > > > >                     SI_CONTEXT_START_PIPELINE_STATS;
> > > > > 
> > > > > +     ctx->cs_shader_state.initialized = false;
> > > > > +     si_all_descriptors_begin_new_cs(ctx);
> > > > > +     si_all_resident_buffers_begin_new_cs(ctx);
> > > > > +
> > > > > +     if (!ctx->has_graphics) {
> > > > > +             ctx->initial_gfx_cs_size = ctx->gfx_cs->current.cdw;
> > > > > +             return;
> > > > > +     }
> > > > > +
> > > > >       /* set all valid group as dirty so they get reemited on
> > > > >        * next draw command
> > > > >        */
> > > > >       si_pm4_reset_emitted(ctx);
> > > > > 
> > > > > >       /* The CS initialization should be emitted before everything else. */
> > > > > >       si_pm4_emit(ctx, ctx->init_config);
> > > > >       if (ctx->init_config_gs_rings)
> > > > >               si_pm4_emit(ctx, ctx->init_config_gs_rings);
> > > > > 
> > > > > @@ -273,22 +284,20 @@ void si_begin_new_gfx_cs(struct si_context *ctx)
> > > > >       si_mark_atom_dirty(ctx, &ctx->atoms.s.db_render_state);
> > > > >       if (ctx->chip_class >= GFX9)
> > > > >               si_mark_atom_dirty(ctx, &ctx->atoms.s.dpbb_state);
> > > > >       si_mark_atom_dirty(ctx, &ctx->atoms.s.stencil_ref);
> > > > >       si_mark_atom_dirty(ctx, &ctx->atoms.s.spi_map);
> > > > >       si_mark_atom_dirty(ctx, &ctx->atoms.s.streamout_enable);
> > > > >       si_mark_atom_dirty(ctx, &ctx->atoms.s.render_cond);
> > > > >       /* CLEAR_STATE disables all window rectangles. */
> > > > >       if (!has_clear_state || ctx->num_window_rectangles > 0)
> > > > >               si_mark_atom_dirty(ctx, &ctx->atoms.s.window_rectangles);
> > > > > -     si_all_descriptors_begin_new_cs(ctx);
> > > > > -     si_all_resident_buffers_begin_new_cs(ctx);
> > > > > 
> > > > >       ctx->scissors.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
> > > > >       ctx->viewports.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
> > > > > >       ctx->viewports.depth_range_dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
> > > > >       si_mark_atom_dirty(ctx, &ctx->atoms.s.guardband);
> > > > >       si_mark_atom_dirty(ctx, &ctx->atoms.s.scissors);
> > > > >       si_mark_atom_dirty(ctx, &ctx->atoms.s.viewports);
> > > > > 
> > > > >       si_mark_atom_dirty(ctx, &ctx->atoms.s.scratch_state);
> > > > >       if (ctx->scratch_buffer) {
> > > > > @@ -316,22 +325,20 @@ void si_begin_new_gfx_cs(struct si_context *ctx)
> > > > >       ctx->last_multi_vgt_param = -1;
> > > > >       ctx->last_rast_prim = -1;
> > > > >       ctx->last_sc_line_stipple = ~0;
> > > > >       ctx->last_vs_state = ~0;
> > > > >       ctx->last_ls = NULL;
> > > > >       ctx->last_tcs = NULL;
> > > > >       ctx->last_tes_sh_base = -1;
> > > > >       ctx->last_num_tcs_input_cp = -1;
> > > > >       ctx->last_ls_hs_config = -1; /* impossible value */
> > > > > 
> > > > > -     ctx->cs_shader_state.initialized = false;
> > > > > -
> > > > >       if (has_clear_state) {
> > > > > 
> > > > 
> > > >  ctx->tracked_regs.reg_value[SI_TRACKED_DB_RENDER_CONTROL] =
> > > > > 0x00000000;
> > > > >               ctx->tracked_regs.reg_value[SI_TRACKED_DB_COUNT_CONTROL]
> > > > 
> > > > =
> > > > > 0x00000000;
> > > > > 
> > > > 
> > > >  ctx->tracked_regs.reg_value[SI_TRACKED_DB_RENDER_OVERRIDE2] =
> > > > > 0x00000000;
> > > > > 
> > > > 
> > > >  ctx->tracked_regs.reg_value[SI_TRACKED_DB_SHADER_CONTROL] =
> > > > > 0x00000000;
> > > > >               ctx->tracked_regs.reg_value[SI_TRACKED_CB_TARGET_MASK] =
> > > > 
> > > > 0xffffffff;
> > > > >               ctx->tracked_regs.reg_value[SI_TRACKED_CB_DCC_CONTROL] =
> > > > 
> > > > 0x00000000;
> > > > > 
> > > > 
> > > >  ctx->tracked_regs.reg_value[SI_TRACKED_SX_PS_DOWNCONVERT] =
> > > > > 0x00000000;
> > > > > 
> > > > 
> > > >  ctx->tracked_regs.reg_value[SI_TRACKED_SX_BLEND_OPT_EPSILON] =
> > > > > 0x00000000;
> > > > > 
> > > > 
> > > >  ctx->tracked_regs.reg_value[SI_TRACKED_SX_BLEND_OPT_CONTROL] =
> > > > > 0x00000000;
> > > > > diff --git a/src/gallium/drivers/radeonsi/si_pipe.c
> > > > > b/src/gallium/drivers/radeonsi/si_pipe.c
> > > > > index 20767c806d2..c2ec664d5a4 100644
> > > > > --- a/src/gallium/drivers/radeonsi/si_pipe.c
> > > > > +++ b/src/gallium/drivers/radeonsi/si_pipe.c
> > > > > @@ -381,61 +381,56 @@ static struct pipe_context
> > > > > *si_create_context(struct pipe_screen *screen,
> > > > >  {
> > > > >       struct si_context *sctx = CALLOC_STRUCT(si_context);
> > > > >       struct si_screen* sscreen = (struct si_screen *)screen;
> > > > >       struct radeon_winsys *ws = sscreen->ws;
> > > > >       int shader, i;
> > > > > >       bool stop_exec_on_failure = (flags & PIPE_CONTEXT_LOSE_CONTEXT_ON_RESET) != 0;
> > > > > 
> > > > >       if (!sctx)
> > > > >               return NULL;
> > > > > 
> > > > > +     sctx->has_graphics = sscreen->info.chip_class == SI ||
> > > > > +                          !(flags & PIPE_CONTEXT_COMPUTE_ONLY);
> > > > > +
> > > > >       if (flags & PIPE_CONTEXT_DEBUG)
> > > > > >               sscreen->record_llvm_ir = true; /* racy but not critical */
> > > > > 
> > > > >       sctx->b.screen = screen; /* this must be set first */
> > > > >       sctx->b.priv = NULL;
> > > > >       sctx->b.destroy = si_destroy_context;
> > > > > -     sctx->b.emit_string_marker = si_emit_string_marker;
> > > > > -     sctx->b.set_debug_callback = si_set_debug_callback;
> > > > > -     sctx->b.set_log_context = si_set_log_context;
> > > > > -     sctx->b.set_context_param = si_set_context_param;
> > > > >       sctx->screen = sscreen; /* Easy accessing of screen/winsys. */
> > > > >       sctx->is_debug = (flags & PIPE_CONTEXT_DEBUG) != 0;
> > > > > 
> > > > >       slab_create_child(&sctx->pool_transfers,
> > > > 
> > > > &sscreen->pool_transfers);
> > > > >       slab_create_child(&sctx->pool_transfers_unsync,
> > > > > &sscreen->pool_transfers);
> > > > > 
> > > > >       sctx->ws = sscreen->ws;
> > > > >       sctx->family = sscreen->info.family;
> > > > >       sctx->chip_class = sscreen->info.chip_class;
> > > > > 
> > > > >       if (sscreen->info.has_gpu_reset_counter_query) {
> > > > >               sctx->gpu_reset_counter =
> > > > >                       sctx->ws->query_value(sctx->ws,
> > > > 
> > > > RADEON_GPU_RESET_COUNTER);
> > > > >       }
> > > > > 
> > > > > -     sctx->b.get_device_reset_status = si_get_reset_status;
> > > > > -     sctx->b.set_device_reset_callback = si_set_device_reset_callback;
> > > > > -
> > > > > -     si_init_context_texture_functions(sctx);
> > > > > -     si_init_query_functions(sctx);
> > > > > 
> > > > >       if (sctx->chip_class == CIK ||
> > > > >           sctx->chip_class == VI ||
> > > > >           sctx->chip_class == GFX9) {
> > > > >               sctx->eop_bug_scratch = si_resource(
> > > > >                       pipe_buffer_create(&sscreen->b, 0,
> > > > 
> > > > PIPE_USAGE_DEFAULT,
> > > > >                                          16 *
> > > > 
> > > > sscreen->info.num_render_backends));
> > > > >               if (!sctx->eop_bug_scratch)
> > > > >                       goto fail;
> > > > >       }
> > > > > 
> > > > > +     /* Initialize context allocators. */
> > > > >       sctx->allocator_zeroed_memory =
> > > > >               u_suballocator_create(&sctx->b, 128 * 1024,
> > > > >                                     0, PIPE_USAGE_DEFAULT,
> > > > >                                     SI_RESOURCE_FLAG_UNMAPPABLE |
> > > > >                                     SI_RESOURCE_FLAG_CLEAR, false);
> > > > >       if (!sctx->allocator_zeroed_memory)
> > > > >               goto fail;
> > > > > 
> > > > >       sctx->b.stream_uploader = u_upload_create(&sctx->b, 1024 * 1024,
> > > > >                                                   0, PIPE_USAGE_STREAM,
> > > > > @@ -459,38 +454,22 @@ static struct pipe_context
> > > > > *si_create_context(struct pipe_screen *screen,
> > > > >       sctx->ctx = sctx->ws->ctx_create(sctx->ws);
> > > > >       if (!sctx->ctx)
> > > > >               goto fail;
> > > > > 
> > > > >       if (sscreen->info.num_sdma_rings && !(sscreen->debug_flags &
> > > > > DBG(NO_ASYNC_DMA))) {
> > > > >               sctx->dma_cs = sctx->ws->cs_create(sctx->ctx, RING_DMA,
> > > > > 
> > > > 
> > > > (void*)si_flush_dma_cs,
> > > > >                                                  sctx,
> > > > 
> > > > stop_exec_on_failure);
> > > > >       }
> > > > > 
> > > > > -     si_init_buffer_functions(sctx);
> > > > > -     si_init_clear_functions(sctx);
> > > > > -     si_init_blit_functions(sctx);
> > > > > -     si_init_compute_functions(sctx);
> > > > > -     si_init_compute_blit_functions(sctx);
> > > > > -     si_init_debug_functions(sctx);
> > > > > -     si_init_msaa_functions(sctx);
> > > > > -     si_init_streamout_functions(sctx);
> > > > > -
> > > > > -     if (sscreen->info.has_hw_decode) {
> > > > > -             sctx->b.create_video_codec = si_uvd_create_decoder;
> > > > > -             sctx->b.create_video_buffer = si_video_buffer_create;
> > > > > -     } else {
> > > > > -             sctx->b.create_video_codec = vl_create_decoder;
> > > > > -             sctx->b.create_video_buffer = vl_video_buffer_create;
> > > > > -     }
> > > > > -
> > > > > -     sctx->gfx_cs = ws->cs_create(sctx->ctx, RING_GFX,
> > > > > +     sctx->gfx_cs = ws->cs_create(sctx->ctx,
> > > > > > +                                  sctx->has_graphics ? RING_GFX : RING_COMPUTE,
> > > > > >                                    (void*)si_flush_gfx_cs, sctx, stop_exec_on_failure);
> > > > > 
> > > > >       /* Border colors. */
> > > > >       sctx->border_color_table = malloc(SI_MAX_BORDER_COLORS *
> > > > > 
> > > > 
> > > >  sizeof(*sctx->border_color_table));
> > > > >       if (!sctx->border_color_table)
> > > > >               goto fail;
> > > > > 
> > > > >       sctx->border_color_buffer = si_resource(
> > > > >               pipe_buffer_create(screen, 0, PIPE_USAGE_DEFAULT,
> > > > > @@ -498,43 +477,76 @@ static struct pipe_context
> > > > > *si_create_context(struct pipe_screen *screen,
> > > > >                                  sizeof(*sctx->border_color_table)));
> > > > >       if (!sctx->border_color_buffer)
> > > > >               goto fail;
> > > > > 
> > > > >       sctx->border_color_map =
> > > > >               ws->buffer_map(sctx->border_color_buffer->buf,
> > > > >                              NULL, PIPE_TRANSFER_WRITE);
> > > > >       if (!sctx->border_color_map)
> > > > >               goto fail;
> > > > > 
> > > > > +     /* Initialize context functions used by graphics and compute. */
> > > > > +     sctx->b.emit_string_marker = si_emit_string_marker;
> > > > > +     sctx->b.set_debug_callback = si_set_debug_callback;
> > > > > +     sctx->b.set_log_context = si_set_log_context;
> > > > > +     sctx->b.set_context_param = si_set_context_param;
> > > > > +     sctx->b.get_device_reset_status = si_get_reset_status;
> > > > > +     sctx->b.set_device_reset_callback = si_set_device_reset_callback;
> > > > > +     sctx->b.memory_barrier = si_memory_barrier;
> > > > > +
> > > > >       si_init_all_descriptors(sctx);
> > > > > +     si_init_buffer_functions(sctx);
> > > > > +     si_init_clear_functions(sctx);
> > > > > +     si_init_blit_functions(sctx);
> > > > > +     si_init_compute_functions(sctx);
> > > > > +     si_init_compute_blit_functions(sctx);
> > > > > +     si_init_debug_functions(sctx);
> > > > >       si_init_fence_functions(sctx);
> > > > > -     si_init_state_functions(sctx);
> > > > > -     si_init_shader_functions(sctx);
> > > > > -     si_init_viewport_functions(sctx);
> > > > > -
> > > > > -     if (sctx->chip_class >= CIK)
> > > > > -             cik_init_sdma_functions(sctx);
> > > > > -     else
> > > > > -             si_init_dma_functions(sctx);
> > > > > 
> > > > >       if (sscreen->debug_flags & DBG(FORCE_DMA))
> > > > >               sctx->b.resource_copy_region = sctx->dma_copy;
> > > > > 
> > > > > -     sctx->blitter = util_blitter_create(&sctx->b);
> > > > > -     if (sctx->blitter == NULL)
> > > > > -             goto fail;
> > > > > -     sctx->blitter->skip_viewport_restore = true;
> > > > > +     /* Initialize graphics-only context functions. */
> > > > > +     if (sctx->has_graphics) {
> > > > > +             si_init_context_texture_functions(sctx);
> > > > > +             si_init_query_functions(sctx);
> > > > > +             si_init_msaa_functions(sctx);
> > > > > +             si_init_shader_functions(sctx);
> > > > > +             si_init_state_functions(sctx);
> > > > > +             si_init_streamout_functions(sctx);
> > > > > +             si_init_viewport_functions(sctx);
> > > > > +
> > > > > +             sctx->blitter = util_blitter_create(&sctx->b);
> > > > > +             if (sctx->blitter == NULL)
> > > > > +                     goto fail;
> > > > > +             sctx->blitter->skip_viewport_restore = true;
> > > > > 
> > > > > -     si_init_draw_functions(sctx);
> > > > > +             si_init_draw_functions(sctx);
> > > > > +     }
> > > > > +
> > > > > +     /* Initialize SDMA functions. */
> > > > > +     if (sctx->chip_class >= CIK)
> > > > > +             cik_init_sdma_functions(sctx);
> > > > > +     else
> > > > > +             si_init_dma_functions(sctx);
> > > > > 
> > > > >       sctx->sample_mask = 0xffff;
> > > > > 
> > > > > +     /* Initialize multimedia functions. */
> > > > > +     if (sscreen->info.has_hw_decode) {
> > > > > +             sctx->b.create_video_codec = si_uvd_create_decoder;
> > > > > +             sctx->b.create_video_buffer = si_video_buffer_create;
> > > > > +     } else {
> > > > > +             sctx->b.create_video_codec = vl_create_decoder;
> > > > > +             sctx->b.create_video_buffer = vl_video_buffer_create;
> > > > > +     }
> > > > > +
> > > > >       if (sctx->chip_class >= GFX9) {
> > > > >               sctx->wait_mem_scratch = si_resource(
> > > > >                       pipe_buffer_create(screen, 0,
> > > > 
> > > > PIPE_USAGE_DEFAULT, 4));
> > > > >               if (!sctx->wait_mem_scratch)
> > > > >                       goto fail;
> > > > > 
> > > > >               /* Initialize the memory. */
> > > > >               si_cp_write_data(sctx, sctx->wait_mem_scratch, 0, 4,
> > > > >                                V_370_MEM, V_370_ME,
> > > > 
> > > > &sctx->wait_mem_number);
> > > > >       }
> > > > > @@ -544,21 +556,22 @@ static struct pipe_context
> > > > > *si_create_context(struct pipe_screen *screen,
> > > > >       if (sctx->chip_class == CIK) {
> > > > >               sctx->null_const_buf.buffer =
> > > > >                       pipe_aligned_buffer_create(screen,
> > > > > 
> > > > 
> > > > SI_RESOURCE_FLAG_32BIT,
> > > > >                                                  PIPE_USAGE_DEFAULT,
> > > > 
> > > > 16,
> > > > > 
> > > > 
> > > > sctx->screen->info.tcc_cache_line_size);
> > > > >               if (!sctx->null_const_buf.buffer)
> > > > >                       goto fail;
> > > > >               sctx->null_const_buf.buffer_size =
> > > > > sctx->null_const_buf.buffer->width0;
> > > > > 
> > > > > -             for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
> > > > > > +             unsigned start_shader = sctx->has_graphics ? 0 : PIPE_SHADER_COMPUTE;
> > > > > > +             for (shader = start_shader; shader < SI_NUM_SHADERS; shader++) {
> > > > >                       for (i = 0; i < SI_NUM_CONST_BUFFERS; i++) {
> > > > >                               sctx->b.set_constant_buffer(&sctx->b,
> > > > 
> > > > shader, i,
> > > > > 
> > > > 
> > > >  &sctx->null_const_buf);
> > > > >                       }
> > > > >               }
> > > > > 
> > > > >               si_set_rw_buffer(sctx, SI_HS_CONST_DEFAULT_TESS_LEVELS,
> > > > >                                &sctx->null_const_buf);
> > > > >               si_set_rw_buffer(sctx, SI_VS_CONST_INSTANCE_DIVISORS,
> > > > >                                &sctx->null_const_buf);
> > > > > diff --git a/src/gallium/drivers/radeonsi/si_pipe.h
> > > > > b/src/gallium/drivers/radeonsi/si_pipe.h
> > > > > index b01d5744752..348e8e5bd26 100644
> > > > > --- a/src/gallium/drivers/radeonsi/si_pipe.h
> > > > > +++ b/src/gallium/drivers/radeonsi/si_pipe.h
> > > > > @@ -777,21 +777,21 @@ struct si_saved_cs {
> > > > >  };
> > > > > 
> > > > >  struct si_context {
> > > > >       struct pipe_context             b; /* base class */
> > > > > 
> > > > >       enum radeon_family              family;
> > > > >       enum chip_class                 chip_class;
> > > > > 
> > > > >       struct radeon_winsys            *ws;
> > > > >       struct radeon_winsys_ctx        *ctx;
> > > > > -     struct radeon_cmdbuf            *gfx_cs;
> > > > > > +     struct radeon_cmdbuf            *gfx_cs; /* compute IB if graphics is disabled */
> > > > >       struct radeon_cmdbuf            *dma_cs;
> > > > >       struct pipe_fence_handle        *last_gfx_fence;
> > > > >       struct pipe_fence_handle        *last_sdma_fence;
> > > > >       struct si_resource              *eop_bug_scratch;
> > > > >       struct u_upload_mgr             *cached_gtt_allocator;
> > > > >       struct threaded_context         *tc;
> > > > >       struct u_suballocator           *allocator_zeroed_memory;
> > > > >       struct slab_child_pool          pool_transfers;
> > > > >       struct slab_child_pool          pool_transfers_unsync; /* for
> > > > > threaded_context */
> > > > >       struct pipe_device_reset_callback device_reset_callback;
> > > > > @@ -815,20 +815,21 @@ struct si_context {
> > > > >       void                            *cs_clear_render_target;
> > > > >       void                            *cs_clear_render_target_1d_array;
> > > > >       struct si_screen                *screen;
> > > > >       struct pipe_debug_callback      debug;
> > > > >       struct ac_llvm_compiler         compiler; /* only non-threaded
> > > > 
> > > > compilation
> > > > > */
> > > > >       struct si_shader_ctx_state      fixed_func_tcs_shader;
> > > > >       struct si_resource              *wait_mem_scratch;
> > > > >       unsigned                        wait_mem_number;
> > > > >       uint16_t                        prefetch_L2_mask;
> > > > > 
> > > > > +     bool                            has_graphics;
> > > > >       bool                            gfx_flush_in_progress:1;
> > > > >       bool                            gfx_last_ib_is_busy:1;
> > > > >       bool                            compute_is_busy:1;
> > > > > 
> > > > >       unsigned                        num_gfx_cs_flushes;
> > > > >       unsigned                        initial_gfx_cs_size;
> > > > >       unsigned                        gpu_reset_counter;
> > > > >       unsigned                        last_dirty_tex_counter;
> > > > >       unsigned                        last_compressed_colortex_counter;
> > > > >       unsigned                        last_num_draw_calls;
> > > > > diff --git a/src/gallium/drivers/radeonsi/si_state.c
> > > > > b/src/gallium/drivers/radeonsi/si_state.c
> > > > > index b49a1b3695e..458b108a7e3 100644
> > > > > --- a/src/gallium/drivers/radeonsi/si_state.c
> > > > > +++ b/src/gallium/drivers/radeonsi/si_state.c
> > > > > @@ -4699,21 +4699,21 @@ static void si_texture_barrier(struct
> > > > > pipe_context *ctx, unsigned flags)
> > > > > 
> > > > >       si_update_fb_dirtiness_after_rendering(sctx);
> > > > > 
> > > > >       /* Multisample surfaces are flushed in si_decompress_textures. */
> > > > >       if (sctx->framebuffer.uncompressed_cb_mask)
> > > > >               si_make_CB_shader_coherent(sctx,
> > > > 
> > > > sctx->framebuffer.nr_samples,
> > > > > 
> > > > 
> > > > sctx->framebuffer.CB_has_shader_readable_metadata);
> > > > >  }
> > > > > 
> > > > >  /* This only ensures coherency for shader image/buffer stores. */
> > > > > > -static void si_memory_barrier(struct pipe_context *ctx, unsigned flags)
> > > > > +void si_memory_barrier(struct pipe_context *ctx, unsigned flags)
> > > > >  {
> > > > >       struct si_context *sctx = (struct si_context *)ctx;
> > > > > 
> > > > >       /* Subsequent commands must wait for all shader invocations to
> > > > >        * complete. */
> > > > >       sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
> > > > >                        SI_CONTEXT_CS_PARTIAL_FLUSH;
> > > > > 
> > > > >       if (flags & PIPE_BARRIER_CONSTANT_BUFFER)
> > > > >               sctx->flags |= SI_CONTEXT_INV_SMEM_L1 |
> > > > > @@ -4813,21 +4813,20 @@ void si_init_state_functions(struct si_context
> > > > > *sctx)
> > > > >       sctx->b.sampler_view_destroy = si_sampler_view_destroy;
> > > > > 
> > > > >       sctx->b.set_sample_mask = si_set_sample_mask;
> > > > > 
> > > > >       sctx->b.create_vertex_elements_state = si_create_vertex_elements;
> > > > >       sctx->b.bind_vertex_elements_state = si_bind_vertex_elements;
> > > > >       sctx->b.delete_vertex_elements_state = si_delete_vertex_element;
> > > > >       sctx->b.set_vertex_buffers = si_set_vertex_buffers;
> > > > > 
> > > > >       sctx->b.texture_barrier = si_texture_barrier;
> > > > > -     sctx->b.memory_barrier = si_memory_barrier;
> > > > >       sctx->b.set_min_samples = si_set_min_samples;
> > > > >       sctx->b.set_tess_state = si_set_tess_state;
> > > > > 
> > > > >       sctx->b.set_active_query_state = si_set_active_query_state;
> > > > > 
> > > > >       si_init_config(sctx);
> > > > >  }
> > > > > 
> > > > >  void si_init_screen_state_functions(struct si_screen *sscreen)
> > > > >  {
> > > > > diff --git a/src/gallium/drivers/radeonsi/si_state.h
> > > > > b/src/gallium/drivers/radeonsi/si_state.h
> > > > > index 767e789276a..6faa4c511b1 100644
> > > > > --- a/src/gallium/drivers/radeonsi/si_state.h
> > > > > +++ b/src/gallium/drivers/radeonsi/si_state.h
> > > > > @@ -482,20 +482,21 @@ void si_set_active_descriptors_for_shader(struct
> > > > > si_context *sctx,
> > > > >                                         struct si_shader_selector
> > > > 
> > > > *sel);
> > > > >  bool si_bindless_descriptor_can_reclaim_slab(void *priv,
> > > > >                                            struct pb_slab_entry
> > > > 
> > > > *entry);
> > > > >  struct pb_slab *si_bindless_descriptor_slab_alloc(void *priv,
> > > > 
> > > > unsigned
> > > > > heap,
> > > > >                                                 unsigned entry_size,
> > > > >                                                 unsigned group_index);
> > > > >  void si_bindless_descriptor_slab_free(void *priv, struct pb_slab
> > > > > *pslab);
> > > > >  void si_rebind_buffer(struct si_context *sctx, struct pipe_resource
> > > > > *buf,
> > > > >                     uint64_t old_va);
> > > > >  /* si_state.c */
> > > > > +void si_memory_barrier(struct pipe_context *ctx, unsigned flags);
> > > > >  void si_init_state_functions(struct si_context *sctx);
> > > > >  void si_init_screen_state_functions(struct si_screen *sscreen);
> > > > >  void
> > > > >  si_make_buffer_descriptor(struct si_screen *screen, struct
> > > > 
> > > > si_resource
> > > > > *buf,
> > > > >                         enum pipe_format format,
> > > > >                         unsigned offset, unsigned size,
> > > > >                         uint32_t *state);
> > > > >  void
> > > > >  si_make_texture_descriptor(struct si_screen *screen,
> > > > >                          struct si_texture *tex,
> > > > > diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c
> > > > > b/src/gallium/drivers/radeonsi/si_state_draw.c
> > > > > index 9c968e39c2c..2a514f144b9 100644
> > > > > --- a/src/gallium/drivers/radeonsi/si_state_draw.c
> > > > > +++ b/src/gallium/drivers/radeonsi/si_state_draw.c
> > > > > @@ -872,21 +872,21 @@ static void si_emit_draw_packets(struct
> > > > > si_context *sctx,
> > > > > 
> > > > 
> > > >  S_0287F0_USE_OPAQUE(!!info->count_from_stream_output));
> > > > >               }
> > > > >       }
> > > > >  }
> > > > > 
> > > > >  static void si_emit_surface_sync(struct si_context *sctx,
> > > > >                                unsigned cp_coher_cntl)
> > > > >  {
> > > > >       struct radeon_cmdbuf *cs = sctx->gfx_cs;
> > > > > 
> > > > > -     if (sctx->chip_class >= GFX9) {
> > > > > +     if (sctx->chip_class >= GFX9 || !sctx->has_graphics) {
> > > > > >               /* Flush caches and wait for the caches to assert idle. */
> > > > >               radeon_emit(cs, PKT3(PKT3_ACQUIRE_MEM, 5, 0));
> > > > >               radeon_emit(cs, cp_coher_cntl); /* CP_COHER_CNTL */
> > > > >               radeon_emit(cs, 0xffffffff);    /* CP_COHER_SIZE */
> > > > >               radeon_emit(cs, 0xffffff);      /* CP_COHER_SIZE_HI */
> > > > >               radeon_emit(cs, 0);             /* CP_COHER_BASE */
> > > > >               radeon_emit(cs, 0);             /* CP_COHER_BASE_HI */
> > > > >               radeon_emit(cs, 0x0000000A);    /* POLL_INTERVAL */
> > > > >       } else {
> > > > >               /* ACQUIRE_MEM is only required on a compute ring. */
> > > > > @@ -895,20 +895,32 @@ static void si_emit_surface_sync(struct
> > > > > si_context *sctx,
> > > > >               radeon_emit(cs, 0xffffffff);      /* CP_COHER_SIZE */
> > > > >               radeon_emit(cs, 0);               /* CP_COHER_BASE */
> > > > >               radeon_emit(cs, 0x0000000A);      /* POLL_INTERVAL */
> > > > >       }
> > > > >  }
> > > > > 
> > > > >  void si_emit_cache_flush(struct si_context *sctx)
> > > > >  {
> > > > >       struct radeon_cmdbuf *cs = sctx->gfx_cs;
> > > > >       uint32_t flags = sctx->flags;
> > > > > +
> > > > > +     if (!sctx->has_graphics) {
> > > > > +             /* Only process compute flags. */
> > > > > +             flags &= SI_CONTEXT_INV_ICACHE |
> > > > > +                      SI_CONTEXT_INV_SMEM_L1 |
> > > > > +                      SI_CONTEXT_INV_VMEM_L1 |
> > > > > +                      SI_CONTEXT_INV_GLOBAL_L2 |
> > > > > +                      SI_CONTEXT_WRITEBACK_GLOBAL_L2 |
> > > > > +                      SI_CONTEXT_INV_L2_METADATA |
> > > > > +                      SI_CONTEXT_CS_PARTIAL_FLUSH;
> > > > > +     }
> > > > > +
> > > > >       uint32_t cp_coher_cntl = 0;
> > > > >       uint32_t flush_cb_db = flags & (SI_CONTEXT_FLUSH_AND_INV_CB |
> > > > >                                       SI_CONTEXT_FLUSH_AND_INV_DB);
> > > > > 
> > > > >       if (flags & SI_CONTEXT_FLUSH_AND_INV_CB)
> > > > >               sctx->num_cb_cache_flushes++;
> > > > >       if (flags & SI_CONTEXT_FLUSH_AND_INV_DB)
> > > > >               sctx->num_db_cache_flushes++;
> > > > > 
> > > > >       /* SI has a bug that it always flushes ICACHE and KCACHE if
> > > > 
> > > > either
> > > > > @@ -1061,25 +1073,26 @@ void si_emit_cache_flush(struct si_context
> > > > > *sctx)
> > > > >                                 EOP_DATA_SEL_VALUE_32BIT,
> > > > >                                 sctx->wait_mem_scratch, va,
> > > > >                                 sctx->wait_mem_number, SI_NOT_QUERY);
> > > > >               si_cp_wait_mem(sctx, cs, va, sctx->wait_mem_number,
> > > > 
> > > > 0xffffffff,
> > > > >                              WAIT_REG_MEM_EQUAL);
> > > > >       }
> > > > > 
> > > > > >       /* Make sure ME is idle (it executes most packets) before continuing.
> > > > >        * This prevents read-after-write hazards between PFP and ME.
> > > > >        */
> > > > > -     if (cp_coher_cntl ||
> > > > > -         (flags & (SI_CONTEXT_CS_PARTIAL_FLUSH |
> > > > > -                         SI_CONTEXT_INV_VMEM_L1 |
> > > > > -                         SI_CONTEXT_INV_GLOBAL_L2 |
> > > > > -                         SI_CONTEXT_WRITEBACK_GLOBAL_L2))) {
> > > > > +     if (sctx->has_graphics &&
> > > > > +         (cp_coher_cntl ||
> > > > > +          (flags & (SI_CONTEXT_CS_PARTIAL_FLUSH |
> > > > > +                    SI_CONTEXT_INV_VMEM_L1 |
> > > > > +                    SI_CONTEXT_INV_GLOBAL_L2 |
> > > > > +                    SI_CONTEXT_WRITEBACK_GLOBAL_L2)))) {
> > > > >               radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
> > > > >               radeon_emit(cs, 0);
> > > > >       }
> > > > > 
> > > > >       /* SI-CI-VI only:
> > > > >        *   When one of the CP_COHER_CNTL.DEST_BASE flags is set,
> > > > > SURFACE_SYNC
> > > > >        *   waits for idle, so it should be last. SURFACE_SYNC is done
> > > > 
> > > > in
> > > > > PFP.
> > > > >        *
> > > > >        * cp_coher_cntl should contain all necessary flags except TC
> > > > 
> > > > flags
> > > > >        * at this point.
> > > > > diff --git a/src/gallium/drivers/radeonsi/si_texture.c
> > > > > b/src/gallium/drivers/radeonsi/si_texture.c
> > > > > index a50088d2d8f..581f90a7b2f 100644
> > > > > --- a/src/gallium/drivers/radeonsi/si_texture.c
> > > > > +++ b/src/gallium/drivers/radeonsi/si_texture.c
> > > > > @@ -457,20 +457,23 @@ static bool si_texture_discard_dcc(struct
> > > > > si_screen *sscreen,
> > > > >   *   compressed tiled
> > > > >   *
> > > > >   * \param sctx  the current context if you have one, or
> > > > > sscreen->aux_context
> > > > >   *              if you don't.
> > > > >   */
> > > > >  bool si_texture_disable_dcc(struct si_context *sctx,
> > > > >                           struct si_texture *tex)
> > > > >  {
> > > > >       struct si_screen *sscreen = sctx->screen;
> > > > > 
> > > > > +     if (!sctx->has_graphics)
> > > > > +             return si_texture_discard_dcc(sscreen, tex);
> > > > > +
> > > > >       if (!si_can_disable_dcc(tex))
> > > > >               return false;
> > > > > 
> > > > >       if (&sctx->b == sscreen->aux_context)
> > > > >               mtx_lock(&sscreen->aux_context_lock);
> > > > > 
> > > > >       /* Decompress DCC. */
> > > > >       si_decompress_dcc(sctx, tex);
> > > > >       sctx->b.flush(&sctx->b, NULL, 0);
> > > 
> > > _______________________________________________
> > > mesa-dev mailing list
> > > mesa-dev@lists.freedesktop.org
> > > https://na01.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Fmesa-dev&data=02%7C01%7Cjan.vesely%40cs.rutgers.edu%7C9490f91b0c3f45a70d5d08d69c430b00%7Cb92d2b234d35447093ff69aca6632ffe%7C1%7C0%7C636868209108039866&sdata=yy6mnl04artBw7IdTt%2Bep4liICCm1EwgDYM%2FPM35U7U%3D&reserved=0
> > 
> >
On Thu, Feb 28, 2019 at 4:44 AM Jan Vesely <jan.vesely@rutgers.edu> wrote:

> On Tue, 2019-02-26 at 18:34 -0500, Marek Olšák wrote:
> > I ran a simple test verifying that compute is working properly on the
> > compute ring.
>
> I guess this was not on raven? With this patch I no longer see gfx
> timeout but the apps still hang. Anyway, that's a separate issue.
>

If clover hangs, gfx timeouts are now compute timeouts, which might not be
printed in dmesg. It's still a hang, it just doesn't always affect gfx.

Marek


>
> >
> > When clover is using compute rings, it doesn't stall/block graphics
> > operations.
>
> It'd be nice to include this information in the commit message.
>
> Jan
>
> >
> > Marek
> >
> > On Tue, Feb 26, 2019 at 4:10 PM Jan Vesely <jan.vesely@rutgers.edu>
> wrote:
> >
> > > Can you add a bit of background why clover should/should not use other
> > > rings?
> > >
> > > I planned to test this, but my raven system can't run clover since
> kernel
> > > 4.20 release (BZ 109649), so I need to bisect that first.
> > > Can this patch help address the soft lockup issue on CIK (BZ 108879)?
> > > presumably, it was tested using clover on CIK, right?
> > >
> > > Jan
> > >
> > > On Tue, Feb 26, 2019 at 3:00 PM Marek Olšák <maraeo@gmail.com> wrote:
> > >
> > > > I'll just push it.
> > > >
> > > > Marek
> > > >
> > > > On Mon, Feb 25, 2019 at 9:37 PM Dieter Nützel <Dieter@nuetzel-hh.de>
> > > > wrote:
> > > >
> > > > > Hello Marek,
> > > > >
> > > > > > this series needs a rebase (if you have some time).
> > > > >
> > > > > Dieter
> > > > >
> > > > > Am 12.02.2019 19:12, schrieb Marek Olšák:
> > > > > > From: Marek Olšák <marek.olsak@amd.com>
> > > > > >
> > > > > > initialize all non-compute context functions to NULL.
> > > > > >
> > > > > > v2: fix SI
> > > > > > ---
> > > > > >  src/gallium/drivers/radeonsi/si_blit.c        | 14 ++-
> > > > > >  src/gallium/drivers/radeonsi/si_clear.c       |  7 +-
> > > > > >  src/gallium/drivers/radeonsi/si_compute.c     | 15 +--
> > > > > >  src/gallium/drivers/radeonsi/si_descriptors.c | 10 +-
> > > > > >  src/gallium/drivers/radeonsi/si_gfx_cs.c      | 29 +++---
> > > > > >  src/gallium/drivers/radeonsi/si_pipe.c        | 95
> +++++++++++--------
> > > > > >  src/gallium/drivers/radeonsi/si_pipe.h        |  3 +-
> > > > > >  src/gallium/drivers/radeonsi/si_state.c       |  3 +-
> > > > > >  src/gallium/drivers/radeonsi/si_state.h       |  1 +
> > > > > >  src/gallium/drivers/radeonsi/si_state_draw.c  | 25 +++--
> > > > > >  src/gallium/drivers/radeonsi/si_texture.c     |  3 +
> > > > > >  11 files changed, 130 insertions(+), 75 deletions(-)
> > > > > >
> > > > > > diff --git a/src/gallium/drivers/radeonsi/si_blit.c
> > > > > > b/src/gallium/drivers/radeonsi/si_blit.c
> > > > > > index bb8d1cbd12d..f39cb5d143f 100644
> > > > > > --- a/src/gallium/drivers/radeonsi/si_blit.c
> > > > > > +++ b/src/gallium/drivers/radeonsi/si_blit.c
> > > > > > @@ -1345,25 +1345,31 @@ static void si_flush_resource(struct
> > > > > > pipe_context *ctx,
> > > > > >
> > > > > >               if (separate_dcc_dirty) {
> > > > > >                       tex->separate_dcc_dirty = false;
> > > > > >
>  vi_separate_dcc_process_and_reset_stats(ctx,
> > > > >
> > > > > tex);
> > > > > >               }
> > > > > >       }
> > > > > >  }
> > > > > >
> > > > > >  void si_decompress_dcc(struct si_context *sctx, struct
> si_texture
> > > > > > *tex)
> > > > > >  {
> > > > > > -     if (!tex->dcc_offset)
> > > > > > +     /* If graphics is disabled, we can't decompress DCC, but it
> > > > >
> > > > > shouldn't
> > > > > > +      * be compressed either. The caller should simply discard
> it.
> > > > > > +      */
> > > > > > +     if (!tex->dcc_offset || !sctx->has_graphics)
> > > > > >               return;
> > > > > >
> > > > > >       si_blit_decompress_color(sctx, tex, 0,
> > > > >
> > > > > tex->buffer.b.b.last_level,
> > > > > >                                0,
> util_max_layer(&tex->buffer.b.b, 0),
> > > > > >                                true);
> > > > > >  }
> > > > > >
> > > > > >  void si_init_blit_functions(struct si_context *sctx)
> > > > > >  {
> > > > > >       sctx->b.resource_copy_region = si_resource_copy_region;
> > > > > > -     sctx->b.blit = si_blit;
> > > > > > -     sctx->b.flush_resource = si_flush_resource;
> > > > > > -     sctx->b.generate_mipmap = si_generate_mipmap;
> > > > > > +
> > > > > > +     if (sctx->has_graphics) {
> > > > > > +             sctx->b.blit = si_blit;
> > > > > > +             sctx->b.flush_resource = si_flush_resource;
> > > > > > +             sctx->b.generate_mipmap = si_generate_mipmap;
> > > > > > +     }
> > > > > >  }
> > > > > > diff --git a/src/gallium/drivers/radeonsi/si_clear.c
> > > > > > b/src/gallium/drivers/radeonsi/si_clear.c
> > > > > > index 9a00bb73b94..e1805f2a1c9 100644
> > > > > > --- a/src/gallium/drivers/radeonsi/si_clear.c
> > > > > > +++ b/src/gallium/drivers/radeonsi/si_clear.c
> > > > > > @@ -764,15 +764,18 @@ static void si_clear_texture(struct
> pipe_context
> > > > > > *pipe,
> > > > > >                       util_clear_render_target(pipe, sf, &color,
> > > > > >                                                box->x, box->y,
> > > > > >                                                box->width,
> > > > >
> > > > > box->height);
> > > > > >               }
> > > > > >       }
> > > > > >       pipe_surface_reference(&sf, NULL);
> > > > > >  }
> > > > > >
> > > > > >  void si_init_clear_functions(struct si_context *sctx)
> > > > > >  {
> > > > > > -     sctx->b.clear = si_clear;
> > > > > >       sctx->b.clear_render_target = si_clear_render_target;
> > > > > > -     sctx->b.clear_depth_stencil = si_clear_depth_stencil;
> > > > > >       sctx->b.clear_texture = si_clear_texture;
> > > > > > +
> > > > > > +     if (sctx->has_graphics) {
> > > > > > +             sctx->b.clear = si_clear;
> > > > > > +             sctx->b.clear_depth_stencil =
> si_clear_depth_stencil;
> > > > > > +     }
> > > > > >  }
> > > > > > diff --git a/src/gallium/drivers/radeonsi/si_compute.c
> > > > > > b/src/gallium/drivers/radeonsi/si_compute.c
> > > > > > index 1a62b3e0844..87addd53976 100644
> > > > > > --- a/src/gallium/drivers/radeonsi/si_compute.c
> > > > > > +++ b/src/gallium/drivers/radeonsi/si_compute.c
> > > > > > @@ -880,26 +880,28 @@ static void si_launch_grid(
> > > > > >               info->block[0] * info->block[1] * info->block[2] >
> 256;
> > > > > >
> > > > > >       if (cs_regalloc_hang)
> > > > > >               sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
> > > > > >                                SI_CONTEXT_CS_PARTIAL_FLUSH;
> > > > > >
> > > > > >       if (program->ir_type != PIPE_SHADER_IR_NATIVE &&
> > > > > >           program->shader.compilation_failed)
> > > > > >               return;
> > > > > >
> > > > > > -     if (sctx->last_num_draw_calls != sctx->num_draw_calls) {
> > > > > > -             si_update_fb_dirtiness_after_rendering(sctx);
> > > > > > -             sctx->last_num_draw_calls = sctx->num_draw_calls;
> > > > > > -     }
> > > > > > +     if (sctx->has_graphics) {
> > > > > > +             if (sctx->last_num_draw_calls !=
> sctx->num_draw_calls) {
> > > > > > +
>  si_update_fb_dirtiness_after_rendering(sctx);
> > > > > > +                     sctx->last_num_draw_calls =
> sctx->num_draw_calls;
> > > > > > +             }
> > > > > >
> > > > > > -     si_decompress_textures(sctx, 1 << PIPE_SHADER_COMPUTE);
> > > > > > +             si_decompress_textures(sctx, 1 <<
> PIPE_SHADER_COMPUTE);
> > > > > > +     }
> > > > > >
> > > > > >       /* Add buffer sizes for memory checking in need_cs_space.
> */
> > > > > >       si_context_add_resource_size(sctx,
> &program->shader.bo->b.b);
> > > > > >       /* TODO: add the scratch buffer */
> > > > > >
> > > > > >       if (info->indirect) {
> > > > > >               si_context_add_resource_size(sctx, info->indirect);
> > > > > >
> > > > > >               /* Indirect buffers use TC L2 on GFX9, but not
> older hw.
> > > > >
> > > > > */
> > > > > >               if (sctx->chip_class <= VI &&
> > > > > > @@ -917,21 +919,22 @@ static void si_launch_grid(
> > > > > >       if (sctx->flags)
> > > > > >               si_emit_cache_flush(sctx);
> > > > > >
> > > > > >       if (!si_switch_compute_shader(sctx, program,
> &program->shader,
> > > > > >                                       code_object, info->pc))
> > > > > >               return;
> > > > > >
> > > > > >       si_upload_compute_shader_descriptors(sctx);
> > > > > >       si_emit_compute_shader_pointers(sctx);
> > > > > >
> > > > > > -     if (si_is_atom_dirty(sctx, &sctx->atoms.s.render_cond)) {
> > > > > > +     if (sctx->has_graphics &&
> > > > > > +         si_is_atom_dirty(sctx, &sctx->atoms.s.render_cond)) {
> > > > > >               sctx->atoms.s.render_cond.emit(sctx);
> > > > > >               si_set_atom_dirty(sctx, &sctx->atoms.s.render_cond,
> > > > >
> > > > > false);
> > > > > >       }
> > > > > >
> > > > > >       if ((program->input_size ||
> > > > > >              program->ir_type == PIPE_SHADER_IR_NATIVE) &&
> > > > > >             unlikely(!si_upload_compute_input(sctx, code_object,
> > > > > > info))) {
> > > > > >               return;
> > > > > >       }
> > > > > >
> > > > > > diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c
> > > > > > b/src/gallium/drivers/radeonsi/si_descriptors.c
> > > > > > index 21d4ca946d3..0f22c55723c 100644
> > > > > > --- a/src/gallium/drivers/radeonsi/si_descriptors.c
> > > > > > +++ b/src/gallium/drivers/radeonsi/si_descriptors.c
> > > > > > @@ -2640,22 +2640,24 @@ void
> > > > > > si_all_resident_buffers_begin_new_cs(struct si_context *sctx)
> > > > > >
> > > > > >       sctx->num_resident_handles += num_resident_tex_handles +
> > > > > >                                       num_resident_img_handles;
> > > > > >  }
> > > > > >
> > > > > >  /* INIT/DEINIT/UPLOAD */
> > > > > >
> > > > > >  void si_init_all_descriptors(struct si_context *sctx)
> > > > > >  {
> > > > > >       int i;
> > > > > > +     unsigned first_shader =
> > > > > > +             sctx->has_graphics ? 0 : PIPE_SHADER_COMPUTE;
> > > > > >
> > > > > > -     for (i = 0; i < SI_NUM_SHADERS; i++) {
> > > > > > +     for (i = first_shader; i < SI_NUM_SHADERS; i++) {
> > > > > >               bool is_2nd = sctx->chip_class >= GFX9 &&
> > > > > >                                    (i == PIPE_SHADER_TESS_CTRL ||
> > > > > >                                     i == PIPE_SHADER_GEOMETRY);
> > > > > >               unsigned num_sampler_slots = SI_NUM_IMAGES / 2 +
> > > > >
> > > > > SI_NUM_SAMPLERS;
> > > > > >               unsigned num_buffer_slots = SI_NUM_SHADER_BUFFERS +
> > > > > > SI_NUM_CONST_BUFFERS;
> > > > > >               int rel_dw_offset;
> > > > > >               struct si_descriptors *desc;
> > > > > >
> > > > > >               if (is_2nd) {
> > > > > >                       if (i == PIPE_SHADER_TESS_CTRL) {
> > > > > > @@ -2714,30 +2716,34 @@ void si_init_all_descriptors(struct
> si_context
> > > > > > *sctx)
> > > > > >       si_init_bindless_descriptors(sctx,
> &sctx->bindless_descriptors,
> > > > > >
> > > > >
> > > > > SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES,
> > > > > >                                    1024);
> > > > > >
> > > > > >       sctx->descriptors_dirty = u_bit_consecutive(0,
> SI_NUM_DESCS);
> > > > > >
> > > > > >       /* Set pipe_context functions. */
> > > > > >       sctx->b.bind_sampler_states = si_bind_sampler_states;
> > > > > >       sctx->b.set_shader_images = si_set_shader_images;
> > > > > >       sctx->b.set_constant_buffer = si_pipe_set_constant_buffer;
> > > > > > -     sctx->b.set_polygon_stipple = si_set_polygon_stipple;
> > > > > >       sctx->b.set_shader_buffers = si_set_shader_buffers;
> > > > > >       sctx->b.set_sampler_views = si_set_sampler_views;
> > > > > >       sctx->b.create_texture_handle = si_create_texture_handle;
> > > > > >       sctx->b.delete_texture_handle = si_delete_texture_handle;
> > > > > >       sctx->b.make_texture_handle_resident =
> > > > > > si_make_texture_handle_resident;
> > > > > >       sctx->b.create_image_handle = si_create_image_handle;
> > > > > >       sctx->b.delete_image_handle = si_delete_image_handle;
> > > > > >       sctx->b.make_image_handle_resident =
> > > > >
> > > > > si_make_image_handle_resident;
> > > > > >
> > > > > > +     if (!sctx->has_graphics)
> > > > > > +             return;
> > > > > > +
> > > > > > +     sctx->b.set_polygon_stipple = si_set_polygon_stipple;
> > > > > > +
> > > > > >       /* Shader user data. */
> > > > > >       sctx->atoms.s.shader_pointers.emit =
> > > > > > si_emit_graphics_shader_pointers;
> > > > > >
> > > > > >       /* Set default and immutable mappings. */
> > > > > >       si_set_user_data_base(sctx, PIPE_SHADER_VERTEX,
> > > > > > R_00B130_SPI_SHADER_USER_DATA_VS_0);
> > > > > >
> > > > > >       if (sctx->chip_class >= GFX9) {
> > > > > >               si_set_user_data_base(sctx, PIPE_SHADER_TESS_CTRL,
> > > > > >
> > > > >
> > > > >  R_00B430_SPI_SHADER_USER_DATA_LS_0);
> > > > > >               si_set_user_data_base(sctx, PIPE_SHADER_GEOMETRY,
> > > > > > diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c
> > > > > > b/src/gallium/drivers/radeonsi/si_gfx_cs.c
> > > > > > index 3d64587fa2b..d0e7cf20b4c 100644
> > > > > > --- a/src/gallium/drivers/radeonsi/si_gfx_cs.c
> > > > > > +++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c
> > > > > > @@ -103,27 +103,29 @@ void si_flush_gfx_cs(struct si_context
> *ctx,
> > > > > > unsigned flags,
> > > > > >        * This code is only needed when the driver flushes the
> GFX IB
> > > > > >        * internally, and it never asks for a fence handle.
> > > > > >        */
> > > > > >       if (radeon_emitted(ctx->dma_cs, 0)) {
> > > > > >               assert(fence == NULL); /* internal flushes only */
> > > > > >               si_flush_dma_cs(ctx, flags, NULL);
> > > > > >       }
> > > > > >
> > > > > >       ctx->gfx_flush_in_progress = true;
> > > > > >
> > > > > > -     if (!LIST_IS_EMPTY(&ctx->active_queries))
> > > > > > -             si_suspend_queries(ctx);
> > > > > > -
> > > > > > -     ctx->streamout.suspended = false;
> > > > > > -     if (ctx->streamout.begin_emitted) {
> > > > > > -             si_emit_streamout_end(ctx);
> > > > > > -             ctx->streamout.suspended = true;
> > > > > > +     if (ctx->has_graphics) {
> > > > > > +             if (!LIST_IS_EMPTY(&ctx->active_queries))
> > > > > > +                     si_suspend_queries(ctx);
> > > > > > +
> > > > > > +             ctx->streamout.suspended = false;
> > > > > > +             if (ctx->streamout.begin_emitted) {
> > > > > > +                     si_emit_streamout_end(ctx);
> > > > > > +                     ctx->streamout.suspended = true;
> > > > > > +             }
> > > > > >       }
> > > > > >
> > > > > >       /* Make sure CP DMA is idle at the end of IBs after L2
> prefetches
> > > > > >        * because the kernel doesn't wait for it. */
> > > > > >       if (ctx->chip_class >= CIK)
> > > > > >               si_cp_dma_wait_for_idle(ctx);
> > > > > >
> > > > > >       /* Wait for draw calls to finish if needed. */
> > > > > >       if (wait_flags) {
> > > > > >               ctx->flags |= wait_flags;
> > > > > > @@ -209,20 +211,29 @@ void si_begin_new_gfx_cs(struct si_context
> *ctx)
> > > > > >        * IB starts drawing.
> > > > > >        *
> > > > > >        * TODO: Do we also need to invalidate CB & DB caches?
> > > > > >        */
> > > > > >       ctx->flags |= SI_CONTEXT_INV_ICACHE |
> > > > > >                     SI_CONTEXT_INV_SMEM_L1 |
> > > > > >                     SI_CONTEXT_INV_VMEM_L1 |
> > > > > >                     SI_CONTEXT_INV_GLOBAL_L2 |
> > > > > >                     SI_CONTEXT_START_PIPELINE_STATS;
> > > > > >
> > > > > > +     ctx->cs_shader_state.initialized = false;
> > > > > > +     si_all_descriptors_begin_new_cs(ctx);
> > > > > > +     si_all_resident_buffers_begin_new_cs(ctx);
> > > > > > +
> > > > > > +     if (!ctx->has_graphics) {
> > > > > > +             ctx->initial_gfx_cs_size =
> ctx->gfx_cs->current.cdw;
> > > > > > +             return;
> > > > > > +     }
> > > > > > +
> > > > > >       /* set all valid group as dirty so they get reemited on
> > > > > >        * next draw command
> > > > > >        */
> > > > > >       si_pm4_reset_emitted(ctx);
> > > > > >
> > > > > >       /* The CS initialization should be emitted before
> everything
> > > > >
> > > > > else. */
> > > > > >       si_pm4_emit(ctx, ctx->init_config);
> > > > > >       if (ctx->init_config_gs_rings)
> > > > > >               si_pm4_emit(ctx, ctx->init_config_gs_rings);
> > > > > >
> > > > > > @@ -273,22 +284,20 @@ void si_begin_new_gfx_cs(struct si_context
> *ctx)
> > > > > >       si_mark_atom_dirty(ctx, &ctx->atoms.s.db_render_state);
> > > > > >       if (ctx->chip_class >= GFX9)
> > > > > >               si_mark_atom_dirty(ctx, &ctx->atoms.s.dpbb_state);
> > > > > >       si_mark_atom_dirty(ctx, &ctx->atoms.s.stencil_ref);
> > > > > >       si_mark_atom_dirty(ctx, &ctx->atoms.s.spi_map);
> > > > > >       si_mark_atom_dirty(ctx, &ctx->atoms.s.streamout_enable);
> > > > > >       si_mark_atom_dirty(ctx, &ctx->atoms.s.render_cond);
> > > > > >       /* CLEAR_STATE disables all window rectangles. */
> > > > > >       if (!has_clear_state || ctx->num_window_rectangles > 0)
> > > > > >               si_mark_atom_dirty(ctx,
> &ctx->atoms.s.window_rectangles);
> > > > > > -     si_all_descriptors_begin_new_cs(ctx);
> > > > > > -     si_all_resident_buffers_begin_new_cs(ctx);
> > > > > >
> > > > > >       ctx->scissors.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
> > > > > >       ctx->viewports.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
> > > > > >       ctx->viewports.depth_range_dirty_mask = (1 <<
> SI_MAX_VIEWPORTS)
> > > > >
> > > > > - 1;
> > > > > >       si_mark_atom_dirty(ctx, &ctx->atoms.s.guardband);
> > > > > >       si_mark_atom_dirty(ctx, &ctx->atoms.s.scissors);
> > > > > >       si_mark_atom_dirty(ctx, &ctx->atoms.s.viewports);
> > > > > >
> > > > > >       si_mark_atom_dirty(ctx, &ctx->atoms.s.scratch_state);
> > > > > >       if (ctx->scratch_buffer) {
> > > > > > @@ -316,22 +325,20 @@ void si_begin_new_gfx_cs(struct si_context
> *ctx)
> > > > > >       ctx->last_multi_vgt_param = -1;
> > > > > >       ctx->last_rast_prim = -1;
> > > > > >       ctx->last_sc_line_stipple = ~0;
> > > > > >       ctx->last_vs_state = ~0;
> > > > > >       ctx->last_ls = NULL;
> > > > > >       ctx->last_tcs = NULL;
> > > > > >       ctx->last_tes_sh_base = -1;
> > > > > >       ctx->last_num_tcs_input_cp = -1;
> > > > > >       ctx->last_ls_hs_config = -1; /* impossible value */
> > > > > >
> > > > > > -     ctx->cs_shader_state.initialized = false;
> > > > > > -
> > > > > >       if (has_clear_state) {
> > > > > >
> > > > >
> > > > >  ctx->tracked_regs.reg_value[SI_TRACKED_DB_RENDER_CONTROL] =
> > > > > > 0x00000000;
> > > > > >
>  ctx->tracked_regs.reg_value[SI_TRACKED_DB_COUNT_CONTROL]
> > > > >
> > > > > =
> > > > > > 0x00000000;
> > > > > >
> > > > >
> > > > >  ctx->tracked_regs.reg_value[SI_TRACKED_DB_RENDER_OVERRIDE2] =
> > > > > > 0x00000000;
> > > > > >
> > > > >
> > > > >  ctx->tracked_regs.reg_value[SI_TRACKED_DB_SHADER_CONTROL] =
> > > > > > 0x00000000;
> > > > > >
>  ctx->tracked_regs.reg_value[SI_TRACKED_CB_TARGET_MASK] =
> > > > >
> > > > > 0xffffffff;
> > > > > >
>  ctx->tracked_regs.reg_value[SI_TRACKED_CB_DCC_CONTROL] =
> > > > >
> > > > > 0x00000000;
> > > > > >
> > > > >
> > > > >  ctx->tracked_regs.reg_value[SI_TRACKED_SX_PS_DOWNCONVERT] =
> > > > > > 0x00000000;
> > > > > >
> > > > >
> > > > >  ctx->tracked_regs.reg_value[SI_TRACKED_SX_BLEND_OPT_EPSILON] =
> > > > > > 0x00000000;
> > > > > >
> > > > >
> > > > >  ctx->tracked_regs.reg_value[SI_TRACKED_SX_BLEND_OPT_CONTROL] =
> > > > > > 0x00000000;
> > > > > > diff --git a/src/gallium/drivers/radeonsi/si_pipe.c
> > > > > > b/src/gallium/drivers/radeonsi/si_pipe.c
> > > > > > index 20767c806d2..c2ec664d5a4 100644
> > > > > > --- a/src/gallium/drivers/radeonsi/si_pipe.c
> > > > > > +++ b/src/gallium/drivers/radeonsi/si_pipe.c
> > > > > > @@ -381,61 +381,56 @@ static struct pipe_context
> > > > > > *si_create_context(struct pipe_screen *screen,
> > > > > >  {
> > > > > >       struct si_context *sctx = CALLOC_STRUCT(si_context);
> > > > > >       struct si_screen* sscreen = (struct si_screen *)screen;
> > > > > >       struct radeon_winsys *ws = sscreen->ws;
> > > > > >       int shader, i;
> > > > > >       bool stop_exec_on_failure = (flags &
> > > > > > PIPE_CONTEXT_LOSE_CONTEXT_ON_RESET) != 0;
> > > > > >
> > > > > >       if (!sctx)
> > > > > >               return NULL;
> > > > > >
> > > > > > +     sctx->has_graphics = sscreen->info.chip_class == SI ||
> > > > > > +                          !(flags & PIPE_CONTEXT_COMPUTE_ONLY);
> > > > > > +
> > > > > >       if (flags & PIPE_CONTEXT_DEBUG)
> > > > > >               sscreen->record_llvm_ir = true; /* racy but not
> critical
> > > > >
> > > > > */
> > > > > >
> > > > > >       sctx->b.screen = screen; /* this must be set first */
> > > > > >       sctx->b.priv = NULL;
> > > > > >       sctx->b.destroy = si_destroy_context;
> > > > > > -     sctx->b.emit_string_marker = si_emit_string_marker;
> > > > > > -     sctx->b.set_debug_callback = si_set_debug_callback;
> > > > > > -     sctx->b.set_log_context = si_set_log_context;
> > > > > > -     sctx->b.set_context_param = si_set_context_param;
> > > > > >       sctx->screen = sscreen; /* Easy accessing of
> screen/winsys. */
> > > > > >       sctx->is_debug = (flags & PIPE_CONTEXT_DEBUG) != 0;
> > > > > >
> > > > > >       slab_create_child(&sctx->pool_transfers,
> > > > >
> > > > > &sscreen->pool_transfers);
> > > > > >       slab_create_child(&sctx->pool_transfers_unsync,
> > > > > > &sscreen->pool_transfers);
> > > > > >
> > > > > >       sctx->ws = sscreen->ws;
> > > > > >       sctx->family = sscreen->info.family;
> > > > > >       sctx->chip_class = sscreen->info.chip_class;
> > > > > >
> > > > > >       if (sscreen->info.has_gpu_reset_counter_query) {
> > > > > >               sctx->gpu_reset_counter =
> > > > > >                       sctx->ws->query_value(sctx->ws,
> > > > >
> > > > > RADEON_GPU_RESET_COUNTER);
> > > > > >       }
> > > > > >
> > > > > > -     sctx->b.get_device_reset_status = si_get_reset_status;
> > > > > > -     sctx->b.set_device_reset_callback =
> si_set_device_reset_callback;
> > > > > > -
> > > > > > -     si_init_context_texture_functions(sctx);
> > > > > > -     si_init_query_functions(sctx);
> > > > > >
> > > > > >       if (sctx->chip_class == CIK ||
> > > > > >           sctx->chip_class == VI ||
> > > > > >           sctx->chip_class == GFX9) {
> > > > > >               sctx->eop_bug_scratch = si_resource(
> > > > > >                       pipe_buffer_create(&sscreen->b, 0,
> > > > >
> > > > > PIPE_USAGE_DEFAULT,
> > > > > >                                          16 *
> > > > >
> > > > > sscreen->info.num_render_backends));
> > > > > >               if (!sctx->eop_bug_scratch)
> > > > > >                       goto fail;
> > > > > >       }
> > > > > >
> > > > > > +     /* Initialize context allocators. */
> > > > > >       sctx->allocator_zeroed_memory =
> > > > > >               u_suballocator_create(&sctx->b, 128 * 1024,
> > > > > >                                     0, PIPE_USAGE_DEFAULT,
> > > > > >                                     SI_RESOURCE_FLAG_UNMAPPABLE |
> > > > > >                                     SI_RESOURCE_FLAG_CLEAR,
> false);
> > > > > >       if (!sctx->allocator_zeroed_memory)
> > > > > >               goto fail;
> > > > > >
> > > > > >       sctx->b.stream_uploader = u_upload_create(&sctx->b, 1024 *
> 1024,
> > > > > >                                                   0,
> PIPE_USAGE_STREAM,
> > > > > > @@ -459,38 +454,22 @@ static struct pipe_context
> > > > > > *si_create_context(struct pipe_screen *screen,
> > > > > >       sctx->ctx = sctx->ws->ctx_create(sctx->ws);
> > > > > >       if (!sctx->ctx)
> > > > > >               goto fail;
> > > > > >
> > > > > >       if (sscreen->info.num_sdma_rings && !(sscreen->debug_flags
> &
> > > > > > DBG(NO_ASYNC_DMA))) {
> > > > > >               sctx->dma_cs = sctx->ws->cs_create(sctx->ctx,
> RING_DMA,
> > > > > >
> > > > >
> > > > > (void*)si_flush_dma_cs,
> > > > > >                                                  sctx,
> > > > >
> > > > > stop_exec_on_failure);
> > > > > >       }
> > > > > >
> > > > > > -     si_init_buffer_functions(sctx);
> > > > > > -     si_init_clear_functions(sctx);
> > > > > > -     si_init_blit_functions(sctx);
> > > > > > -     si_init_compute_functions(sctx);
> > > > > > -     si_init_compute_blit_functions(sctx);
> > > > > > -     si_init_debug_functions(sctx);
> > > > > > -     si_init_msaa_functions(sctx);
> > > > > > -     si_init_streamout_functions(sctx);
> > > > > > -
> > > > > > -     if (sscreen->info.has_hw_decode) {
> > > > > > -             sctx->b.create_video_codec = si_uvd_create_decoder;
> > > > > > -             sctx->b.create_video_buffer =
> si_video_buffer_create;
> > > > > > -     } else {
> > > > > > -             sctx->b.create_video_codec = vl_create_decoder;
> > > > > > -             sctx->b.create_video_buffer =
> vl_video_buffer_create;
> > > > > > -     }
> > > > > > -
> > > > > > -     sctx->gfx_cs = ws->cs_create(sctx->ctx, RING_GFX,
> > > > > > +     sctx->gfx_cs = ws->cs_create(sctx->ctx,
> > > > > > +                                  sctx->has_graphics ? RING_GFX
> :
> > > > >
> > > > > RING_COMPUTE,
> > > > > >                                    (void*)si_flush_gfx_cs, sctx,
> > > > >
> > > > > stop_exec_on_failure);
> > > > > >
> > > > > >       /* Border colors. */
> > > > > >       sctx->border_color_table = malloc(SI_MAX_BORDER_COLORS *
> > > > > >
> > > > >
> > > > >  sizeof(*sctx->border_color_table));
> > > > > >       if (!sctx->border_color_table)
> > > > > >               goto fail;
> > > > > >
> > > > > >       sctx->border_color_buffer = si_resource(
> > > > > >               pipe_buffer_create(screen, 0, PIPE_USAGE_DEFAULT,
> > > > > > @@ -498,43 +477,76 @@ static struct pipe_context
> > > > > > *si_create_context(struct pipe_screen *screen,
> > > > > >
> sizeof(*sctx->border_color_table)));
> > > > > >       if (!sctx->border_color_buffer)
> > > > > >               goto fail;
> > > > > >
> > > > > >       sctx->border_color_map =
> > > > > >               ws->buffer_map(sctx->border_color_buffer->buf,
> > > > > >                              NULL, PIPE_TRANSFER_WRITE);
> > > > > >       if (!sctx->border_color_map)
> > > > > >               goto fail;
> > > > > >
> > > > > > +     /* Initialize context functions used by graphics and
> compute. */
> > > > > > +     sctx->b.emit_string_marker = si_emit_string_marker;
> > > > > > +     sctx->b.set_debug_callback = si_set_debug_callback;
> > > > > > +     sctx->b.set_log_context = si_set_log_context;
> > > > > > +     sctx->b.set_context_param = si_set_context_param;
> > > > > > +     sctx->b.get_device_reset_status = si_get_reset_status;
> > > > > > +     sctx->b.set_device_reset_callback =
> si_set_device_reset_callback;
> > > > > > +     sctx->b.memory_barrier = si_memory_barrier;
> > > > > > +
> > > > > >       si_init_all_descriptors(sctx);
> > > > > > +     si_init_buffer_functions(sctx);
> > > > > > +     si_init_clear_functions(sctx);
> > > > > > +     si_init_blit_functions(sctx);
> > > > > > +     si_init_compute_functions(sctx);
> > > > > > +     si_init_compute_blit_functions(sctx);
> > > > > > +     si_init_debug_functions(sctx);
> > > > > >       si_init_fence_functions(sctx);
> > > > > > -     si_init_state_functions(sctx);
> > > > > > -     si_init_shader_functions(sctx);
> > > > > > -     si_init_viewport_functions(sctx);
> > > > > > -
> > > > > > -     if (sctx->chip_class >= CIK)
> > > > > > -             cik_init_sdma_functions(sctx);
> > > > > > -     else
> > > > > > -             si_init_dma_functions(sctx);
> > > > > >
> > > > > >       if (sscreen->debug_flags & DBG(FORCE_DMA))
> > > > > >               sctx->b.resource_copy_region = sctx->dma_copy;
> > > > > >
> > > > > > -     sctx->blitter = util_blitter_create(&sctx->b);
> > > > > > -     if (sctx->blitter == NULL)
> > > > > > -             goto fail;
> > > > > > -     sctx->blitter->skip_viewport_restore = true;
> > > > > > +     /* Initialize graphics-only context functions. */
> > > > > > +     if (sctx->has_graphics) {
> > > > > > +             si_init_context_texture_functions(sctx);
> > > > > > +             si_init_query_functions(sctx);
> > > > > > +             si_init_msaa_functions(sctx);
> > > > > > +             si_init_shader_functions(sctx);
> > > > > > +             si_init_state_functions(sctx);
> > > > > > +             si_init_streamout_functions(sctx);
> > > > > > +             si_init_viewport_functions(sctx);
> > > > > > +
> > > > > > +             sctx->blitter = util_blitter_create(&sctx->b);
> > > > > > +             if (sctx->blitter == NULL)
> > > > > > +                     goto fail;
> > > > > > +             sctx->blitter->skip_viewport_restore = true;
> > > > > >
> > > > > > -     si_init_draw_functions(sctx);
> > > > > > +             si_init_draw_functions(sctx);
> > > > > > +     }
> > > > > > +
> > > > > > +     /* Initialize SDMA functions. */
> > > > > > +     if (sctx->chip_class >= CIK)
> > > > > > +             cik_init_sdma_functions(sctx);
> > > > > > +     else
> > > > > > +             si_init_dma_functions(sctx);
> > > > > >
> > > > > >       sctx->sample_mask = 0xffff;
> > > > > >
> > > > > > +     /* Initialize multimedia functions. */
> > > > > > +     if (sscreen->info.has_hw_decode) {
> > > > > > +             sctx->b.create_video_codec = si_uvd_create_decoder;
> > > > > > +             sctx->b.create_video_buffer =
> si_video_buffer_create;
> > > > > > +     } else {
> > > > > > +             sctx->b.create_video_codec = vl_create_decoder;
> > > > > > +             sctx->b.create_video_buffer =
> vl_video_buffer_create;
> > > > > > +     }
> > > > > > +
> > > > > >       if (sctx->chip_class >= GFX9) {
> > > > > >               sctx->wait_mem_scratch = si_resource(
> > > > > >                       pipe_buffer_create(screen, 0,
> > > > >
> > > > > PIPE_USAGE_DEFAULT, 4));
> > > > > >               if (!sctx->wait_mem_scratch)
> > > > > >                       goto fail;
> > > > > >
> > > > > >               /* Initialize the memory. */
> > > > > >               si_cp_write_data(sctx, sctx->wait_mem_scratch, 0,
> 4,
> > > > > >                                V_370_MEM, V_370_ME,
> > > > >
> > > > > &sctx->wait_mem_number);
> > > > > >       }
> > > > > > @@ -544,21 +556,22 @@ static struct pipe_context
> > > > > > *si_create_context(struct pipe_screen *screen,
> > > > > >       if (sctx->chip_class == CIK) {
> > > > > >               sctx->null_const_buf.buffer =
> > > > > >                       pipe_aligned_buffer_create(screen,
> > > > > >
> > > > >
> > > > > SI_RESOURCE_FLAG_32BIT,
> > > > > >
> PIPE_USAGE_DEFAULT,
> > > > >
> > > > > 16,
> > > > > >
> > > > >
> > > > > sctx->screen->info.tcc_cache_line_size);
> > > > > >               if (!sctx->null_const_buf.buffer)
> > > > > >                       goto fail;
> > > > > >               sctx->null_const_buf.buffer_size =
> > > > > > sctx->null_const_buf.buffer->width0;
> > > > > >
> > > > > > -             for (shader = 0; shader < SI_NUM_SHADERS;
> shader++) {
> > > > > > +             unsigned start_shader = sctx->has_graphics ? 0 :
> > > > > > PIPE_SHADER_COMPUTE;
> > > > > > +             for (shader = start_shader; shader <
> SI_NUM_SHADERS;
> > > > >
> > > > > shader++) {
> > > > > >                       for (i = 0; i < SI_NUM_CONST_BUFFERS; i++)
> {
> > > > > >
>  sctx->b.set_constant_buffer(&sctx->b,
> > > > >
> > > > > shader, i,
> > > > > >
> > > > >
> > > > >  &sctx->null_const_buf);
> > > > > >                       }
> > > > > >               }
> > > > > >
> > > > > >               si_set_rw_buffer(sctx,
> SI_HS_CONST_DEFAULT_TESS_LEVELS,
> > > > > >                                &sctx->null_const_buf);
> > > > > >               si_set_rw_buffer(sctx,
> SI_VS_CONST_INSTANCE_DIVISORS,
> > > > > >                                &sctx->null_const_buf);
> > > > > > diff --git a/src/gallium/drivers/radeonsi/si_pipe.h
> > > > > > b/src/gallium/drivers/radeonsi/si_pipe.h
> > > > > > index b01d5744752..348e8e5bd26 100644
> > > > > > --- a/src/gallium/drivers/radeonsi/si_pipe.h
> > > > > > +++ b/src/gallium/drivers/radeonsi/si_pipe.h
> > > > > > @@ -777,21 +777,21 @@ struct si_saved_cs {
> > > > > >  };
> > > > > >
> > > > > >  struct si_context {
> > > > > >       struct pipe_context             b; /* base class */
> > > > > >
> > > > > >       enum radeon_family              family;
> > > > > >       enum chip_class                 chip_class;
> > > > > >
> > > > > >       struct radeon_winsys            *ws;
> > > > > >       struct radeon_winsys_ctx        *ctx;
> > > > > > -     struct radeon_cmdbuf            *gfx_cs;
> > > > > > +     struct radeon_cmdbuf            *gfx_cs; /* compute IB if
> > > > >
> > > > > graphics is disabled
> > > > > > */
> > > > > >       struct radeon_cmdbuf            *dma_cs;
> > > > > >       struct pipe_fence_handle        *last_gfx_fence;
> > > > > >       struct pipe_fence_handle        *last_sdma_fence;
> > > > > >       struct si_resource              *eop_bug_scratch;
> > > > > >       struct u_upload_mgr             *cached_gtt_allocator;
> > > > > >       struct threaded_context         *tc;
> > > > > >       struct u_suballocator           *allocator_zeroed_memory;
> > > > > >       struct slab_child_pool          pool_transfers;
> > > > > >       struct slab_child_pool          pool_transfers_unsync; /*
> for
> > > > > > threaded_context */
> > > > > >       struct pipe_device_reset_callback device_reset_callback;
> > > > > > @@ -815,20 +815,21 @@ struct si_context {
> > > > > >       void                            *cs_clear_render_target;
> > > > > >       void
> *cs_clear_render_target_1d_array;
> > > > > >       struct si_screen                *screen;
> > > > > >       struct pipe_debug_callback      debug;
> > > > > >       struct ac_llvm_compiler         compiler; /* only
> non-threaded
> > > > >
> > > > > compilation
> > > > > > */
> > > > > >       struct si_shader_ctx_state      fixed_func_tcs_shader;
> > > > > >       struct si_resource              *wait_mem_scratch;
> > > > > >       unsigned                        wait_mem_number;
> > > > > >       uint16_t                        prefetch_L2_mask;
> > > > > >
> > > > > > +     bool                            has_graphics;
> > > > > >       bool                            gfx_flush_in_progress:1;
> > > > > >       bool                            gfx_last_ib_is_busy:1;
> > > > > >       bool                            compute_is_busy:1;
> > > > > >
> > > > > >       unsigned                        num_gfx_cs_flushes;
> > > > > >       unsigned                        initial_gfx_cs_size;
> > > > > >       unsigned                        gpu_reset_counter;
> > > > > >       unsigned                        last_dirty_tex_counter;
> > > > > >       unsigned
> last_compressed_colortex_counter;
> > > > > >       unsigned                        last_num_draw_calls;
> > > > > > diff --git a/src/gallium/drivers/radeonsi/si_state.c
> > > > > > b/src/gallium/drivers/radeonsi/si_state.c
> > > > > > index b49a1b3695e..458b108a7e3 100644
> > > > > > --- a/src/gallium/drivers/radeonsi/si_state.c
> > > > > > +++ b/src/gallium/drivers/radeonsi/si_state.c
> > > > > > @@ -4699,21 +4699,21 @@ static void si_texture_barrier(struct
> > > > > > pipe_context *ctx, unsigned flags)
> > > > > >
> > > > > >       si_update_fb_dirtiness_after_rendering(sctx);
> > > > > >
> > > > > >       /* Multisample surfaces are flushed in
> si_decompress_textures. */
> > > > > >       if (sctx->framebuffer.uncompressed_cb_mask)
> > > > > >               si_make_CB_shader_coherent(sctx,
> > > > >
> > > > > sctx->framebuffer.nr_samples,
> > > > > >
> > > > >
> > > > > sctx->framebuffer.CB_has_shader_readable_metadata);
> > > > > >  }
> > > > > >
> > > > > >  /* This only ensures coherency for shader image/buffer stores.
> */
> > > > > > -static void si_memory_barrier(struct pipe_context *ctx, unsigned
> > > > > > flags)
> > > > > > +void si_memory_barrier(struct pipe_context *ctx, unsigned flags)
> > > > > >  {
> > > > > >       struct si_context *sctx = (struct si_context *)ctx;
> > > > > >
> > > > > >       /* Subsequent commands must wait for all shader
> invocations to
> > > > > >        * complete. */
> > > > > >       sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
> > > > > >                        SI_CONTEXT_CS_PARTIAL_FLUSH;
> > > > > >
> > > > > >       if (flags & PIPE_BARRIER_CONSTANT_BUFFER)
> > > > > >               sctx->flags |= SI_CONTEXT_INV_SMEM_L1 |
> > > > > > @@ -4813,21 +4813,20 @@ void si_init_state_functions(struct
> si_context
> > > > > > *sctx)
> > > > > >       sctx->b.sampler_view_destroy = si_sampler_view_destroy;
> > > > > >
> > > > > >       sctx->b.set_sample_mask = si_set_sample_mask;
> > > > > >
> > > > > >       sctx->b.create_vertex_elements_state =
> si_create_vertex_elements;
> > > > > >       sctx->b.bind_vertex_elements_state =
> si_bind_vertex_elements;
> > > > > >       sctx->b.delete_vertex_elements_state =
> si_delete_vertex_element;
> > > > > >       sctx->b.set_vertex_buffers = si_set_vertex_buffers;
> > > > > >
> > > > > >       sctx->b.texture_barrier = si_texture_barrier;
> > > > > > -     sctx->b.memory_barrier = si_memory_barrier;
> > > > > >       sctx->b.set_min_samples = si_set_min_samples;
> > > > > >       sctx->b.set_tess_state = si_set_tess_state;
> > > > > >
> > > > > >       sctx->b.set_active_query_state = si_set_active_query_state;
> > > > > >
> > > > > >       si_init_config(sctx);
> > > > > >  }
> > > > > >
> > > > > >  void si_init_screen_state_functions(struct si_screen *sscreen)
> > > > > >  {
> > > > > > diff --git a/src/gallium/drivers/radeonsi/si_state.h
> > > > > > b/src/gallium/drivers/radeonsi/si_state.h
> > > > > > index 767e789276a..6faa4c511b1 100644
> > > > > > --- a/src/gallium/drivers/radeonsi/si_state.h
> > > > > > +++ b/src/gallium/drivers/radeonsi/si_state.h
> > > > > > @@ -482,20 +482,21 @@ void
> si_set_active_descriptors_for_shader(struct
> > > > > > si_context *sctx,
> > > > > >                                         struct si_shader_selector
> > > > >
> > > > > *sel);
> > > > > >  bool si_bindless_descriptor_can_reclaim_slab(void *priv,
> > > > > >                                            struct pb_slab_entry
> > > > >
> > > > > *entry);
> > > > > >  struct pb_slab *si_bindless_descriptor_slab_alloc(void *priv,
> > > > >
> > > > > unsigned
> > > > > > heap,
> > > > > >                                                 unsigned
> entry_size,
> > > > > >                                                 unsigned
> group_index);
> > > > > >  void si_bindless_descriptor_slab_free(void *priv, struct pb_slab
> > > > > > *pslab);
> > > > > >  void si_rebind_buffer(struct si_context *sctx, struct
> pipe_resource
> > > > > > *buf,
> > > > > >                     uint64_t old_va);
> > > > > >  /* si_state.c */
> > > > > > +void si_memory_barrier(struct pipe_context *ctx, unsigned
> flags);
> > > > > >  void si_init_state_functions(struct si_context *sctx);
> > > > > >  void si_init_screen_state_functions(struct si_screen *sscreen);
> > > > > >  void
> > > > > >  si_make_buffer_descriptor(struct si_screen *screen, struct
> > > > >
> > > > > si_resource
> > > > > > *buf,
> > > > > >                         enum pipe_format format,
> > > > > >                         unsigned offset, unsigned size,
> > > > > >                         uint32_t *state);
> > > > > >  void
> > > > > >  si_make_texture_descriptor(struct si_screen *screen,
> > > > > >                          struct si_texture *tex,
> > > > > > diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c
> > > > > > b/src/gallium/drivers/radeonsi/si_state_draw.c
> > > > > > index 9c968e39c2c..2a514f144b9 100644
> > > > > > --- a/src/gallium/drivers/radeonsi/si_state_draw.c
> > > > > > +++ b/src/gallium/drivers/radeonsi/si_state_draw.c
> > > > > > @@ -872,21 +872,21 @@ static void si_emit_draw_packets(struct
> > > > > > si_context *sctx,
> > > > > >
> > > > >
> > > > >  S_0287F0_USE_OPAQUE(!!info->count_from_stream_output));
> > > > > >               }
> > > > > >       }
> > > > > >  }
> > > > > >
> > > > > >  static void si_emit_surface_sync(struct si_context *sctx,
> > > > > >                                unsigned cp_coher_cntl)
> > > > > >  {
> > > > > >       struct radeon_cmdbuf *cs = sctx->gfx_cs;
> > > > > >
> > > > > > -     if (sctx->chip_class >= GFX9) {
> > > > > > +     if (sctx->chip_class >= GFX9 || !sctx->has_graphics) {
> > > > > >               /* Flush caches and wait for the caches to assert
> idle.
> > > > >
> > > > > */
> > > > > >               radeon_emit(cs, PKT3(PKT3_ACQUIRE_MEM, 5, 0));
> > > > > >               radeon_emit(cs, cp_coher_cntl); /* CP_COHER_CNTL */
> > > > > >               radeon_emit(cs, 0xffffffff);    /* CP_COHER_SIZE */
> > > > > >               radeon_emit(cs, 0xffffff);      /*
> CP_COHER_SIZE_HI */
> > > > > >               radeon_emit(cs, 0);             /* CP_COHER_BASE */
> > > > > >               radeon_emit(cs, 0);             /*
> CP_COHER_BASE_HI */
> > > > > >               radeon_emit(cs, 0x0000000A);    /* POLL_INTERVAL */
> > > > > >       } else {
> > > > > >               /* ACQUIRE_MEM is only required on a compute ring.
> */
> > > > > > @@ -895,20 +895,32 @@ static void si_emit_surface_sync(struct
> > > > > > si_context *sctx,
> > > > > >               radeon_emit(cs, 0xffffffff);      /* CP_COHER_SIZE
> */
> > > > > >               radeon_emit(cs, 0);               /* CP_COHER_BASE
> */
> > > > > >               radeon_emit(cs, 0x0000000A);      /* POLL_INTERVAL
> */
> > > > > >       }
> > > > > >  }
> > > > > >
> > > > > >  void si_emit_cache_flush(struct si_context *sctx)
> > > > > >  {
> > > > > >       struct radeon_cmdbuf *cs = sctx->gfx_cs;
> > > > > >       uint32_t flags = sctx->flags;
> > > > > > +
> > > > > > +     if (!sctx->has_graphics) {
> > > > > > +             /* Only process compute flags. */
> > > > > > +             flags &= SI_CONTEXT_INV_ICACHE |
> > > > > > +                      SI_CONTEXT_INV_SMEM_L1 |
> > > > > > +                      SI_CONTEXT_INV_VMEM_L1 |
> > > > > > +                      SI_CONTEXT_INV_GLOBAL_L2 |
> > > > > > +                      SI_CONTEXT_WRITEBACK_GLOBAL_L2 |
> > > > > > +                      SI_CONTEXT_INV_L2_METADATA |
> > > > > > +                      SI_CONTEXT_CS_PARTIAL_FLUSH;
> > > > > > +     }
> > > > > > +
> > > > > >       uint32_t cp_coher_cntl = 0;
> > > > > >       uint32_t flush_cb_db = flags &
> (SI_CONTEXT_FLUSH_AND_INV_CB |
> > > > > >
>  SI_CONTEXT_FLUSH_AND_INV_DB);
> > > > > >
> > > > > >       if (flags & SI_CONTEXT_FLUSH_AND_INV_CB)
> > > > > >               sctx->num_cb_cache_flushes++;
> > > > > >       if (flags & SI_CONTEXT_FLUSH_AND_INV_DB)
> > > > > >               sctx->num_db_cache_flushes++;
> > > > > >
> > > > > >       /* SI has a bug that it always flushes ICACHE and KCACHE if
> > > > >
> > > > > either
> > > > > > @@ -1061,25 +1073,26 @@ void si_emit_cache_flush(struct
> si_context
> > > > > > *sctx)
> > > > > >                                 EOP_DATA_SEL_VALUE_32BIT,
> > > > > >                                 sctx->wait_mem_scratch, va,
> > > > > >                                 sctx->wait_mem_number,
> SI_NOT_QUERY);
> > > > > >               si_cp_wait_mem(sctx, cs, va, sctx->wait_mem_number,
> > > > >
> > > > > 0xffffffff,
> > > > > >                              WAIT_REG_MEM_EQUAL);
> > > > > >       }
> > > > > >
> > > > > >       /* Make sure ME is idle (it executes most packets) before
> > > > >
> > > > > continuing.
> > > > > >        * This prevents read-after-write hazards between PFP and
> ME.
> > > > > >        */
> > > > > > -     if (cp_coher_cntl ||
> > > > > > -         (flags & (SI_CONTEXT_CS_PARTIAL_FLUSH |
> > > > > > -                         SI_CONTEXT_INV_VMEM_L1 |
> > > > > > -                         SI_CONTEXT_INV_GLOBAL_L2 |
> > > > > > -                         SI_CONTEXT_WRITEBACK_GLOBAL_L2))) {
> > > > > > +     if (sctx->has_graphics &&
> > > > > > +         (cp_coher_cntl ||
> > > > > > +          (flags & (SI_CONTEXT_CS_PARTIAL_FLUSH |
> > > > > > +                    SI_CONTEXT_INV_VMEM_L1 |
> > > > > > +                    SI_CONTEXT_INV_GLOBAL_L2 |
> > > > > > +                    SI_CONTEXT_WRITEBACK_GLOBAL_L2)))) {
> > > > > >               radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
> > > > > >               radeon_emit(cs, 0);
> > > > > >       }
> > > > > >
> > > > > >       /* SI-CI-VI only:
> > > > > >        *   When one of the CP_COHER_CNTL.DEST_BASE flags is set,
> > > > > > SURFACE_SYNC
> > > > > >        *   waits for idle, so it should be last. SURFACE_SYNC is
> done
> > > > >
> > > > > in
> > > > > > PFP.
> > > > > >        *
> > > > > >        * cp_coher_cntl should contain all necessary flags except
> TC
> > > > >
> > > > > flags
> > > > > >        * at this point.
> > > > > > diff --git a/src/gallium/drivers/radeonsi/si_texture.c
> > > > > > b/src/gallium/drivers/radeonsi/si_texture.c
> > > > > > index a50088d2d8f..581f90a7b2f 100644
> > > > > > --- a/src/gallium/drivers/radeonsi/si_texture.c
> > > > > > +++ b/src/gallium/drivers/radeonsi/si_texture.c
> > > > > > @@ -457,20 +457,23 @@ static bool si_texture_discard_dcc(struct
> > > > > > si_screen *sscreen,
> > > > > >   *   compressed tiled
> > > > > >   *
> > > > > >   * \param sctx  the current context if you have one, or
> > > > > > sscreen->aux_context
> > > > > >   *              if you don't.
> > > > > >   */
> > > > > >  bool si_texture_disable_dcc(struct si_context *sctx,
> > > > > >                           struct si_texture *tex)
> > > > > >  {
> > > > > >       struct si_screen *sscreen = sctx->screen;
> > > > > >
> > > > > > +     if (!sctx->has_graphics)
> > > > > > +             return si_texture_discard_dcc(sscreen, tex);
> > > > > > +
> > > > > >       if (!si_can_disable_dcc(tex))
> > > > > >               return false;
> > > > > >
> > > > > >       if (&sctx->b == sscreen->aux_context)
> > > > > >               mtx_lock(&sscreen->aux_context_lock);
> > > > > >
> > > > > >       /* Decompress DCC. */
> > > > > >       si_decompress_dcc(sctx, tex);
> > > > > >       sctx->b.flush(&sctx->b, NULL, 0);
> > > >
> > > > _______________________________________________
> > > > mesa-dev mailing list
> > > > mesa-dev@lists.freedesktop.org
> > > >
> https://na01.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Fmesa-dev&amp;data=02%7C01%7Cjan.vesely%40cs.rutgers.edu%7C9490f91b0c3f45a70d5d08d69c430b00%7Cb92d2b234d35447093ff69aca6632ffe%7C1%7C0%7C636868209108039866&amp;sdata=yy6mnl04artBw7IdTt%2Bep4liICCm1EwgDYM%2FPM35U7U%3D&amp;reserved=0
> > >
> > >
>
> --
> Jan Vesely <jan.vesely@rutgers.edu>
On Thu, 2019-02-28 at 17:48 -0500, Marek Olšák wrote:
> On Thu, Feb 28, 2019 at 4:44 AM Jan Vesely <jan.vesely@rutgers.edu> wrote:
> 
> > On Tue, 2019-02-26 at 18:34 -0500, Marek Olšák wrote:
> > > I ran a simple test verifying that compute is working properly on the
> > > compute ring.
> > 
> > I guess this was not on raven? With this patch I no longer see gfx
> > timeouts, but the apps still hang. Anyway, that's a separate issue.
> > 
> 
> If clover hangs, gfx timeouts are now compute timeouts, which might not be
> printed in dmesg. It's still a hang, it just doesn't always affect gfx.

Thanks, that one has been bisected and identified [0].
This patch, however, causes hangs in cl-api-enqueue-copy-buffer on all
GCN hw I was able to test (raven, carrizo, iceland).

Jan

[0] https://bugs.freedesktop.org/show_bug.cgi?id=109649

> 
> Marek
> 
> 
> > 
> > > 
> > > When clover is using compute rings, it doesn't stall/block graphics
> > > operations.
> > 
> > It'd be nice to include this information in the commit message.
> > 
> > Jan
> > 
> > > 
> > > Marek
> > > 
> > > On Tue, Feb 26, 2019 at 4:10 PM Jan Vesely <jan.vesely@rutgers.edu>
> > 
> > wrote:
> > > 
> > > > > Can you add a bit of background on why clover should/should not use
> > > > > other rings?
> > > > 
> > > > > I planned to test this, but my raven system can't run clover since the
> > > > > kernel 4.20 release (BZ 109649), so I need to bisect that first.
> > > > Can this patch help address the soft lockup issue on CIK (BZ 108879)?
> > > > presumably, it was tested using clover on CIK, right?
> > > > 
> > > > Jan
> > > > 
> > > > On Tue, Feb 26, 2019 at 3:00 PM Marek Olšák <maraeo@gmail.com> wrote:
> > > > 
> > > > > I'll just push it.
> > > > > 
> > > > > Marek
> > > > > 
> > > > > On Mon, Feb 25, 2019 at 9:37 PM Dieter Nützel <Dieter@nuetzel-hh.de>
> > > > > wrote:
> > > > > 
> > > > > > Hello Marek,
> > > > > > 
> > > > > > This series needs a rebase (if you have some time).
> > > > > > 
> > > > > > Dieter
> > > > > > 
> > > > > > Am 12.02.2019 19:12, schrieb Marek Olšák:
> > > > > > > From: Marek Olšák <marek.olsak@amd.com>
> > > > > > > 
> > > > > > > initialize all non-compute context functions to NULL.
> > > > > > > 
> > > > > > > v2: fix SI
> > > > > > > ---
> > > > > > >  src/gallium/drivers/radeonsi/si_blit.c        | 14 ++-
> > > > > > >  src/gallium/drivers/radeonsi/si_clear.c       |  7 +-
> > > > > > >  src/gallium/drivers/radeonsi/si_compute.c     | 15 +--
> > > > > > >  src/gallium/drivers/radeonsi/si_descriptors.c | 10 +-
> > > > > > >  src/gallium/drivers/radeonsi/si_gfx_cs.c      | 29 +++---
> > > > > > >  src/gallium/drivers/radeonsi/si_pipe.c        | 95
> > 
> > +++++++++++--------
> > > > > > >  src/gallium/drivers/radeonsi/si_pipe.h        |  3 +-
> > > > > > >  src/gallium/drivers/radeonsi/si_state.c       |  3 +-
> > > > > > >  src/gallium/drivers/radeonsi/si_state.h       |  1 +
> > > > > > >  src/gallium/drivers/radeonsi/si_state_draw.c  | 25 +++--
> > > > > > >  src/gallium/drivers/radeonsi/si_texture.c     |  3 +
> > > > > > >  11 files changed, 130 insertions(+), 75 deletions(-)
> > > > > > > 
> > > > > > > diff --git a/src/gallium/drivers/radeonsi/si_blit.c
> > > > > > > b/src/gallium/drivers/radeonsi/si_blit.c
> > > > > > > index bb8d1cbd12d..f39cb5d143f 100644
> > > > > > > --- a/src/gallium/drivers/radeonsi/si_blit.c
> > > > > > > +++ b/src/gallium/drivers/radeonsi/si_blit.c
> > > > > > > @@ -1345,25 +1345,31 @@ static void si_flush_resource(struct
> > > > > > > pipe_context *ctx,
> > > > > > > 
> > > > > > >               if (separate_dcc_dirty) {
> > > > > > >                       tex->separate_dcc_dirty = false;
> > > > > > > 
> > 
> >  vi_separate_dcc_process_and_reset_stats(ctx,
> > > > > > 
> > > > > > tex);
> > > > > > >               }
> > > > > > >       }
> > > > > > >  }
> > > > > > > 
> > > > > > >  void si_decompress_dcc(struct si_context *sctx, struct
> > 
> > si_texture
> > > > > > > *tex)
> > > > > > >  {
> > > > > > > -     if (!tex->dcc_offset)
> > > > > > > +     /* If graphics is disabled, we can't decompress DCC, but it
> > > > > > 
> > > > > > shouldn't
> > > > > > > +      * be compressed either. The caller should simply discard
> > 
> > it.
> > > > > > > +      */
> > > > > > > +     if (!tex->dcc_offset || !sctx->has_graphics)
> > > > > > >               return;
> > > > > > > 
> > > > > > >       si_blit_decompress_color(sctx, tex, 0,
> > > > > > 
> > > > > > tex->buffer.b.b.last_level,
> > > > > > >                                0,
> > 
> > util_max_layer(&tex->buffer.b.b, 0),
> > > > > > >                                true);
> > > > > > >  }
> > > > > > > 
> > > > > > >  void si_init_blit_functions(struct si_context *sctx)
> > > > > > >  {
> > > > > > >       sctx->b.resource_copy_region = si_resource_copy_region;
> > > > > > > -     sctx->b.blit = si_blit;
> > > > > > > -     sctx->b.flush_resource = si_flush_resource;
> > > > > > > -     sctx->b.generate_mipmap = si_generate_mipmap;
> > > > > > > +
> > > > > > > +     if (sctx->has_graphics) {
> > > > > > > +             sctx->b.blit = si_blit;
> > > > > > > +             sctx->b.flush_resource = si_flush_resource;
> > > > > > > +             sctx->b.generate_mipmap = si_generate_mipmap;
> > > > > > > +     }
> > > > > > >  }
> > > > > > > diff --git a/src/gallium/drivers/radeonsi/si_clear.c
> > > > > > > b/src/gallium/drivers/radeonsi/si_clear.c
> > > > > > > index 9a00bb73b94..e1805f2a1c9 100644
> > > > > > > --- a/src/gallium/drivers/radeonsi/si_clear.c
> > > > > > > +++ b/src/gallium/drivers/radeonsi/si_clear.c
> > > > > > > @@ -764,15 +764,18 @@ static void si_clear_texture(struct
> > 
> > pipe_context
> > > > > > > *pipe,
> > > > > > >                       util_clear_render_target(pipe, sf, &color,
> > > > > > >                                                box->x, box->y,
> > > > > > >                                                box->width,
> > > > > > 
> > > > > > box->height);
> > > > > > >               }
> > > > > > >       }
> > > > > > >       pipe_surface_reference(&sf, NULL);
> > > > > > >  }
> > > > > > > 
> > > > > > >  void si_init_clear_functions(struct si_context *sctx)
> > > > > > >  {
> > > > > > > -     sctx->b.clear = si_clear;
> > > > > > >       sctx->b.clear_render_target = si_clear_render_target;
> > > > > > > -     sctx->b.clear_depth_stencil = si_clear_depth_stencil;
> > > > > > >       sctx->b.clear_texture = si_clear_texture;
> > > > > > > +
> > > > > > > +     if (sctx->has_graphics) {
> > > > > > > +             sctx->b.clear = si_clear;
> > > > > > > +             sctx->b.clear_depth_stencil =
> > 
> > si_clear_depth_stencil;
> > > > > > > +     }
> > > > > > >  }
> > > > > > > diff --git a/src/gallium/drivers/radeonsi/si_compute.c
> > > > > > > b/src/gallium/drivers/radeonsi/si_compute.c
> > > > > > > index 1a62b3e0844..87addd53976 100644
> > > > > > > --- a/src/gallium/drivers/radeonsi/si_compute.c
> > > > > > > +++ b/src/gallium/drivers/radeonsi/si_compute.c
> > > > > > > @@ -880,26 +880,28 @@ static void si_launch_grid(
> > > > > > >               info->block[0] * info->block[1] * info->block[2] >
> > 
> > 256;
> > > > > > > 
> > > > > > >       if (cs_regalloc_hang)
> > > > > > >               sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
> > > > > > >                                SI_CONTEXT_CS_PARTIAL_FLUSH;
> > > > > > > 
> > > > > > >       if (program->ir_type != PIPE_SHADER_IR_NATIVE &&
> > > > > > >           program->shader.compilation_failed)
> > > > > > >               return;
> > > > > > > 
> > > > > > > -     if (sctx->last_num_draw_calls != sctx->num_draw_calls) {
> > > > > > > -             si_update_fb_dirtiness_after_rendering(sctx);
> > > > > > > -             sctx->last_num_draw_calls = sctx->num_draw_calls;
> > > > > > > -     }
> > > > > > > +     if (sctx->has_graphics) {
> > > > > > > +             if (sctx->last_num_draw_calls !=
> > 
> > sctx->num_draw_calls) {
> > > > > > > +
> > 
> >  si_update_fb_dirtiness_after_rendering(sctx);
> > > > > > > +                     sctx->last_num_draw_calls =
> > 
> > sctx->num_draw_calls;
> > > > > > > +             }
> > > > > > > 
> > > > > > > -     si_decompress_textures(sctx, 1 << PIPE_SHADER_COMPUTE);
> > > > > > > +             si_decompress_textures(sctx, 1 <<
> > 
> > PIPE_SHADER_COMPUTE);
> > > > > > > +     }
> > > > > > > 
> > > > > > >       /* Add buffer sizes for memory checking in need_cs_space.
> > 
> > */
> > > > > > >       si_context_add_resource_size(sctx,
> > 
> > &program->shader.bo->b.b);
> > > > > > >       /* TODO: add the scratch buffer */
> > > > > > > 
> > > > > > >       if (info->indirect) {
> > > > > > >               si_context_add_resource_size(sctx, info->indirect);
> > > > > > > 
> > > > > > >               /* Indirect buffers use TC L2 on GFX9, but not
> > 
> > older hw.
> > > > > > 
> > > > > > */
> > > > > > >               if (sctx->chip_class <= VI &&
> > > > > > > @@ -917,21 +919,22 @@ static void si_launch_grid(
> > > > > > >       if (sctx->flags)
> > > > > > >               si_emit_cache_flush(sctx);
> > > > > > > 
> > > > > > >       if (!si_switch_compute_shader(sctx, program,
> > 
> > &program->shader,
> > > > > > >                                       code_object, info->pc))
> > > > > > >               return;
> > > > > > > 
> > > > > > >       si_upload_compute_shader_descriptors(sctx);
> > > > > > >       si_emit_compute_shader_pointers(sctx);
> > > > > > > 
> > > > > > > -     if (si_is_atom_dirty(sctx, &sctx->atoms.s.render_cond)) {
> > > > > > > +     if (sctx->has_graphics &&
> > > > > > > +         si_is_atom_dirty(sctx, &sctx->atoms.s.render_cond)) {
> > > > > > >               sctx->atoms.s.render_cond.emit(sctx);
> > > > > > >               si_set_atom_dirty(sctx, &sctx->atoms.s.render_cond,
> > > > > > 
> > > > > > false);
> > > > > > >       }
> > > > > > > 
> > > > > > >       if ((program->input_size ||
> > > > > > >              program->ir_type == PIPE_SHADER_IR_NATIVE) &&
> > > > > > >             unlikely(!si_upload_compute_input(sctx, code_object,
> > > > > > > info))) {
> > > > > > >               return;
> > > > > > >       }
> > > > > > > 
> > > > > > > diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c
> > > > > > > b/src/gallium/drivers/radeonsi/si_descriptors.c
> > > > > > > index 21d4ca946d3..0f22c55723c 100644
> > > > > > > --- a/src/gallium/drivers/radeonsi/si_descriptors.c
> > > > > > > +++ b/src/gallium/drivers/radeonsi/si_descriptors.c
> > > > > > > @@ -2640,22 +2640,24 @@ void
> > > > > > > si_all_resident_buffers_begin_new_cs(struct si_context *sctx)
> > > > > > > 
> > > > > > >       sctx->num_resident_handles += num_resident_tex_handles +
> > > > > > >                                       num_resident_img_handles;
> > > > > > >  }
> > > > > > > 
> > > > > > >  /* INIT/DEINIT/UPLOAD */
> > > > > > > 
> > > > > > >  void si_init_all_descriptors(struct si_context *sctx)
> > > > > > >  {
> > > > > > >       int i;
> > > > > > > +     unsigned first_shader =
> > > > > > > +             sctx->has_graphics ? 0 : PIPE_SHADER_COMPUTE;
> > > > > > > 
> > > > > > > -     for (i = 0; i < SI_NUM_SHADERS; i++) {
> > > > > > > +     for (i = first_shader; i < SI_NUM_SHADERS; i++) {
> > > > > > >               bool is_2nd = sctx->chip_class >= GFX9 &&
> > > > > > >                                    (i == PIPE_SHADER_TESS_CTRL ||
> > > > > > >                                     i == PIPE_SHADER_GEOMETRY);
> > > > > > >               unsigned num_sampler_slots = SI_NUM_IMAGES / 2 +
> > > > > > 
> > > > > > SI_NUM_SAMPLERS;
> > > > > > >               unsigned num_buffer_slots = SI_NUM_SHADER_BUFFERS +
> > > > > > > SI_NUM_CONST_BUFFERS;
> > > > > > >               int rel_dw_offset;
> > > > > > >               struct si_descriptors *desc;
> > > > > > > 
> > > > > > >               if (is_2nd) {
> > > > > > >                       if (i == PIPE_SHADER_TESS_CTRL) {
> > > > > > > @@ -2714,30 +2716,34 @@ void si_init_all_descriptors(struct
> > 
> > si_context
> > > > > > > *sctx)
> > > > > > >       si_init_bindless_descriptors(sctx,
> > 
> > &sctx->bindless_descriptors,
> > > > > > > 
> > > > > > 
> > > > > > SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES,
> > > > > > >                                    1024);
> > > > > > > 
> > > > > > >       sctx->descriptors_dirty = u_bit_consecutive(0,
> > 
> > SI_NUM_DESCS);
> > > > > > > 
> > > > > > >       /* Set pipe_context functions. */
> > > > > > >       sctx->b.bind_sampler_states = si_bind_sampler_states;
> > > > > > >       sctx->b.set_shader_images = si_set_shader_images;
> > > > > > >       sctx->b.set_constant_buffer = si_pipe_set_constant_buffer;
> > > > > > > -     sctx->b.set_polygon_stipple = si_set_polygon_stipple;
> > > > > > >       sctx->b.set_shader_buffers = si_set_shader_buffers;
> > > > > > >       sctx->b.set_sampler_views = si_set_sampler_views;
> > > > > > >       sctx->b.create_texture_handle = si_create_texture_handle;
> > > > > > >       sctx->b.delete_texture_handle = si_delete_texture_handle;
> > > > > > >       sctx->b.make_texture_handle_resident =
> > > > > > > si_make_texture_handle_resident;
> > > > > > >       sctx->b.create_image_handle = si_create_image_handle;
> > > > > > >       sctx->b.delete_image_handle = si_delete_image_handle;
> > > > > > >       sctx->b.make_image_handle_resident =
> > > > > > 
> > > > > > si_make_image_handle_resident;
> > > > > > > 
> > > > > > > +     if (!sctx->has_graphics)
> > > > > > > +             return;
> > > > > > > +
> > > > > > > +     sctx->b.set_polygon_stipple = si_set_polygon_stipple;
> > > > > > > +
> > > > > > >       /* Shader user data. */
> > > > > > >       sctx->atoms.s.shader_pointers.emit =
> > > > > > > si_emit_graphics_shader_pointers;
> > > > > > > 
> > > > > > >       /* Set default and immutable mappings. */
> > > > > > >       si_set_user_data_base(sctx, PIPE_SHADER_VERTEX,
> > > > > > > R_00B130_SPI_SHADER_USER_DATA_VS_0);
> > > > > > > 
> > > > > > >       if (sctx->chip_class >= GFX9) {
> > > > > > >               si_set_user_data_base(sctx, PIPE_SHADER_TESS_CTRL,
> > > > > > > 
> > > > > > 
> > > > > >  R_00B430_SPI_SHADER_USER_DATA_LS_0);
> > > > > > >               si_set_user_data_base(sctx, PIPE_SHADER_GEOMETRY,
> > > > > > > diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c
> > > > > > > b/src/gallium/drivers/radeonsi/si_gfx_cs.c
> > > > > > > index 3d64587fa2b..d0e7cf20b4c 100644
> > > > > > > --- a/src/gallium/drivers/radeonsi/si_gfx_cs.c
> > > > > > > +++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c
> > > > > > > @@ -103,27 +103,29 @@ void si_flush_gfx_cs(struct si_context
> > 
> > *ctx,
> > > > > > > unsigned flags,
> > > > > > >        * This code is only needed when the driver flushes the
> > 
> > GFX IB
> > > > > > >        * internally, and it never asks for a fence handle.
> > > > > > >        */
> > > > > > >       if (radeon_emitted(ctx->dma_cs, 0)) {
> > > > > > >               assert(fence == NULL); /* internal flushes only */
> > > > > > >               si_flush_dma_cs(ctx, flags, NULL);
> > > > > > >       }
> > > > > > > 
> > > > > > >       ctx->gfx_flush_in_progress = true;
> > > > > > > 
> > > > > > > -     if (!LIST_IS_EMPTY(&ctx->active_queries))
> > > > > > > -             si_suspend_queries(ctx);
> > > > > > > -
> > > > > > > -     ctx->streamout.suspended = false;
> > > > > > > -     if (ctx->streamout.begin_emitted) {
> > > > > > > -             si_emit_streamout_end(ctx);
> > > > > > > -             ctx->streamout.suspended = true;
> > > > > > > +     if (ctx->has_graphics) {
> > > > > > > +             if (!LIST_IS_EMPTY(&ctx->active_queries))
> > > > > > > +                     si_suspend_queries(ctx);
> > > > > > > +
> > > > > > > +             ctx->streamout.suspended = false;
> > > > > > > +             if (ctx->streamout.begin_emitted) {
> > > > > > > +                     si_emit_streamout_end(ctx);
> > > > > > > +                     ctx->streamout.suspended = true;
> > > > > > > +             }
> > > > > > >       }
> > > > > > > 
> > > > > > >       /* Make sure CP DMA is idle at the end of IBs after L2
> > 
> > prefetches
> > > > > > >        * because the kernel doesn't wait for it. */
> > > > > > >       if (ctx->chip_class >= CIK)
> > > > > > >               si_cp_dma_wait_for_idle(ctx);
> > > > > > > 
> > > > > > >       /* Wait for draw calls to finish if needed. */
> > > > > > >       if (wait_flags) {
> > > > > > >               ctx->flags |= wait_flags;
> > > > > > > @@ -209,20 +211,29 @@ void si_begin_new_gfx_cs(struct si_context
> > 
> > *ctx)
> > > > > > >        * IB starts drawing.
> > > > > > >        *
> > > > > > >        * TODO: Do we also need to invalidate CB & DB caches?
> > > > > > >        */
> > > > > > >       ctx->flags |= SI_CONTEXT_INV_ICACHE |
> > > > > > >                     SI_CONTEXT_INV_SMEM_L1 |
> > > > > > >                     SI_CONTEXT_INV_VMEM_L1 |
> > > > > > >                     SI_CONTEXT_INV_GLOBAL_L2 |
> > > > > > >                     SI_CONTEXT_START_PIPELINE_STATS;
> > > > > > > 
> > > > > > > +     ctx->cs_shader_state.initialized = false;
> > > > > > > +     si_all_descriptors_begin_new_cs(ctx);
> > > > > > > +     si_all_resident_buffers_begin_new_cs(ctx);
> > > > > > > +
> > > > > > > +     if (!ctx->has_graphics) {
> > > > > > > +             ctx->initial_gfx_cs_size = ctx->gfx_cs->current.cdw;
> > > > > > > +             return;
> > > > > > > +     }
> > > > > > > +
> > > > > > >       /* set all valid group as dirty so they get reemited on
> > > > > > >        * next draw command
> > > > > > >        */
> > > > > > >       si_pm4_reset_emitted(ctx);
> > > > > > > 
> > > > > > >       /* The CS initialization should be emitted before
> > 
> > everything
> > > > > > 
> > > > > > else. */
> > > > > > >       si_pm4_emit(ctx, ctx->init_config);
> > > > > > >       if (ctx->init_config_gs_rings)
> > > > > > >               si_pm4_emit(ctx, ctx->init_config_gs_rings);
> > > > > > > 
> > > > > > > @@ -273,22 +284,20 @@ void si_begin_new_gfx_cs(struct si_context
> > 
> > *ctx)
> > > > > > >       si_mark_atom_dirty(ctx, &ctx->atoms.s.db_render_state);
> > > > > > >       if (ctx->chip_class >= GFX9)
> > > > > > >               si_mark_atom_dirty(ctx, &ctx->atoms.s.dpbb_state);
> > > > > > >       si_mark_atom_dirty(ctx, &ctx->atoms.s.stencil_ref);
> > > > > > >       si_mark_atom_dirty(ctx, &ctx->atoms.s.spi_map);
> > > > > > >       si_mark_atom_dirty(ctx, &ctx->atoms.s.streamout_enable);
> > > > > > >       si_mark_atom_dirty(ctx, &ctx->atoms.s.render_cond);
> > > > > > >       /* CLEAR_STATE disables all window rectangles. */
> > > > > > >       if (!has_clear_state || ctx->num_window_rectangles > 0)
> > > > > > >               si_mark_atom_dirty(ctx,
> > 
> > &ctx->atoms.s.window_rectangles);
> > > > > > > -     si_all_descriptors_begin_new_cs(ctx);
> > > > > > > -     si_all_resident_buffers_begin_new_cs(ctx);
> > > > > > > 
> > > > > > >       ctx->scissors.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
> > > > > > >       ctx->viewports.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
> > > > > > >       ctx->viewports.depth_range_dirty_mask = (1 <<
> > 
> > SI_MAX_VIEWPORTS)
> > > > > > 
> > > > > > - 1;
> > > > > > >       si_mark_atom_dirty(ctx, &ctx->atoms.s.guardband);
> > > > > > >       si_mark_atom_dirty(ctx, &ctx->atoms.s.scissors);
> > > > > > >       si_mark_atom_dirty(ctx, &ctx->atoms.s.viewports);
> > > > > > > 
> > > > > > >       si_mark_atom_dirty(ctx, &ctx->atoms.s.scratch_state);
> > > > > > >       if (ctx->scratch_buffer) {
> > > > > > > @@ -316,22 +325,20 @@ void si_begin_new_gfx_cs(struct si_context
> > 
> > *ctx)
> > > > > > >       ctx->last_multi_vgt_param = -1;
> > > > > > >       ctx->last_rast_prim = -1;
> > > > > > >       ctx->last_sc_line_stipple = ~0;
> > > > > > >       ctx->last_vs_state = ~0;
> > > > > > >       ctx->last_ls = NULL;
> > > > > > >       ctx->last_tcs = NULL;
> > > > > > >       ctx->last_tes_sh_base = -1;
> > > > > > >       ctx->last_num_tcs_input_cp = -1;
> > > > > > >       ctx->last_ls_hs_config = -1; /* impossible value */
> > > > > > > 
> > > > > > > -     ctx->cs_shader_state.initialized = false;
> > > > > > > -
> > > > > > >       if (has_clear_state) {
> > > > > > > 
> > > > > > 
> > > > > >  ctx->tracked_regs.reg_value[SI_TRACKED_DB_RENDER_CONTROL] =
> > > > > > > 0x00000000;
> > > > > > > 
> > 
> >  ctx->tracked_regs.reg_value[SI_TRACKED_DB_COUNT_CONTROL]
> > > > > > 
> > > > > > =
> > > > > > > 0x00000000;
> > > > > > > 
> > > > > > 
> > > > > >  ctx->tracked_regs.reg_value[SI_TRACKED_DB_RENDER_OVERRIDE2] =
> > > > > > > 0x00000000;
> > > > > > > 
> > > > > > 
> > > > > >  ctx->tracked_regs.reg_value[SI_TRACKED_DB_SHADER_CONTROL] =
> > > > > > > 0x00000000;
> > > > > > > 
> > 
> >  ctx->tracked_regs.reg_value[SI_TRACKED_CB_TARGET_MASK] =
> > > > > > 
> > > > > > 0xffffffff;
> > > > > > > 
> > 
> >  ctx->tracked_regs.reg_value[SI_TRACKED_CB_DCC_CONTROL] =
> > > > > > 
> > > > > > 0x00000000;
> > > > > > > 
> > > > > > 
> > > > > >  ctx->tracked_regs.reg_value[SI_TRACKED_SX_PS_DOWNCONVERT] =
> > > > > > > 0x00000000;
> > > > > > > 
> > > > > > 
> > > > > >  ctx->tracked_regs.reg_value[SI_TRACKED_SX_BLEND_OPT_EPSILON] =
> > > > > > > 0x00000000;
> > > > > > > 
> > > > > > 
> > > > > >  ctx->tracked_regs.reg_value[SI_TRACKED_SX_BLEND_OPT_CONTROL] =
> > > > > > > 0x00000000;
> > > > > > > diff --git a/src/gallium/drivers/radeonsi/si_pipe.c
> > > > > > > b/src/gallium/drivers/radeonsi/si_pipe.c
> > > > > > > index 20767c806d2..c2ec664d5a4 100644
> > > > > > > --- a/src/gallium/drivers/radeonsi/si_pipe.c
> > > > > > > +++ b/src/gallium/drivers/radeonsi/si_pipe.c
> > > > > > > @@ -381,61 +381,56 @@ static struct pipe_context
> > > > > > > *si_create_context(struct pipe_screen *screen,
> > > > > > >  {
> > > > > > >       struct si_context *sctx = CALLOC_STRUCT(si_context);
> > > > > > >       struct si_screen* sscreen = (struct si_screen *)screen;
> > > > > > >       struct radeon_winsys *ws = sscreen->ws;
> > > > > > >       int shader, i;
> > > > > > >       bool stop_exec_on_failure = (flags &
> > > > > > > PIPE_CONTEXT_LOSE_CONTEXT_ON_RESET) != 0;
> > > > > > > 
> > > > > > >       if (!sctx)
> > > > > > >               return NULL;
> > > > > > > 
> > > > > > > +     sctx->has_graphics = sscreen->info.chip_class == SI ||
> > > > > > > +                          !(flags & PIPE_CONTEXT_COMPUTE_ONLY);
> > > > > > > +
> > > > > > >       if (flags & PIPE_CONTEXT_DEBUG)
> > > > > > >               sscreen->record_llvm_ir = true; /* racy but not
> > 
> > critical
> > > > > > 
> > > > > > */
> > > > > > > 
> > > > > > >       sctx->b.screen = screen; /* this must be set first */
> > > > > > >       sctx->b.priv = NULL;
> > > > > > >       sctx->b.destroy = si_destroy_context;
> > > > > > > -     sctx->b.emit_string_marker = si_emit_string_marker;
> > > > > > > -     sctx->b.set_debug_callback = si_set_debug_callback;
> > > > > > > -     sctx->b.set_log_context = si_set_log_context;
> > > > > > > -     sctx->b.set_context_param = si_set_context_param;
> > > > > > >       sctx->screen = sscreen; /* Easy accessing of
> > 
> > screen/winsys. */
> > > > > > >       sctx->is_debug = (flags & PIPE_CONTEXT_DEBUG) != 0;
> > > > > > > 
> > > > > > >       slab_create_child(&sctx->pool_transfers,
> > > > > > 
> > > > > > &sscreen->pool_transfers);
> > > > > > >       slab_create_child(&sctx->pool_transfers_unsync,
> > > > > > > &sscreen->pool_transfers);
> > > > > > > 
> > > > > > >       sctx->ws = sscreen->ws;
> > > > > > >       sctx->family = sscreen->info.family;
> > > > > > >       sctx->chip_class = sscreen->info.chip_class;
> > > > > > > 
> > > > > > >       if (sscreen->info.has_gpu_reset_counter_query) {
> > > > > > >               sctx->gpu_reset_counter =
> > > > > > >                       sctx->ws->query_value(sctx->ws,
> > > > > > 
> > > > > > RADEON_GPU_RESET_COUNTER);
> > > > > > >       }
> > > > > > > 
> > > > > > > -     sctx->b.get_device_reset_status = si_get_reset_status;
> > > > > > > -     sctx->b.set_device_reset_callback =
> > 
> > si_set_device_reset_callback;
> > > > > > > -
> > > > > > > -     si_init_context_texture_functions(sctx);
> > > > > > > -     si_init_query_functions(sctx);
> > > > > > > 
> > > > > > >       if (sctx->chip_class == CIK ||
> > > > > > >           sctx->chip_class == VI ||
> > > > > > >           sctx->chip_class == GFX9) {
> > > > > > >               sctx->eop_bug_scratch = si_resource(
> > > > > > >                       pipe_buffer_create(&sscreen->b, 0,
> > > > > > 
> > > > > > PIPE_USAGE_DEFAULT,
> > > > > > >                                          16 *
> > > > > > 
> > > > > > sscreen->info.num_render_backends));
> > > > > > >               if (!sctx->eop_bug_scratch)
> > > > > > >                       goto fail;
> > > > > > >       }
> > > > > > > 
> > > > > > > +     /* Initialize context allocators. */
> > > > > > >       sctx->allocator_zeroed_memory =
> > > > > > >               u_suballocator_create(&sctx->b, 128 * 1024,
> > > > > > >                                     0, PIPE_USAGE_DEFAULT,
> > > > > > >                                     SI_RESOURCE_FLAG_UNMAPPABLE |
> > > > > > >                                     SI_RESOURCE_FLAG_CLEAR,
> > 
> > false);
> > > > > > >       if (!sctx->allocator_zeroed_memory)
> > > > > > >               goto fail;
> > > > > > > 
> > > > > > >       sctx->b.stream_uploader = u_upload_create(&sctx->b, 1024 *
> > 
> > 1024,
> > > > > > >                                                   0,
> > 
> > PIPE_USAGE_STREAM,
> > > > > > > @@ -459,38 +454,22 @@ static struct pipe_context
> > > > > > > *si_create_context(struct pipe_screen *screen,
> > > > > > >       sctx->ctx = sctx->ws->ctx_create(sctx->ws);
> > > > > > >       if (!sctx->ctx)
> > > > > > >               goto fail;
> > > > > > > 
> > > > > > >       if (sscreen->info.num_sdma_rings && !(sscreen->debug_flags
> > 
> > &
> > > > > > > DBG(NO_ASYNC_DMA))) {
> > > > > > >               sctx->dma_cs = sctx->ws->cs_create(sctx->ctx,
> > 
> > RING_DMA,
> > > > > > > 
> > > > > > 
> > > > > > (void*)si_flush_dma_cs,
> > > > > > >                                                  sctx,
> > > > > > 
> > > > > > stop_exec_on_failure);
> > > > > > >       }
> > > > > > > 
> > > > > > > -     si_init_buffer_functions(sctx);
> > > > > > > -     si_init_clear_functions(sctx);
> > > > > > > -     si_init_blit_functions(sctx);
> > > > > > > -     si_init_compute_functions(sctx);
> > > > > > > -     si_init_compute_blit_functions(sctx);
> > > > > > > -     si_init_debug_functions(sctx);
> > > > > > > -     si_init_msaa_functions(sctx);
> > > > > > > -     si_init_streamout_functions(sctx);
> > > > > > > -
> > > > > > > -     if (sscreen->info.has_hw_decode) {
> > > > > > > -             sctx->b.create_video_codec = si_uvd_create_decoder;
> > > > > > > -             sctx->b.create_video_buffer =
> > 
> > si_video_buffer_create;
> > > > > > > -     } else {
> > > > > > > -             sctx->b.create_video_codec = vl_create_decoder;
> > > > > > > -             sctx->b.create_video_buffer =
> > 
> > vl_video_buffer_create;
> > > > > > > -     }
> > > > > > > -
> > > > > > > -     sctx->gfx_cs = ws->cs_create(sctx->ctx, RING_GFX,
> > > > > > > +     sctx->gfx_cs = ws->cs_create(sctx->ctx,
> > > > > > > +                                  sctx->has_graphics ? RING_GFX : RING_COMPUTE,
> > > > > > >                                    (void*)si_flush_gfx_cs, sctx, stop_exec_on_failure);
> > > > > > > 
> > > > > > >       /* Border colors. */
> > > > > > >       sctx->border_color_table = malloc(SI_MAX_BORDER_COLORS *
> > > > > > > 
> > > > > > 
> > > > > >  sizeof(*sctx->border_color_table));
> > > > > > >       if (!sctx->border_color_table)
> > > > > > >               goto fail;
> > > > > > > 
> > > > > > >       sctx->border_color_buffer = si_resource(
> > > > > > >               pipe_buffer_create(screen, 0, PIPE_USAGE_DEFAULT,
> > > > > > > @@ -498,43 +477,76 @@ static struct pipe_context
> > > > > > > *si_create_context(struct pipe_screen *screen,
> > > > > > > 
> > 
> > sizeof(*sctx->border_color_table)));
> > > > > > >       if (!sctx->border_color_buffer)
> > > > > > >               goto fail;
> > > > > > > 
> > > > > > >       sctx->border_color_map =
> > > > > > >               ws->buffer_map(sctx->border_color_buffer->buf,
> > > > > > >                              NULL, PIPE_TRANSFER_WRITE);
> > > > > > >       if (!sctx->border_color_map)
> > > > > > >               goto fail;
> > > > > > > 
> > > > > > > +     /* Initialize context functions used by graphics and compute. */
> > > > > > > +     sctx->b.emit_string_marker = si_emit_string_marker;
> > > > > > > +     sctx->b.set_debug_callback = si_set_debug_callback;
> > > > > > > +     sctx->b.set_log_context = si_set_log_context;
> > > > > > > +     sctx->b.set_context_param = si_set_context_param;
> > > > > > > +     sctx->b.get_device_reset_status = si_get_reset_status;
> > > > > > > +     sctx->b.set_device_reset_callback = si_set_device_reset_callback;
> > > > > > > +     sctx->b.memory_barrier = si_memory_barrier;
> > > > > > > +
> > > > > > >       si_init_all_descriptors(sctx);
> > > > > > > +     si_init_buffer_functions(sctx);
> > > > > > > +     si_init_clear_functions(sctx);
> > > > > > > +     si_init_blit_functions(sctx);
> > > > > > > +     si_init_compute_functions(sctx);
> > > > > > > +     si_init_compute_blit_functions(sctx);
> > > > > > > +     si_init_debug_functions(sctx);
> > > > > > >       si_init_fence_functions(sctx);
> > > > > > > -     si_init_state_functions(sctx);
> > > > > > > -     si_init_shader_functions(sctx);
> > > > > > > -     si_init_viewport_functions(sctx);
> > > > > > > -
> > > > > > > -     if (sctx->chip_class >= CIK)
> > > > > > > -             cik_init_sdma_functions(sctx);
> > > > > > > -     else
> > > > > > > -             si_init_dma_functions(sctx);
> > > > > > > 
> > > > > > >       if (sscreen->debug_flags & DBG(FORCE_DMA))
> > > > > > >               sctx->b.resource_copy_region = sctx->dma_copy;
> > > > > > > 
> > > > > > > -     sctx->blitter = util_blitter_create(&sctx->b);
> > > > > > > -     if (sctx->blitter == NULL)
> > > > > > > -             goto fail;
> > > > > > > -     sctx->blitter->skip_viewport_restore = true;
> > > > > > > +     /* Initialize graphics-only context functions. */
> > > > > > > +     if (sctx->has_graphics) {
> > > > > > > +             si_init_context_texture_functions(sctx);
> > > > > > > +             si_init_query_functions(sctx);
> > > > > > > +             si_init_msaa_functions(sctx);
> > > > > > > +             si_init_shader_functions(sctx);
> > > > > > > +             si_init_state_functions(sctx);
> > > > > > > +             si_init_streamout_functions(sctx);
> > > > > > > +             si_init_viewport_functions(sctx);
> > > > > > > +
> > > > > > > +             sctx->blitter = util_blitter_create(&sctx->b);
> > > > > > > +             if (sctx->blitter == NULL)
> > > > > > > +                     goto fail;
> > > > > > > +             sctx->blitter->skip_viewport_restore = true;
> > > > > > > 
> > > > > > > -     si_init_draw_functions(sctx);
> > > > > > > +             si_init_draw_functions(sctx);
> > > > > > > +     }
> > > > > > > +
> > > > > > > +     /* Initialize SDMA functions. */
> > > > > > > +     if (sctx->chip_class >= CIK)
> > > > > > > +             cik_init_sdma_functions(sctx);
> > > > > > > +     else
> > > > > > > +             si_init_dma_functions(sctx);
> > > > > > > 
> > > > > > >       sctx->sample_mask = 0xffff;
> > > > > > > 
> > > > > > > +     /* Initialize multimedia functions. */
> > > > > > > +     if (sscreen->info.has_hw_decode) {
> > > > > > > +             sctx->b.create_video_codec = si_uvd_create_decoder;
> > > > > > > +             sctx->b.create_video_buffer =
> > 
> > si_video_buffer_create;
> > > > > > > +     } else {
> > > > > > > +             sctx->b.create_video_codec = vl_create_decoder;
> > > > > > > +             sctx->b.create_video_buffer =
> > 
> > vl_video_buffer_create;
> > > > > > > +     }
> > > > > > > +
> > > > > > >       if (sctx->chip_class >= GFX9) {
> > > > > > >               sctx->wait_mem_scratch = si_resource(
> > > > > > >                       pipe_buffer_create(screen, 0,
> > > > > > 
> > > > > > PIPE_USAGE_DEFAULT, 4));
> > > > > > >               if (!sctx->wait_mem_scratch)
> > > > > > >                       goto fail;
> > > > > > > 
> > > > > > >               /* Initialize the memory. */
> > > > > > >               si_cp_write_data(sctx, sctx->wait_mem_scratch, 0,
> > 
> > 4,
> > > > > > >                                V_370_MEM, V_370_ME,
> > > > > > 
> > > > > > &sctx->wait_mem_number);
> > > > > > >       }
> > > > > > > @@ -544,21 +556,22 @@ static struct pipe_context
> > > > > > > *si_create_context(struct pipe_screen *screen,
> > > > > > >       if (sctx->chip_class == CIK) {
> > > > > > >               sctx->null_const_buf.buffer =
> > > > > > >                       pipe_aligned_buffer_create(screen,
> > > > > > > 
> > > > > > 
> > > > > > SI_RESOURCE_FLAG_32BIT,
> > > > > > > 
> > 
> > PIPE_USAGE_DEFAULT,
> > > > > > 
> > > > > > 16,
> > > > > > > 
> > > > > > 
> > > > > > sctx->screen->info.tcc_cache_line_size);
> > > > > > >               if (!sctx->null_const_buf.buffer)
> > > > > > >                       goto fail;
> > > > > > >               sctx->null_const_buf.buffer_size =
> > > > > > > sctx->null_const_buf.buffer->width0;
> > > > > > > 
> > > > > > > -             for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
> > > > > > > +             unsigned start_shader = sctx->has_graphics ? 0 : PIPE_SHADER_COMPUTE;
> > > > > > > +             for (shader = start_shader; shader < SI_NUM_SHADERS; shader++) {
> > > > > > >                       for (i = 0; i < SI_NUM_CONST_BUFFERS; i++)
> > 
> > {
> > > > > > > 
> > 
> >  sctx->b.set_constant_buffer(&sctx->b,
> > > > > > 
> > > > > > shader, i,
> > > > > > > 
> > > > > > 
> > > > > >  &sctx->null_const_buf);
> > > > > > >                       }
> > > > > > >               }
> > > > > > > 
> > > > > > >               si_set_rw_buffer(sctx,
> > 
> > SI_HS_CONST_DEFAULT_TESS_LEVELS,
> > > > > > >                                &sctx->null_const_buf);
> > > > > > >               si_set_rw_buffer(sctx,
> > 
> > SI_VS_CONST_INSTANCE_DIVISORS,
> > > > > > >                                &sctx->null_const_buf);
> > > > > > > diff --git a/src/gallium/drivers/radeonsi/si_pipe.h
> > > > > > > b/src/gallium/drivers/radeonsi/si_pipe.h
> > > > > > > index b01d5744752..348e8e5bd26 100644
> > > > > > > --- a/src/gallium/drivers/radeonsi/si_pipe.h
> > > > > > > +++ b/src/gallium/drivers/radeonsi/si_pipe.h
> > > > > > > @@ -777,21 +777,21 @@ struct si_saved_cs {
> > > > > > >  };
> > > > > > > 
> > > > > > >  struct si_context {
> > > > > > >       struct pipe_context             b; /* base class */
> > > > > > > 
> > > > > > >       enum radeon_family              family;
> > > > > > >       enum chip_class                 chip_class;
> > > > > > > 
> > > > > > >       struct radeon_winsys            *ws;
> > > > > > >       struct radeon_winsys_ctx        *ctx;
> > > > > > > -     struct radeon_cmdbuf            *gfx_cs;
> > > > > > > +     struct radeon_cmdbuf            *gfx_cs; /* compute IB if graphics is disabled */
> > > > > > >       struct radeon_cmdbuf            *dma_cs;
> > > > > > >       struct pipe_fence_handle        *last_gfx_fence;
> > > > > > >       struct pipe_fence_handle        *last_sdma_fence;
> > > > > > >       struct si_resource              *eop_bug_scratch;
> > > > > > >       struct u_upload_mgr             *cached_gtt_allocator;
> > > > > > >       struct threaded_context         *tc;
> > > > > > >       struct u_suballocator           *allocator_zeroed_memory;
> > > > > > >       struct slab_child_pool          pool_transfers;
> > > > > > >       struct slab_child_pool          pool_transfers_unsync; /*
> > 
> > for
> > > > > > > threaded_context */
> > > > > > >       struct pipe_device_reset_callback device_reset_callback;
> > > > > > > @@ -815,20 +815,21 @@ struct si_context {
> > > > > > >       void                            *cs_clear_render_target;
> > > > > > >       void
> > 
> > *cs_clear_render_target_1d_array;
> > > > > > >       struct si_screen                *screen;
> > > > > > >       struct pipe_debug_callback      debug;
> > > > > > >       struct ac_llvm_compiler         compiler; /* only
> > 
> > non-threaded
> > > > > > 
> > > > > > compilation
> > > > > > > */
> > > > > > >       struct si_shader_ctx_state      fixed_func_tcs_shader;
> > > > > > >       struct si_resource              *wait_mem_scratch;
> > > > > > >       unsigned                        wait_mem_number;
> > > > > > >       uint16_t                        prefetch_L2_mask;
> > > > > > > 
> > > > > > > +     bool                            has_graphics;
> > > > > > >       bool                            gfx_flush_in_progress:1;
> > > > > > >       bool                            gfx_last_ib_is_busy:1;
> > > > > > >       bool                            compute_is_busy:1;
> > > > > > > 
> > > > > > >       unsigned                        num_gfx_cs_flushes;
> > > > > > >       unsigned                        initial_gfx_cs_size;
> > > > > > >       unsigned                        gpu_reset_counter;
> > > > > > >       unsigned                        last_dirty_tex_counter;
> > > > > > >       unsigned
> > 
> > last_compressed_colortex_counter;
> > > > > > >       unsigned                        last_num_draw_calls;
> > > > > > > diff --git a/src/gallium/drivers/radeonsi/si_state.c
> > > > > > > b/src/gallium/drivers/radeonsi/si_state.c
> > > > > > > index b49a1b3695e..458b108a7e3 100644
> > > > > > > --- a/src/gallium/drivers/radeonsi/si_state.c
> > > > > > > +++ b/src/gallium/drivers/radeonsi/si_state.c
> > > > > > > @@ -4699,21 +4699,21 @@ static void si_texture_barrier(struct
> > > > > > > pipe_context *ctx, unsigned flags)
> > > > > > > 
> > > > > > >       si_update_fb_dirtiness_after_rendering(sctx);
> > > > > > > 
> > > > > > >       /* Multisample surfaces are flushed in
> > 
> > si_decompress_textures. */
> > > > > > >       if (sctx->framebuffer.uncompressed_cb_mask)
> > > > > > >               si_make_CB_shader_coherent(sctx,
> > > > > > 
> > > > > > sctx->framebuffer.nr_samples,
> > > > > > > 
> > > > > > 
> > > > > > sctx->framebuffer.CB_has_shader_readable_metadata);
> > > > > > >  }
> > > > > > > 
> > > > > > >  /* This only ensures coherency for shader image/buffer stores.
> > 
> > */
> > > > > > > -static void si_memory_barrier(struct pipe_context *ctx, unsigned flags)
> > > > > > > +void si_memory_barrier(struct pipe_context *ctx, unsigned flags)
> > > > > > >  {
> > > > > > >       struct si_context *sctx = (struct si_context *)ctx;
> > > > > > > 
> > > > > > >       /* Subsequent commands must wait for all shader
> > 
> > invocations to
> > > > > > >        * complete. */
> > > > > > >       sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
> > > > > > >                        SI_CONTEXT_CS_PARTIAL_FLUSH;
> > > > > > > 
> > > > > > >       if (flags & PIPE_BARRIER_CONSTANT_BUFFER)
> > > > > > >               sctx->flags |= SI_CONTEXT_INV_SMEM_L1 |
> > > > > > > @@ -4813,21 +4813,20 @@ void si_init_state_functions(struct
> > 
> > si_context
> > > > > > > *sctx)
> > > > > > >       sctx->b.sampler_view_destroy = si_sampler_view_destroy;
> > > > > > > 
> > > > > > >       sctx->b.set_sample_mask = si_set_sample_mask;
> > > > > > > 
> > > > > > >       sctx->b.create_vertex_elements_state =
> > 
> > si_create_vertex_elements;
> > > > > > >       sctx->b.bind_vertex_elements_state =
> > 
> > si_bind_vertex_elements;
> > > > > > >       sctx->b.delete_vertex_elements_state =
> > 
> > si_delete_vertex_element;
> > > > > > >       sctx->b.set_vertex_buffers = si_set_vertex_buffers;
> > > > > > > 
> > > > > > >       sctx->b.texture_barrier = si_texture_barrier;
> > > > > > > -     sctx->b.memory_barrier = si_memory_barrier;
> > > > > > >       sctx->b.set_min_samples = si_set_min_samples;
> > > > > > >       sctx->b.set_tess_state = si_set_tess_state;
> > > > > > > 
> > > > > > >       sctx->b.set_active_query_state = si_set_active_query_state;
> > > > > > > 
> > > > > > >       si_init_config(sctx);
> > > > > > >  }
> > > > > > > 
> > > > > > >  void si_init_screen_state_functions(struct si_screen *sscreen)
> > > > > > >  {
> > > > > > > diff --git a/src/gallium/drivers/radeonsi/si_state.h
> > > > > > > b/src/gallium/drivers/radeonsi/si_state.h
> > > > > > > index 767e789276a..6faa4c511b1 100644
> > > > > > > --- a/src/gallium/drivers/radeonsi/si_state.h
> > > > > > > +++ b/src/gallium/drivers/radeonsi/si_state.h
> > > > > > > @@ -482,20 +482,21 @@ void
> > 
> > si_set_active_descriptors_for_shader(struct
> > > > > > > si_context *sctx,
> > > > > > >                                         struct si_shader_selector
> > > > > > 
> > > > > > *sel);
> > > > > > >  bool si_bindless_descriptor_can_reclaim_slab(void *priv,
> > > > > > >                                            struct pb_slab_entry
> > > > > > 
> > > > > > *entry);
> > > > > > >  struct pb_slab *si_bindless_descriptor_slab_alloc(void *priv,
> > > > > > 
> > > > > > unsigned
> > > > > > > heap,
> > > > > > >                                                 unsigned
> > 
> > entry_size,
> > > > > > >                                                 unsigned
> > 
> > group_index);
> > > > > > >  void si_bindless_descriptor_slab_free(void *priv, struct pb_slab
> > > > > > > *pslab);
> > > > > > >  void si_rebind_buffer(struct si_context *sctx, struct
> > 
> > pipe_resource
> > > > > > > *buf,
> > > > > > >                     uint64_t old_va);
> > > > > > >  /* si_state.c */
> > > > > > > +void si_memory_barrier(struct pipe_context *ctx, unsigned flags);
> > > > > > >  void si_init_state_functions(struct si_context *sctx);
> > > > > > >  void si_init_screen_state_functions(struct si_screen *sscreen);
> > > > > > >  void
> > > > > > >  si_make_buffer_descriptor(struct si_screen *screen, struct
> > > > > > 
> > > > > > si_resource
> > > > > > > *buf,
> > > > > > >                         enum pipe_format format,
> > > > > > >                         unsigned offset, unsigned size,
> > > > > > >                         uint32_t *state);
> > > > > > >  void
> > > > > > >  si_make_texture_descriptor(struct si_screen *screen,
> > > > > > >                          struct si_texture *tex,
> > > > > > > diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c
> > > > > > > b/src/gallium/drivers/radeonsi/si_state_draw.c
> > > > > > > index 9c968e39c2c..2a514f144b9 100644
> > > > > > > --- a/src/gallium/drivers/radeonsi/si_state_draw.c
> > > > > > > +++ b/src/gallium/drivers/radeonsi/si_state_draw.c
> > > > > > > @@ -872,21 +872,21 @@ static void si_emit_draw_packets(struct
> > > > > > > si_context *sctx,
> > > > > > > 
> > > > > > 
> > > > > >  S_0287F0_USE_OPAQUE(!!info->count_from_stream_output));
> > > > > > >               }
> > > > > > >       }
> > > > > > >  }
> > > > > > > 
> > > > > > >  static void si_emit_surface_sync(struct si_context *sctx,
> > > > > > >                                unsigned cp_coher_cntl)
> > > > > > >  {
> > > > > > >       struct radeon_cmdbuf *cs = sctx->gfx_cs;
> > > > > > > 
> > > > > > > -     if (sctx->chip_class >= GFX9) {
> > > > > > > +     if (sctx->chip_class >= GFX9 || !sctx->has_graphics) {
> > > > > > >               /* Flush caches and wait for the caches to assert
> > 
> > idle.
> > > > > > 
> > > > > > */
> > > > > > >               radeon_emit(cs, PKT3(PKT3_ACQUIRE_MEM, 5, 0));
> > > > > > >               radeon_emit(cs, cp_coher_cntl); /* CP_COHER_CNTL */
> > > > > > >               radeon_emit(cs, 0xffffffff);    /* CP_COHER_SIZE */
> > > > > > >               radeon_emit(cs, 0xffffff);      /*
> > 
> > CP_COHER_SIZE_HI */
> > > > > > >               radeon_emit(cs, 0);             /* CP_COHER_BASE */
> > > > > > >               radeon_emit(cs, 0);             /*
> > 
> > CP_COHER_BASE_HI */
> > > > > > >               radeon_emit(cs, 0x0000000A);    /* POLL_INTERVAL */
> > > > > > >       } else {
> > > > > > >               /* ACQUIRE_MEM is only required on a compute ring.
> > 
> > */
> > > > > > > @@ -895,20 +895,32 @@ static void si_emit_surface_sync(struct
> > > > > > > si_context *sctx,
> > > > > > >               radeon_emit(cs, 0xffffffff);      /* CP_COHER_SIZE
> > 
> > */
> > > > > > >               radeon_emit(cs, 0);               /* CP_COHER_BASE
> > 
> > */
> > > > > > >               radeon_emit(cs, 0x0000000A);      /* POLL_INTERVAL
> > 
> > */
> > > > > > >       }
> > > > > > >  }
> > > > > > > 
> > > > > > >  void si_emit_cache_flush(struct si_context *sctx)
> > > > > > >  {
> > > > > > >       struct radeon_cmdbuf *cs = sctx->gfx_cs;
> > > > > > >       uint32_t flags = sctx->flags;
> > > > > > > +
> > > > > > > +     if (!sctx->has_graphics) {
> > > > > > > +             /* Only process compute flags. */
> > > > > > > +             flags &= SI_CONTEXT_INV_ICACHE |
> > > > > > > +                      SI_CONTEXT_INV_SMEM_L1 |
> > > > > > > +                      SI_CONTEXT_INV_VMEM_L1 |
> > > > > > > +                      SI_CONTEXT_INV_GLOBAL_L2 |
> > > > > > > +                      SI_CONTEXT_WRITEBACK_GLOBAL_L2 |
> > > > > > > +                      SI_CONTEXT_INV_L2_METADATA |
> > > > > > > +                      SI_CONTEXT_CS_PARTIAL_FLUSH;
> > > > > > > +     }
> > > > > > > +
> > > > > > >       uint32_t cp_coher_cntl = 0;
> > > > > > >       uint32_t flush_cb_db = flags &
> > 
> > (SI_CONTEXT_FLUSH_AND_INV_CB |
> > > > > > > 
> > 
> >  SI_CONTEXT_FLUSH_AND_INV_DB);
> > > > > > > 
> > > > > > >       if (flags & SI_CONTEXT_FLUSH_AND_INV_CB)
> > > > > > >               sctx->num_cb_cache_flushes++;
> > > > > > >       if (flags & SI_CONTEXT_FLUSH_AND_INV_DB)
> > > > > > >               sctx->num_db_cache_flushes++;
> > > > > > > 
> > > > > > >       /* SI has a bug that it always flushes ICACHE and KCACHE if
> > > > > > 
> > > > > > either
> > > > > > > @@ -1061,25 +1073,26 @@ void si_emit_cache_flush(struct
> > 
> > si_context
> > > > > > > *sctx)
> > > > > > >                                 EOP_DATA_SEL_VALUE_32BIT,
> > > > > > >                                 sctx->wait_mem_scratch, va,
> > > > > > >                                 sctx->wait_mem_number,
> > 
> > SI_NOT_QUERY);
> > > > > > >               si_cp_wait_mem(sctx, cs, va, sctx->wait_mem_number,
> > > > > > 
> > > > > > 0xffffffff,
> > > > > > >                              WAIT_REG_MEM_EQUAL);
> > > > > > >       }
> > > > > > > 
> > > > > > >       /* Make sure ME is idle (it executes most packets) before
> > > > > > 
> > > > > > continuing.
> > > > > > >        * This prevents read-after-write hazards between PFP and
> > 
> > ME.
> > > > > > >        */
> > > > > > > -     if (cp_coher_cntl ||
> > > > > > > -         (flags & (SI_CONTEXT_CS_PARTIAL_FLUSH |
> > > > > > > -                         SI_CONTEXT_INV_VMEM_L1 |
> > > > > > > -                         SI_CONTEXT_INV_GLOBAL_L2 |
> > > > > > > -                         SI_CONTEXT_WRITEBACK_GLOBAL_L2))) {
> > > > > > > +     if (sctx->has_graphics &&
> > > > > > > +         (cp_coher_cntl ||
> > > > > > > +          (flags & (SI_CONTEXT_CS_PARTIAL_FLUSH |
> > > > > > > +                    SI_CONTEXT_INV_VMEM_L1 |
> > > > > > > +                    SI_CONTEXT_INV_GLOBAL_L2 |
> > > > > > > +                    SI_CONTEXT_WRITEBACK_GLOBAL_L2)))) {
> > > > > > >               radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
> > > > > > >               radeon_emit(cs, 0);
> > > > > > >       }
> > > > > > > 
> > > > > > >       /* SI-CI-VI only:
> > > > > > >        *   When one of the CP_COHER_CNTL.DEST_BASE flags is set,
> > > > > > > SURFACE_SYNC
> > > > > > >        *   waits for idle, so it should be last. SURFACE_SYNC is
> > 
> > done
> > > > > > 
> > > > > > in
> > > > > > > PFP.
> > > > > > >        *
> > > > > > >        * cp_coher_cntl should contain all necessary flags except
> > 
> > TC
> > > > > > 
> > > > > > flags
> > > > > > >        * at this point.
> > > > > > > diff --git a/src/gallium/drivers/radeonsi/si_texture.c
> > > > > > > b/src/gallium/drivers/radeonsi/si_texture.c
> > > > > > > index a50088d2d8f..581f90a7b2f 100644
> > > > > > > --- a/src/gallium/drivers/radeonsi/si_texture.c
> > > > > > > +++ b/src/gallium/drivers/radeonsi/si_texture.c
> > > > > > > @@ -457,20 +457,23 @@ static bool si_texture_discard_dcc(struct
> > > > > > > si_screen *sscreen,
> > > > > > >   *   compressed tiled
> > > > > > >   *
> > > > > > >   * \param sctx  the current context if you have one, or
> > > > > > > sscreen->aux_context
> > > > > > >   *              if you don't.
> > > > > > >   */
> > > > > > >  bool si_texture_disable_dcc(struct si_context *sctx,
> > > > > > >                           struct si_texture *tex)
> > > > > > >  {
> > > > > > >       struct si_screen *sscreen = sctx->screen;
> > > > > > > 
> > > > > > > +     if (!sctx->has_graphics)
> > > > > > > +             return si_texture_discard_dcc(sscreen, tex);
> > > > > > > +
> > > > > > >       if (!si_can_disable_dcc(tex))
> > > > > > >               return false;
> > > > > > > 
> > > > > > >       if (&sctx->b == sscreen->aux_context)
> > > > > > >               mtx_lock(&sscreen->aux_context_lock);
> > > > > > > 
> > > > > > >       /* Decompress DCC. */
> > > > > > >       si_decompress_dcc(sctx, tex);
> > > > > > >       sctx->b.flush(&sctx->b, NULL, 0);
> > > > > 
> > > > > _______________________________________________
> > > > > mesa-dev mailing list
> > > > > mesa-dev@lists.freedesktop.org
> > > > > 
> > 
> > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
> > > > 
> > > > 
> > 
> > --
> > Jan Vesely <jan.vesely@rutgers.edu>
> 
> _______________________________________________
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


On 2019-04-02 12:39 a.m., Marek Olšák wrote:
> On Mon, Apr 1, 2019 at 1:28 PM Jan Vesely <jan.vesely@rutgers.edu> wrote:
>> On Mon, 2019-04-01 at 12:30 -0400, Marek Olšák wrote:
>>> Does the attached patch fix the copy-buffer test?
>>
>> it does thanks.
>> Won't the compute only context still need some synchronization?
>> Is there anything else to guarantee that the data is in place after
>> return from resource_copy_region ?
> 
> The synchronization is the same on gfx and compute rings.

BTW, did you see https://bugs.freedesktop.org/show_bug.cgi?id=110214#c24 ?
It does indicate some kind of synchronization issue between
si_resource_copy_region using a compute ring and other operations using
a GFX ring.

On 2019-04-02 2:57 p.m., Marek Olšák wrote:
> On Tue, Apr 2, 2019 at 4:57 AM Michel Dänzer <michel@daenzer.net> wrote:
>> On 2019-04-02 12:39 a.m., Marek Olšák wrote:
>>> On Mon, Apr 1, 2019 at 1:28 PM Jan Vesely <jan.vesely@rutgers.edu>
>> wrote:
>>>> On Mon, 2019-04-01 at 12:30 -0400, Marek Olšák wrote:
>>>>> Does the attached patch fix the copy-buffer test?
>>>>
>>>> it does thanks.
>>>> Won't the compute only context still need some synchronization?
>>>> Is there anything else to guarantee that the data is in place after
>>>> return from resource_copy_region ?
>>>
>>> The synchronization is the same on gfx and compute rings.
>>
>> BTW, did you see https://bugs.freedesktop.org/show_bug.cgi?id=110214#c24
>> ? It does indicate some kind of synchronization issue between
>> si_resource_copy_region using a compute ring and other operations using
>> a GFX ring.
> 
> Only OpenCL uses the compute ring. xterm doesn't invoke OpenCL AFAIK.

That bugzilla comment is about the GTK menu issue, not about xterm.
Anyway, I doubt GTK uses OpenCL either, and neither does glamor, so it
was probably an invalid bisect result then.


Thanks,
On 2019-04-02 4:00 p.m., Michel Dänzer wrote:
> On 2019-04-02 2:57 p.m., Marek Olšák wrote:
>> On Tue, Apr 2, 2019 at 4:57 AM Michel Dänzer <michel@daenzer.net> wrote:
>>> On 2019-04-02 12:39 a.m., Marek Olšák wrote:
>>>> On Mon, Apr 1, 2019 at 1:28 PM Jan Vesely <jan.vesely@rutgers.edu>
>>> wrote:
>>>>> On Mon, 2019-04-01 at 12:30 -0400, Marek Olšák wrote:
>>>>>> Does the attached patch fix the copy-buffer test?
>>>>>
>>>>> it does thanks.
>>>>> Won't the compute only context still need some synchronization?
>>>>> Is there anything else to guarantee that the data is in place after
>>>>> return from resource_copy_region ?
>>>>
>>>> The synchronization is the same on gfx and compute rings.
>>>
>>> BTW, did you see https://bugs.freedesktop.org/show_bug.cgi?id=110214#c24
>>> ? It does indicate some kind of synchronization issue between
>>> si_resource_copy_region using a compute ring and other operations using
>>> a GFX ring.
>>
>> Only OpenCL uses the compute ring. xterm doesn't invoke OpenCL AFAIK.
> 
> That bugzilla comment is about the GTK menu issue, not about xterm.
> Anyway, I doubt GTK uses OpenCL either, and neither does glamor, so it
> was probably an invalid bisect result then.

Apparently not, after all:

https://bugs.freedesktop.org/110355

Looks like there is some issue with si_compute_copy_image, even if it
doesn't use a compute ring.
On Mon, Apr 8, 2019 at 10:46 AM Michel Dänzer <michel@daenzer.net> wrote:
>
> On 2019-04-02 4:00 p.m., Michel Dänzer wrote:
> > On 2019-04-02 2:57 p.m., Marek Olšák wrote:
> >> On Tue, Apr 2, 2019 at 4:57 AM Michel Dänzer <michel@daenzer.net> wrote:
> >>> On 2019-04-02 12:39 a.m., Marek Olšák wrote:
> >>>> On Mon, Apr 1, 2019 at 1:28 PM Jan Vesely <jan.vesely@rutgers.edu>
> >>> wrote:
> >>>>> On Mon, 2019-04-01 at 12:30 -0400, Marek Olšák wrote:
> >>>>>> Does the attached patch fix the copy-buffer test?
> >>>>>
> >>>>> it does thanks.
> >>>>> Won't the compute only context still need some synchronization?
> >>>>> Is there anything else to guarantee that the data is in place after
> >>>>> return from resource_copy_region ?
> >>>>
> >>>> The synchronization is the same on gfx and compute rings.
> >>>
> >>> BTW, did you see https://bugs.freedesktop.org/show_bug.cgi?id=110214#c24
> >>> ? It does indicate some kind of synchronization issue between
> >>> si_resource_copy_region using a compute ring and other operations using
> >>> a GFX ring.
> >>
> >> Only OpenCL uses the compute ring. xterm doesn't invoke OpenCL AFAIK.
> >
> > That bugzilla comment is about the GTK menu issue, not about xterm.
> > Anyway, I doubt GTK uses OpenCL either, and neither does glamor, so it
> > was probably an invalid bisect result then.
>
> Apparently not, after all:
>
> https://bugs.freedesktop.org/110355
>
> Looks like there is some issue with si_compute_copy_image, even if it
> doesn't use a compute ring.

Then there's also the old problem of SI hangs when using compute shader clears:
https://bugs.freedesktop.org/show_bug.cgi?id=108879

Jan

>
>
> --
> Earthling Michel Dänzer               |              https://www.amd.com
> Libre software enthusiast             |             Mesa and X developer
> _______________________________________________
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev