[1/2] radeonsi: always use compute rings for clover on CI and newer

Submitted by Marek Olšák on Feb. 11, 2019, 8:27 p.m.

Details

Message ID 20190211202704.5049-1-maraeo@gmail.com
State New
Headers show
Series "Series without cover letter" ( rev: 1 ) in Mesa

Not browsing as part of any series.

Commit Message

Marek Olšák Feb. 11, 2019, 8:27 p.m.
From: Marek Olšák <marek.olsak@amd.com>

Compute-only contexts leave all non-compute (graphics-only) context functions initialized to NULL.
---
 src/gallium/drivers/radeonsi/si_blit.c        | 14 ++-
 src/gallium/drivers/radeonsi/si_clear.c       |  7 +-
 src/gallium/drivers/radeonsi/si_compute.c     | 15 +--
 src/gallium/drivers/radeonsi/si_descriptors.c | 10 +-
 src/gallium/drivers/radeonsi/si_gfx_cs.c      | 29 +++---
 src/gallium/drivers/radeonsi/si_pipe.c        | 95 +++++++++++--------
 src/gallium/drivers/radeonsi/si_pipe.h        |  3 +-
 src/gallium/drivers/radeonsi/si_state.c       |  3 +-
 src/gallium/drivers/radeonsi/si_state.h       |  1 +
 src/gallium/drivers/radeonsi/si_state_draw.c  | 25 +++--
 src/gallium/drivers/radeonsi/si_texture.c     |  3 +
 11 files changed, 130 insertions(+), 75 deletions(-)

Patch hide | download patch | download mbox

diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c
index bb8d1cbd12d..f39cb5d143f 100644
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -1345,25 +1345,31 @@  static void si_flush_resource(struct pipe_context *ctx,
 
 		if (separate_dcc_dirty) {
 			tex->separate_dcc_dirty = false;
 			vi_separate_dcc_process_and_reset_stats(ctx, tex);
 		}
 	}
 }
 
 void si_decompress_dcc(struct si_context *sctx, struct si_texture *tex)
 {
-	if (!tex->dcc_offset)
+	/* If graphics is disabled, we can't decompress DCC, but it shouldn't
+	 * be compressed either. The caller should simply discard it.
+	 */
+	if (!tex->dcc_offset || !sctx->has_graphics)
 		return;
 
 	si_blit_decompress_color(sctx, tex, 0, tex->buffer.b.b.last_level,
 				 0, util_max_layer(&tex->buffer.b.b, 0),
 				 true);
 }
 
 void si_init_blit_functions(struct si_context *sctx)
 {
 	sctx->b.resource_copy_region = si_resource_copy_region;
-	sctx->b.blit = si_blit;
-	sctx->b.flush_resource = si_flush_resource;
-	sctx->b.generate_mipmap = si_generate_mipmap;
+
+	if (sctx->has_graphics) {
+		sctx->b.blit = si_blit;
+		sctx->b.flush_resource = si_flush_resource;
+		sctx->b.generate_mipmap = si_generate_mipmap;
+	}
 }
diff --git a/src/gallium/drivers/radeonsi/si_clear.c b/src/gallium/drivers/radeonsi/si_clear.c
index 9a00bb73b94..e1805f2a1c9 100644
--- a/src/gallium/drivers/radeonsi/si_clear.c
+++ b/src/gallium/drivers/radeonsi/si_clear.c
@@ -764,15 +764,18 @@  static void si_clear_texture(struct pipe_context *pipe,
 			util_clear_render_target(pipe, sf, &color,
 						 box->x, box->y,
 						 box->width, box->height);
 		}
 	}
 	pipe_surface_reference(&sf, NULL);
 }
 
 void si_init_clear_functions(struct si_context *sctx)
 {
-	sctx->b.clear = si_clear;
 	sctx->b.clear_render_target = si_clear_render_target;
-	sctx->b.clear_depth_stencil = si_clear_depth_stencil;
 	sctx->b.clear_texture = si_clear_texture;
+
+	if (sctx->has_graphics) {
+		sctx->b.clear = si_clear;
+		sctx->b.clear_depth_stencil = si_clear_depth_stencil;
+	}
 }
diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c
index 1a62b3e0844..87addd53976 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -880,26 +880,28 @@  static void si_launch_grid(
 		info->block[0] * info->block[1] * info->block[2] > 256;
 
 	if (cs_regalloc_hang)
 		sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
 				 SI_CONTEXT_CS_PARTIAL_FLUSH;
 
 	if (program->ir_type != PIPE_SHADER_IR_NATIVE &&
 	    program->shader.compilation_failed)
 		return;
 
-	if (sctx->last_num_draw_calls != sctx->num_draw_calls) {
-		si_update_fb_dirtiness_after_rendering(sctx);
-		sctx->last_num_draw_calls = sctx->num_draw_calls;
-	}
+	if (sctx->has_graphics) {
+		if (sctx->last_num_draw_calls != sctx->num_draw_calls) {
+			si_update_fb_dirtiness_after_rendering(sctx);
+			sctx->last_num_draw_calls = sctx->num_draw_calls;
+		}
 
-	si_decompress_textures(sctx, 1 << PIPE_SHADER_COMPUTE);
+		si_decompress_textures(sctx, 1 << PIPE_SHADER_COMPUTE);
+	}
 
 	/* Add buffer sizes for memory checking in need_cs_space. */
 	si_context_add_resource_size(sctx, &program->shader.bo->b.b);
 	/* TODO: add the scratch buffer */
 
 	if (info->indirect) {
 		si_context_add_resource_size(sctx, info->indirect);
 
 		/* Indirect buffers use TC L2 on GFX9, but not older hw. */
 		if (sctx->chip_class <= VI &&
@@ -917,21 +919,22 @@  static void si_launch_grid(
 	if (sctx->flags)
 		si_emit_cache_flush(sctx);
 
 	if (!si_switch_compute_shader(sctx, program, &program->shader,
 					code_object, info->pc))
 		return;
 
 	si_upload_compute_shader_descriptors(sctx);
 	si_emit_compute_shader_pointers(sctx);
 
-	if (si_is_atom_dirty(sctx, &sctx->atoms.s.render_cond)) {
+	if (sctx->has_graphics &&
+	    si_is_atom_dirty(sctx, &sctx->atoms.s.render_cond)) {
 		sctx->atoms.s.render_cond.emit(sctx);
 		si_set_atom_dirty(sctx, &sctx->atoms.s.render_cond, false);
 	}
 
 	if ((program->input_size ||
             program->ir_type == PIPE_SHADER_IR_NATIVE) &&
            unlikely(!si_upload_compute_input(sctx, code_object, info))) {
 		return;
 	}
 
diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
index 21d4ca946d3..0f22c55723c 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -2640,22 +2640,24 @@  void si_all_resident_buffers_begin_new_cs(struct si_context *sctx)
 
 	sctx->num_resident_handles += num_resident_tex_handles +
 					num_resident_img_handles;
 }
 
 /* INIT/DEINIT/UPLOAD */
 
 void si_init_all_descriptors(struct si_context *sctx)
 {
 	int i;
+	unsigned first_shader =
+		sctx->has_graphics ? 0 : PIPE_SHADER_COMPUTE;
 
-	for (i = 0; i < SI_NUM_SHADERS; i++) {
+	for (i = first_shader; i < SI_NUM_SHADERS; i++) {
 		bool is_2nd = sctx->chip_class >= GFX9 &&
 				     (i == PIPE_SHADER_TESS_CTRL ||
 				      i == PIPE_SHADER_GEOMETRY);
 		unsigned num_sampler_slots = SI_NUM_IMAGES / 2 + SI_NUM_SAMPLERS;
 		unsigned num_buffer_slots = SI_NUM_SHADER_BUFFERS + SI_NUM_CONST_BUFFERS;
 		int rel_dw_offset;
 		struct si_descriptors *desc;
 
 		if (is_2nd) {
 			if (i == PIPE_SHADER_TESS_CTRL) {
@@ -2714,30 +2716,34 @@  void si_init_all_descriptors(struct si_context *sctx)
 	si_init_bindless_descriptors(sctx, &sctx->bindless_descriptors,
 				     SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES,
 				     1024);
 
 	sctx->descriptors_dirty = u_bit_consecutive(0, SI_NUM_DESCS);
 
 	/* Set pipe_context functions. */
 	sctx->b.bind_sampler_states = si_bind_sampler_states;
 	sctx->b.set_shader_images = si_set_shader_images;
 	sctx->b.set_constant_buffer = si_pipe_set_constant_buffer;
-	sctx->b.set_polygon_stipple = si_set_polygon_stipple;
 	sctx->b.set_shader_buffers = si_set_shader_buffers;
 	sctx->b.set_sampler_views = si_set_sampler_views;
 	sctx->b.create_texture_handle = si_create_texture_handle;
 	sctx->b.delete_texture_handle = si_delete_texture_handle;
 	sctx->b.make_texture_handle_resident = si_make_texture_handle_resident;
 	sctx->b.create_image_handle = si_create_image_handle;
 	sctx->b.delete_image_handle = si_delete_image_handle;
 	sctx->b.make_image_handle_resident = si_make_image_handle_resident;
 
+	if (!sctx->has_graphics)
+		return;
+
+	sctx->b.set_polygon_stipple = si_set_polygon_stipple;
+
 	/* Shader user data. */
 	sctx->atoms.s.shader_pointers.emit = si_emit_graphics_shader_pointers;
 
 	/* Set default and immutable mappings. */
 	si_set_user_data_base(sctx, PIPE_SHADER_VERTEX, R_00B130_SPI_SHADER_USER_DATA_VS_0);
 
 	if (sctx->chip_class >= GFX9) {
 		si_set_user_data_base(sctx, PIPE_SHADER_TESS_CTRL,
 				      R_00B430_SPI_SHADER_USER_DATA_LS_0);
 		si_set_user_data_base(sctx, PIPE_SHADER_GEOMETRY,
diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c b/src/gallium/drivers/radeonsi/si_gfx_cs.c
index 3d64587fa2b..d0e7cf20b4c 100644
--- a/src/gallium/drivers/radeonsi/si_gfx_cs.c
+++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c
@@ -103,27 +103,29 @@  void si_flush_gfx_cs(struct si_context *ctx, unsigned flags,
 	 * This code is only needed when the driver flushes the GFX IB
 	 * internally, and it never asks for a fence handle.
 	 */
 	if (radeon_emitted(ctx->dma_cs, 0)) {
 		assert(fence == NULL); /* internal flushes only */
 		si_flush_dma_cs(ctx, flags, NULL);
 	}
 
 	ctx->gfx_flush_in_progress = true;
 
-	if (!LIST_IS_EMPTY(&ctx->active_queries))
-		si_suspend_queries(ctx);
-
-	ctx->streamout.suspended = false;
-	if (ctx->streamout.begin_emitted) {
-		si_emit_streamout_end(ctx);
-		ctx->streamout.suspended = true;
+	if (ctx->has_graphics) {
+		if (!LIST_IS_EMPTY(&ctx->active_queries))
+			si_suspend_queries(ctx);
+
+		ctx->streamout.suspended = false;
+		if (ctx->streamout.begin_emitted) {
+			si_emit_streamout_end(ctx);
+			ctx->streamout.suspended = true;
+		}
 	}
 
 	/* Make sure CP DMA is idle at the end of IBs after L2 prefetches
 	 * because the kernel doesn't wait for it. */
 	if (ctx->chip_class >= CIK)
 		si_cp_dma_wait_for_idle(ctx);
 
 	/* Wait for draw calls to finish if needed. */
 	if (wait_flags) {
 		ctx->flags |= wait_flags;
@@ -209,20 +211,29 @@  void si_begin_new_gfx_cs(struct si_context *ctx)
 	 * IB starts drawing.
 	 *
 	 * TODO: Do we also need to invalidate CB & DB caches?
 	 */
 	ctx->flags |= SI_CONTEXT_INV_ICACHE |
 		      SI_CONTEXT_INV_SMEM_L1 |
 		      SI_CONTEXT_INV_VMEM_L1 |
 		      SI_CONTEXT_INV_GLOBAL_L2 |
 		      SI_CONTEXT_START_PIPELINE_STATS;
 
+	ctx->cs_shader_state.initialized = false;
+	si_all_descriptors_begin_new_cs(ctx);
+	si_all_resident_buffers_begin_new_cs(ctx);
+
+	if (!ctx->has_graphics) {
+		ctx->initial_gfx_cs_size = ctx->gfx_cs->current.cdw;
+		return;
+	}
+
 	/* set all valid group as dirty so they get reemited on
 	 * next draw command
 	 */
 	si_pm4_reset_emitted(ctx);
 
 	/* The CS initialization should be emitted before everything else. */
 	si_pm4_emit(ctx, ctx->init_config);
 	if (ctx->init_config_gs_rings)
 		si_pm4_emit(ctx, ctx->init_config_gs_rings);
 
@@ -273,22 +284,20 @@  void si_begin_new_gfx_cs(struct si_context *ctx)
 	si_mark_atom_dirty(ctx, &ctx->atoms.s.db_render_state);
 	if (ctx->chip_class >= GFX9)
 		si_mark_atom_dirty(ctx, &ctx->atoms.s.dpbb_state);
 	si_mark_atom_dirty(ctx, &ctx->atoms.s.stencil_ref);
 	si_mark_atom_dirty(ctx, &ctx->atoms.s.spi_map);
 	si_mark_atom_dirty(ctx, &ctx->atoms.s.streamout_enable);
 	si_mark_atom_dirty(ctx, &ctx->atoms.s.render_cond);
 	/* CLEAR_STATE disables all window rectangles. */
 	if (!has_clear_state || ctx->num_window_rectangles > 0)
 		si_mark_atom_dirty(ctx, &ctx->atoms.s.window_rectangles);
-	si_all_descriptors_begin_new_cs(ctx);
-	si_all_resident_buffers_begin_new_cs(ctx);
 
 	ctx->scissors.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
 	ctx->viewports.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
 	ctx->viewports.depth_range_dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
 	si_mark_atom_dirty(ctx, &ctx->atoms.s.guardband);
 	si_mark_atom_dirty(ctx, &ctx->atoms.s.scissors);
 	si_mark_atom_dirty(ctx, &ctx->atoms.s.viewports);
 
 	si_mark_atom_dirty(ctx, &ctx->atoms.s.scratch_state);
 	if (ctx->scratch_buffer) {
@@ -316,22 +325,20 @@  void si_begin_new_gfx_cs(struct si_context *ctx)
 	ctx->last_multi_vgt_param = -1;
 	ctx->last_rast_prim = -1;
 	ctx->last_sc_line_stipple = ~0;
 	ctx->last_vs_state = ~0;
 	ctx->last_ls = NULL;
 	ctx->last_tcs = NULL;
 	ctx->last_tes_sh_base = -1;
 	ctx->last_num_tcs_input_cp = -1;
 	ctx->last_ls_hs_config = -1; /* impossible value */
 
-	ctx->cs_shader_state.initialized = false;
-
 	if (has_clear_state) {
 		ctx->tracked_regs.reg_value[SI_TRACKED_DB_RENDER_CONTROL] = 0x00000000;
 		ctx->tracked_regs.reg_value[SI_TRACKED_DB_COUNT_CONTROL] = 0x00000000;
 		ctx->tracked_regs.reg_value[SI_TRACKED_DB_RENDER_OVERRIDE2] = 0x00000000;
 		ctx->tracked_regs.reg_value[SI_TRACKED_DB_SHADER_CONTROL] = 0x00000000;
 		ctx->tracked_regs.reg_value[SI_TRACKED_CB_TARGET_MASK] = 0xffffffff;
 		ctx->tracked_regs.reg_value[SI_TRACKED_CB_DCC_CONTROL] = 0x00000000;
 		ctx->tracked_regs.reg_value[SI_TRACKED_SX_PS_DOWNCONVERT] = 0x00000000;
 		ctx->tracked_regs.reg_value[SI_TRACKED_SX_BLEND_OPT_EPSILON] = 0x00000000;
 		ctx->tracked_regs.reg_value[SI_TRACKED_SX_BLEND_OPT_CONTROL] = 0x00000000;
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index 20767c806d2..98c4fabc741 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -381,61 +381,56 @@  static struct pipe_context *si_create_context(struct pipe_screen *screen,
 {
 	struct si_context *sctx = CALLOC_STRUCT(si_context);
 	struct si_screen* sscreen = (struct si_screen *)screen;
 	struct radeon_winsys *ws = sscreen->ws;
 	int shader, i;
 	bool stop_exec_on_failure = (flags & PIPE_CONTEXT_LOSE_CONTEXT_ON_RESET) != 0;
 
 	if (!sctx)
 		return NULL;
 
+	/* Compute-only contexts exist on CI and newer; older chips always have graphics. */
+	sctx->has_graphics = sscreen->info.chip_class < CIK ||
+			     !(flags & PIPE_CONTEXT_COMPUTE_ONLY);
 	if (flags & PIPE_CONTEXT_DEBUG)
 		sscreen->record_llvm_ir = true; /* racy but not critical */
 
 	sctx->b.screen = screen; /* this must be set first */
 	sctx->b.priv = NULL;
 	sctx->b.destroy = si_destroy_context;
-	sctx->b.emit_string_marker = si_emit_string_marker;
-	sctx->b.set_debug_callback = si_set_debug_callback;
-	sctx->b.set_log_context = si_set_log_context;
-	sctx->b.set_context_param = si_set_context_param;
 	sctx->screen = sscreen; /* Easy accessing of screen/winsys. */
 	sctx->is_debug = (flags & PIPE_CONTEXT_DEBUG) != 0;
 
 	slab_create_child(&sctx->pool_transfers, &sscreen->pool_transfers);
 	slab_create_child(&sctx->pool_transfers_unsync, &sscreen->pool_transfers);
 
 	sctx->ws = sscreen->ws;
 	sctx->family = sscreen->info.family;
 	sctx->chip_class = sscreen->info.chip_class;
 
 	if (sscreen->info.has_gpu_reset_counter_query) {
 		sctx->gpu_reset_counter =
 			sctx->ws->query_value(sctx->ws, RADEON_GPU_RESET_COUNTER);
 	}
 
-	sctx->b.get_device_reset_status = si_get_reset_status;
-	sctx->b.set_device_reset_callback = si_set_device_reset_callback;
-
-	si_init_context_texture_functions(sctx);
-	si_init_query_functions(sctx);
 
 	if (sctx->chip_class == CIK ||
 	    sctx->chip_class == VI ||
 	    sctx->chip_class == GFX9) {
 		sctx->eop_bug_scratch = si_resource(
 			pipe_buffer_create(&sscreen->b, 0, PIPE_USAGE_DEFAULT,
 					   16 * sscreen->info.num_render_backends));
 		if (!sctx->eop_bug_scratch)
 			goto fail;
 	}
 
+	/* Initialize context allocators. */
 	sctx->allocator_zeroed_memory =
 		u_suballocator_create(&sctx->b, 128 * 1024,
 				      0, PIPE_USAGE_DEFAULT,
 				      SI_RESOURCE_FLAG_UNMAPPABLE |
 				      SI_RESOURCE_FLAG_CLEAR, false);
 	if (!sctx->allocator_zeroed_memory)
 		goto fail;
 
 	sctx->b.stream_uploader = u_upload_create(&sctx->b, 1024 * 1024,
 						    0, PIPE_USAGE_STREAM,
@@ -459,38 +454,22 @@  static struct pipe_context *si_create_context(struct pipe_screen *screen,
 	sctx->ctx = sctx->ws->ctx_create(sctx->ws);
 	if (!sctx->ctx)
 		goto fail;
 
 	if (sscreen->info.num_sdma_rings && !(sscreen->debug_flags & DBG(NO_ASYNC_DMA))) {
 		sctx->dma_cs = sctx->ws->cs_create(sctx->ctx, RING_DMA,
 						   (void*)si_flush_dma_cs,
 						   sctx, stop_exec_on_failure);
 	}
 
-	si_init_buffer_functions(sctx);
-	si_init_clear_functions(sctx);
-	si_init_blit_functions(sctx);
-	si_init_compute_functions(sctx);
-	si_init_compute_blit_functions(sctx);
-	si_init_debug_functions(sctx);
-	si_init_msaa_functions(sctx);
-	si_init_streamout_functions(sctx);
-
-	if (sscreen->info.has_hw_decode) {
-		sctx->b.create_video_codec = si_uvd_create_decoder;
-		sctx->b.create_video_buffer = si_video_buffer_create;
-	} else {
-		sctx->b.create_video_codec = vl_create_decoder;
-		sctx->b.create_video_buffer = vl_video_buffer_create;
-	}
-
-	sctx->gfx_cs = ws->cs_create(sctx->ctx, RING_GFX,
+	sctx->gfx_cs = ws->cs_create(sctx->ctx,
+				     sctx->has_graphics ? RING_GFX : RING_COMPUTE,
 				     (void*)si_flush_gfx_cs, sctx, stop_exec_on_failure);
 
 	/* Border colors. */
 	sctx->border_color_table = malloc(SI_MAX_BORDER_COLORS *
 					  sizeof(*sctx->border_color_table));
 	if (!sctx->border_color_table)
 		goto fail;
 
 	sctx->border_color_buffer = si_resource(
 		pipe_buffer_create(screen, 0, PIPE_USAGE_DEFAULT,
@@ -498,43 +477,76 @@  static struct pipe_context *si_create_context(struct pipe_screen *screen,
 				   sizeof(*sctx->border_color_table)));
 	if (!sctx->border_color_buffer)
 		goto fail;
 
 	sctx->border_color_map =
 		ws->buffer_map(sctx->border_color_buffer->buf,
 			       NULL, PIPE_TRANSFER_WRITE);
 	if (!sctx->border_color_map)
 		goto fail;
 
+	/* Initialize context functions used by graphics and compute. */
+	sctx->b.emit_string_marker = si_emit_string_marker;
+	sctx->b.set_debug_callback = si_set_debug_callback;
+	sctx->b.set_log_context = si_set_log_context;
+	sctx->b.set_context_param = si_set_context_param;
+	sctx->b.get_device_reset_status = si_get_reset_status;
+	sctx->b.set_device_reset_callback = si_set_device_reset_callback;
+	sctx->b.memory_barrier = si_memory_barrier;
+
 	si_init_all_descriptors(sctx);
+	si_init_buffer_functions(sctx);
+	si_init_clear_functions(sctx);
+	si_init_blit_functions(sctx);
+	si_init_compute_functions(sctx);
+	si_init_compute_blit_functions(sctx);
+	si_init_debug_functions(sctx);
 	si_init_fence_functions(sctx);
-	si_init_state_functions(sctx);
-	si_init_shader_functions(sctx);
-	si_init_viewport_functions(sctx);
-
-	if (sctx->chip_class >= CIK)
-		cik_init_sdma_functions(sctx);
-	else
-		si_init_dma_functions(sctx);
 
 	if (sscreen->debug_flags & DBG(FORCE_DMA))
 		sctx->b.resource_copy_region = sctx->dma_copy;
 
-	sctx->blitter = util_blitter_create(&sctx->b);
-	if (sctx->blitter == NULL)
-		goto fail;
-	sctx->blitter->skip_viewport_restore = true;
+	/* Initialize graphics-only context functions. */
+	if (sctx->has_graphics) {
+		si_init_context_texture_functions(sctx);
+		si_init_query_functions(sctx);
+		si_init_msaa_functions(sctx);
+		si_init_shader_functions(sctx);
+		si_init_state_functions(sctx);
+		si_init_streamout_functions(sctx);
+		si_init_viewport_functions(sctx);
+
+		sctx->blitter = util_blitter_create(&sctx->b);
+		if (sctx->blitter == NULL)
+			goto fail;
+		sctx->blitter->skip_viewport_restore = true;
 
-	si_init_draw_functions(sctx);
+		si_init_draw_functions(sctx);
+	}
+
+	/* Initialize SDMA functions. */
+	if (sctx->chip_class >= CIK)
+		cik_init_sdma_functions(sctx);
+	else
+		si_init_dma_functions(sctx);
 
 	sctx->sample_mask = 0xffff;
 
+	/* Initialize multimedia functions. */
+	if (sscreen->info.has_hw_decode) {
+		sctx->b.create_video_codec = si_uvd_create_decoder;
+		sctx->b.create_video_buffer = si_video_buffer_create;
+	} else {
+		sctx->b.create_video_codec = vl_create_decoder;
+		sctx->b.create_video_buffer = vl_video_buffer_create;
+	}
+
 	if (sctx->chip_class >= GFX9) {
 		sctx->wait_mem_scratch = si_resource(
 			pipe_buffer_create(screen, 0, PIPE_USAGE_DEFAULT, 4));
 		if (!sctx->wait_mem_scratch)
 			goto fail;
 
 		/* Initialize the memory. */
 		si_cp_write_data(sctx, sctx->wait_mem_scratch, 0, 4,
 				 V_370_MEM, V_370_ME, &sctx->wait_mem_number);
 	}
@@ -544,21 +556,22 @@  static struct pipe_context *si_create_context(struct pipe_screen *screen,
 	if (sctx->chip_class == CIK) {
 		sctx->null_const_buf.buffer =
 			pipe_aligned_buffer_create(screen,
 						   SI_RESOURCE_FLAG_32BIT,
 						   PIPE_USAGE_DEFAULT, 16,
 						   sctx->screen->info.tcc_cache_line_size);
 		if (!sctx->null_const_buf.buffer)
 			goto fail;
 		sctx->null_const_buf.buffer_size = sctx->null_const_buf.buffer->width0;
 
-		for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
+		unsigned start_shader = sctx->has_graphics ? 0 :  PIPE_SHADER_COMPUTE;
+		for (shader = start_shader; shader < SI_NUM_SHADERS; shader++) {
 			for (i = 0; i < SI_NUM_CONST_BUFFERS; i++) {
 				sctx->b.set_constant_buffer(&sctx->b, shader, i,
 							      &sctx->null_const_buf);
 			}
 		}
 
 		si_set_rw_buffer(sctx, SI_HS_CONST_DEFAULT_TESS_LEVELS,
 				 &sctx->null_const_buf);
 		si_set_rw_buffer(sctx, SI_VS_CONST_INSTANCE_DIVISORS,
 				 &sctx->null_const_buf);
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index b01d5744752..348e8e5bd26 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -777,21 +777,21 @@  struct si_saved_cs {
 };
 
 struct si_context {
 	struct pipe_context		b; /* base class */
 
 	enum radeon_family		family;
 	enum chip_class			chip_class;
 
 	struct radeon_winsys		*ws;
 	struct radeon_winsys_ctx	*ctx;
-	struct radeon_cmdbuf		*gfx_cs;
+	struct radeon_cmdbuf		*gfx_cs; /* compute IB if graphics is disabled */
 	struct radeon_cmdbuf		*dma_cs;
 	struct pipe_fence_handle	*last_gfx_fence;
 	struct pipe_fence_handle	*last_sdma_fence;
 	struct si_resource		*eop_bug_scratch;
 	struct u_upload_mgr		*cached_gtt_allocator;
 	struct threaded_context		*tc;
 	struct u_suballocator		*allocator_zeroed_memory;
 	struct slab_child_pool		pool_transfers;
 	struct slab_child_pool		pool_transfers_unsync; /* for threaded_context */
 	struct pipe_device_reset_callback device_reset_callback;
@@ -815,20 +815,21 @@  struct si_context {
 	void				*cs_clear_render_target;
 	void				*cs_clear_render_target_1d_array;
 	struct si_screen		*screen;
 	struct pipe_debug_callback	debug;
 	struct ac_llvm_compiler		compiler; /* only non-threaded compilation */
 	struct si_shader_ctx_state	fixed_func_tcs_shader;
 	struct si_resource		*wait_mem_scratch;
 	unsigned			wait_mem_number;
 	uint16_t			prefetch_L2_mask;
 
+	bool				has_graphics;
 	bool				gfx_flush_in_progress:1;
 	bool				gfx_last_ib_is_busy:1;
 	bool				compute_is_busy:1;
 
 	unsigned			num_gfx_cs_flushes;
 	unsigned			initial_gfx_cs_size;
 	unsigned			gpu_reset_counter;
 	unsigned			last_dirty_tex_counter;
 	unsigned			last_compressed_colortex_counter;
 	unsigned			last_num_draw_calls;
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index b49a1b3695e..458b108a7e3 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -4699,21 +4699,21 @@  static void si_texture_barrier(struct pipe_context *ctx, unsigned flags)
 
 	si_update_fb_dirtiness_after_rendering(sctx);
 
 	/* Multisample surfaces are flushed in si_decompress_textures. */
 	if (sctx->framebuffer.uncompressed_cb_mask)
 		si_make_CB_shader_coherent(sctx, sctx->framebuffer.nr_samples,
 					   sctx->framebuffer.CB_has_shader_readable_metadata);
 }
 
 /* This only ensures coherency for shader image/buffer stores. */
-static void si_memory_barrier(struct pipe_context *ctx, unsigned flags)
+void si_memory_barrier(struct pipe_context *ctx, unsigned flags)
 {
 	struct si_context *sctx = (struct si_context *)ctx;
 
 	/* Subsequent commands must wait for all shader invocations to
 	 * complete. */
 	sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
 	                 SI_CONTEXT_CS_PARTIAL_FLUSH;
 
 	if (flags & PIPE_BARRIER_CONSTANT_BUFFER)
 		sctx->flags |= SI_CONTEXT_INV_SMEM_L1 |
@@ -4813,21 +4813,20 @@  void si_init_state_functions(struct si_context *sctx)
 	sctx->b.sampler_view_destroy = si_sampler_view_destroy;
 
 	sctx->b.set_sample_mask = si_set_sample_mask;
 
 	sctx->b.create_vertex_elements_state = si_create_vertex_elements;
 	sctx->b.bind_vertex_elements_state = si_bind_vertex_elements;
 	sctx->b.delete_vertex_elements_state = si_delete_vertex_element;
 	sctx->b.set_vertex_buffers = si_set_vertex_buffers;
 
 	sctx->b.texture_barrier = si_texture_barrier;
-	sctx->b.memory_barrier = si_memory_barrier;
 	sctx->b.set_min_samples = si_set_min_samples;
 	sctx->b.set_tess_state = si_set_tess_state;
 
 	sctx->b.set_active_query_state = si_set_active_query_state;
 
 	si_init_config(sctx);
 }
 
 void si_init_screen_state_functions(struct si_screen *sscreen)
 {
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index 767e789276a..6faa4c511b1 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -482,20 +482,21 @@  void si_set_active_descriptors_for_shader(struct si_context *sctx,
 					  struct si_shader_selector *sel);
 bool si_bindless_descriptor_can_reclaim_slab(void *priv,
 					     struct pb_slab_entry *entry);
 struct pb_slab *si_bindless_descriptor_slab_alloc(void *priv, unsigned heap,
 						  unsigned entry_size,
 						  unsigned group_index);
 void si_bindless_descriptor_slab_free(void *priv, struct pb_slab *pslab);
 void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf,
 		      uint64_t old_va);
 /* si_state.c */
+void si_memory_barrier(struct pipe_context *ctx, unsigned flags);
 void si_init_state_functions(struct si_context *sctx);
 void si_init_screen_state_functions(struct si_screen *sscreen);
 void
 si_make_buffer_descriptor(struct si_screen *screen, struct si_resource *buf,
 			  enum pipe_format format,
 			  unsigned offset, unsigned size,
 			  uint32_t *state);
 void
 si_make_texture_descriptor(struct si_screen *screen,
 			   struct si_texture *tex,
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index 9c968e39c2c..2a514f144b9 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -872,21 +872,21 @@  static void si_emit_draw_packets(struct si_context *sctx,
 				        S_0287F0_USE_OPAQUE(!!info->count_from_stream_output));
 		}
 	}
 }
 
 static void si_emit_surface_sync(struct si_context *sctx,
 				 unsigned cp_coher_cntl)
 {
 	struct radeon_cmdbuf *cs = sctx->gfx_cs;
 
-	if (sctx->chip_class >= GFX9) {
+	if (sctx->chip_class >= GFX9 || !sctx->has_graphics) {
 		/* Flush caches and wait for the caches to assert idle. */
 		radeon_emit(cs, PKT3(PKT3_ACQUIRE_MEM, 5, 0));
 		radeon_emit(cs, cp_coher_cntl);	/* CP_COHER_CNTL */
 		radeon_emit(cs, 0xffffffff);	/* CP_COHER_SIZE */
 		radeon_emit(cs, 0xffffff);	/* CP_COHER_SIZE_HI */
 		radeon_emit(cs, 0);		/* CP_COHER_BASE */
 		radeon_emit(cs, 0);		/* CP_COHER_BASE_HI */
 		radeon_emit(cs, 0x0000000A);	/* POLL_INTERVAL */
 	} else {
 		/* ACQUIRE_MEM is only required on a compute ring. */
@@ -895,20 +895,32 @@  static void si_emit_surface_sync(struct si_context *sctx,
 		radeon_emit(cs, 0xffffffff);      /* CP_COHER_SIZE */
 		radeon_emit(cs, 0);               /* CP_COHER_BASE */
 		radeon_emit(cs, 0x0000000A);      /* POLL_INTERVAL */
 	}
 }
 
 void si_emit_cache_flush(struct si_context *sctx)
 {
 	struct radeon_cmdbuf *cs = sctx->gfx_cs;
 	uint32_t flags = sctx->flags;
+
+	if (!sctx->has_graphics) {
+		/* Only process compute flags. */
+		flags &= SI_CONTEXT_INV_ICACHE |
+			 SI_CONTEXT_INV_SMEM_L1 |
+			 SI_CONTEXT_INV_VMEM_L1 |
+			 SI_CONTEXT_INV_GLOBAL_L2 |
+			 SI_CONTEXT_WRITEBACK_GLOBAL_L2 |
+			 SI_CONTEXT_INV_L2_METADATA |
+			 SI_CONTEXT_CS_PARTIAL_FLUSH;
+	}
+
 	uint32_t cp_coher_cntl = 0;
 	uint32_t flush_cb_db = flags & (SI_CONTEXT_FLUSH_AND_INV_CB |
 					SI_CONTEXT_FLUSH_AND_INV_DB);
 
 	if (flags & SI_CONTEXT_FLUSH_AND_INV_CB)
 		sctx->num_cb_cache_flushes++;
 	if (flags & SI_CONTEXT_FLUSH_AND_INV_DB)
 		sctx->num_db_cache_flushes++;
 
 	/* SI has a bug that it always flushes ICACHE and KCACHE if either
@@ -1061,25 +1073,26 @@  void si_emit_cache_flush(struct si_context *sctx)
 				  EOP_DATA_SEL_VALUE_32BIT,
 				  sctx->wait_mem_scratch, va,
 				  sctx->wait_mem_number, SI_NOT_QUERY);
 		si_cp_wait_mem(sctx, cs, va, sctx->wait_mem_number, 0xffffffff,
 			       WAIT_REG_MEM_EQUAL);
 	}
 
 	/* Make sure ME is idle (it executes most packets) before continuing.
 	 * This prevents read-after-write hazards between PFP and ME.
 	 */
-	if (cp_coher_cntl ||
-	    (flags & (SI_CONTEXT_CS_PARTIAL_FLUSH |
-			    SI_CONTEXT_INV_VMEM_L1 |
-			    SI_CONTEXT_INV_GLOBAL_L2 |
-			    SI_CONTEXT_WRITEBACK_GLOBAL_L2))) {
+	if (sctx->has_graphics &&
+	    (cp_coher_cntl ||
+	     (flags & (SI_CONTEXT_CS_PARTIAL_FLUSH |
+		       SI_CONTEXT_INV_VMEM_L1 |
+		       SI_CONTEXT_INV_GLOBAL_L2 |
+		       SI_CONTEXT_WRITEBACK_GLOBAL_L2)))) {
 		radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
 		radeon_emit(cs, 0);
 	}
 
 	/* SI-CI-VI only:
 	 *   When one of the CP_COHER_CNTL.DEST_BASE flags is set, SURFACE_SYNC
 	 *   waits for idle, so it should be last. SURFACE_SYNC is done in PFP.
 	 *
 	 * cp_coher_cntl should contain all necessary flags except TC flags
 	 * at this point.
diff --git a/src/gallium/drivers/radeonsi/si_texture.c b/src/gallium/drivers/radeonsi/si_texture.c
index a50088d2d8f..581f90a7b2f 100644
--- a/src/gallium/drivers/radeonsi/si_texture.c
+++ b/src/gallium/drivers/radeonsi/si_texture.c
@@ -457,20 +457,23 @@  static bool si_texture_discard_dcc(struct si_screen *sscreen,
  *   compressed tiled
  *
  * \param sctx  the current context if you have one, or sscreen->aux_context
  *              if you don't.
  */
 bool si_texture_disable_dcc(struct si_context *sctx,
 			    struct si_texture *tex)
 {
 	struct si_screen *sscreen = sctx->screen;
 
+	if (!sctx->has_graphics)
+		return si_texture_discard_dcc(sscreen, tex);
+
 	if (!si_can_disable_dcc(tex))
 		return false;
 
 	if (&sctx->b == sscreen->aux_context)
 		mtx_lock(&sscreen->aux_context_lock);
 
 	/* Decompress DCC. */
 	si_decompress_dcc(sctx, tex);
 	sctx->b.flush(&sctx->b, NULL, 0);
 

Comments

On 11.02.19 21:27, Marek Olšák wrote:
> From: Marek Olšák <marek.olsak@amd.com>
> 
> initialize all non-compute context functions to NULL.
> ---
>   src/gallium/drivers/radeonsi/si_blit.c        | 14 ++-
>   src/gallium/drivers/radeonsi/si_clear.c       |  7 +-
>   src/gallium/drivers/radeonsi/si_compute.c     | 15 +--
>   src/gallium/drivers/radeonsi/si_descriptors.c | 10 +-
>   src/gallium/drivers/radeonsi/si_gfx_cs.c      | 29 +++---
>   src/gallium/drivers/radeonsi/si_pipe.c        | 95 +++++++++++--------
>   src/gallium/drivers/radeonsi/si_pipe.h        |  3 +-
>   src/gallium/drivers/radeonsi/si_state.c       |  3 +-
>   src/gallium/drivers/radeonsi/si_state.h       |  1 +
>   src/gallium/drivers/radeonsi/si_state_draw.c  | 25 +++--
>   src/gallium/drivers/radeonsi/si_texture.c     |  3 +
>   11 files changed, 130 insertions(+), 75 deletions(-)
> 
[snip]
> diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
> index 20767c806d2..98c4fabc741 100644
> --- a/src/gallium/drivers/radeonsi/si_pipe.c
> +++ b/src/gallium/drivers/radeonsi/si_pipe.c
> @@ -381,61 +381,56 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
>   {
>   	struct si_context *sctx = CALLOC_STRUCT(si_context);
>   	struct si_screen* sscreen = (struct si_screen *)screen;
>   	struct radeon_winsys *ws = sscreen->ws;
>   	int shader, i;
>   	bool stop_exec_on_failure = (flags & PIPE_CONTEXT_LOSE_CONTEXT_ON_RESET) != 0;
>   
>   	if (!sctx)
>   		return NULL;
>   
> +	sctx->has_graphics = sscreen->info.chip_class >= CIK &&
> +			     !(flags & PIPE_CONTEXT_COMPUTE_ONLY);

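The logic seems backwards here for SI: as written, has_graphics is always
false when chip_class < CIK, so an SI context would never get graphics, even
when PIPE_CONTEXT_COMPUTE_ONLY is not set. Since only CI and newer are supposed
to use compute rings, I would expect something like
"chip_class == SI || !(flags & PIPE_CONTEXT_COMPUTE_ONLY)" instead.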

Cheers,
Nicolai



> +
>   	if (flags & PIPE_CONTEXT_DEBUG)
>   		sscreen->record_llvm_ir = true; /* racy but not critical */
>   
>   	sctx->b.screen = screen; /* this must be set first */
>   	sctx->b.priv = NULL;
>   	sctx->b.destroy = si_destroy_context;
> -	sctx->b.emit_string_marker = si_emit_string_marker;
> -	sctx->b.set_debug_callback = si_set_debug_callback;
> -	sctx->b.set_log_context = si_set_log_context;
> -	sctx->b.set_context_param = si_set_context_param;
>   	sctx->screen = sscreen; /* Easy accessing of screen/winsys. */
>   	sctx->is_debug = (flags & PIPE_CONTEXT_DEBUG) != 0;
>   
>   	slab_create_child(&sctx->pool_transfers, &sscreen->pool_transfers);
>   	slab_create_child(&sctx->pool_transfers_unsync, &sscreen->pool_transfers);
>   
>   	sctx->ws = sscreen->ws;
>   	sctx->family = sscreen->info.family;
>   	sctx->chip_class = sscreen->info.chip_class;
>   
>   	if (sscreen->info.has_gpu_reset_counter_query) {
>   		sctx->gpu_reset_counter =
>   			sctx->ws->query_value(sctx->ws, RADEON_GPU_RESET_COUNTER);
>   	}
>   
> -	sctx->b.get_device_reset_status = si_get_reset_status;
> -	sctx->b.set_device_reset_callback = si_set_device_reset_callback;
> -
> -	si_init_context_texture_functions(sctx);
> -	si_init_query_functions(sctx);
>   
>   	if (sctx->chip_class == CIK ||
>   	    sctx->chip_class == VI ||
>   	    sctx->chip_class == GFX9) {
>   		sctx->eop_bug_scratch = si_resource(
>   			pipe_buffer_create(&sscreen->b, 0, PIPE_USAGE_DEFAULT,
>   					   16 * sscreen->info.num_render_backends));
>   		if (!sctx->eop_bug_scratch)
>   			goto fail;
>   	}
>   
> +	/* Initialize context allocators. */
>   	sctx->allocator_zeroed_memory =
>   		u_suballocator_create(&sctx->b, 128 * 1024,
>   				      0, PIPE_USAGE_DEFAULT,
>   				      SI_RESOURCE_FLAG_UNMAPPABLE |
>   				      SI_RESOURCE_FLAG_CLEAR, false);
>   	if (!sctx->allocator_zeroed_memory)
>   		goto fail;
>   
>   	sctx->b.stream_uploader = u_upload_create(&sctx->b, 1024 * 1024,
>   						    0, PIPE_USAGE_STREAM,
> @@ -459,38 +454,22 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
>   	sctx->ctx = sctx->ws->ctx_create(sctx->ws);
>   	if (!sctx->ctx)
>   		goto fail;
>   
>   	if (sscreen->info.num_sdma_rings && !(sscreen->debug_flags & DBG(NO_ASYNC_DMA))) {
>   		sctx->dma_cs = sctx->ws->cs_create(sctx->ctx, RING_DMA,
>   						   (void*)si_flush_dma_cs,
>   						   sctx, stop_exec_on_failure);
>   	}
>   
> -	si_init_buffer_functions(sctx);
> -	si_init_clear_functions(sctx);
> -	si_init_blit_functions(sctx);
> -	si_init_compute_functions(sctx);
> -	si_init_compute_blit_functions(sctx);
> -	si_init_debug_functions(sctx);
> -	si_init_msaa_functions(sctx);
> -	si_init_streamout_functions(sctx);
> -
> -	if (sscreen->info.has_hw_decode) {
> -		sctx->b.create_video_codec = si_uvd_create_decoder;
> -		sctx->b.create_video_buffer = si_video_buffer_create;
> -	} else {
> -		sctx->b.create_video_codec = vl_create_decoder;
> -		sctx->b.create_video_buffer = vl_video_buffer_create;
> -	}
> -
> -	sctx->gfx_cs = ws->cs_create(sctx->ctx, RING_GFX,
> +	sctx->gfx_cs = ws->cs_create(sctx->ctx,
> +				     sctx->has_graphics ? RING_GFX : RING_COMPUTE,
>   				     (void*)si_flush_gfx_cs, sctx, stop_exec_on_failure);
>   
>   	/* Border colors. */
>   	sctx->border_color_table = malloc(SI_MAX_BORDER_COLORS *
>   					  sizeof(*sctx->border_color_table));
>   	if (!sctx->border_color_table)
>   		goto fail;
>   
>   	sctx->border_color_buffer = si_resource(
>   		pipe_buffer_create(screen, 0, PIPE_USAGE_DEFAULT,
> @@ -498,43 +477,76 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
>   				   sizeof(*sctx->border_color_table)));
>   	if (!sctx->border_color_buffer)
>   		goto fail;
>   
>   	sctx->border_color_map =
>   		ws->buffer_map(sctx->border_color_buffer->buf,
>   			       NULL, PIPE_TRANSFER_WRITE);
>   	if (!sctx->border_color_map)
>   		goto fail;
>   
> +	/* Initialize context functions used by graphics and compute. */
> +	sctx->b.emit_string_marker = si_emit_string_marker;
> +	sctx->b.set_debug_callback = si_set_debug_callback;
> +	sctx->b.set_log_context = si_set_log_context;
> +	sctx->b.set_context_param = si_set_context_param;
> +	sctx->b.get_device_reset_status = si_get_reset_status;
> +	sctx->b.set_device_reset_callback = si_set_device_reset_callback;
> +	sctx->b.memory_barrier = si_memory_barrier;
> +
>   	si_init_all_descriptors(sctx);
> +	si_init_buffer_functions(sctx);
> +	si_init_clear_functions(sctx);
> +	si_init_blit_functions(sctx);
> +	si_init_compute_functions(sctx);
> +	si_init_compute_blit_functions(sctx);
> +	si_init_debug_functions(sctx);
>   	si_init_fence_functions(sctx);
> -	si_init_state_functions(sctx);
> -	si_init_shader_functions(sctx);
> -	si_init_viewport_functions(sctx);
> -
> -	if (sctx->chip_class >= CIK)
> -		cik_init_sdma_functions(sctx);
> -	else
> -		si_init_dma_functions(sctx);
>   
>   	if (sscreen->debug_flags & DBG(FORCE_DMA))
>   		sctx->b.resource_copy_region = sctx->dma_copy;
>   
> -	sctx->blitter = util_blitter_create(&sctx->b);
> -	if (sctx->blitter == NULL)
> -		goto fail;
> -	sctx->blitter->skip_viewport_restore = true;
> +	/* Initialize graphics-only context functions. */
> +	if (sctx->has_graphics) {
> +		si_init_context_texture_functions(sctx);
> +		si_init_query_functions(sctx);
> +		si_init_msaa_functions(sctx);
> +		si_init_shader_functions(sctx);
> +		si_init_state_functions(sctx);
> +		si_init_streamout_functions(sctx);
> +		si_init_viewport_functions(sctx);
> +
> +		sctx->blitter = util_blitter_create(&sctx->b);
> +		if (sctx->blitter == NULL)
> +			goto fail;
> +		sctx->blitter->skip_viewport_restore = true;
>   
> -	si_init_draw_functions(sctx);
> +		si_init_draw_functions(sctx);
> +	}
> +
> +	/* Initialize SDMA functions. */
> +	if (sctx->chip_class >= CIK)
> +		cik_init_sdma_functions(sctx);
> +	else
> +		si_init_dma_functions(sctx);
>   
>   	sctx->sample_mask = 0xffff;
>   
> +	/* Initialize multimedia functions. */
> +	if (sscreen->info.has_hw_decode) {
> +		sctx->b.create_video_codec = si_uvd_create_decoder;
> +		sctx->b.create_video_buffer = si_video_buffer_create;
> +	} else {
> +		sctx->b.create_video_codec = vl_create_decoder;
> +		sctx->b.create_video_buffer = vl_video_buffer_create;
> +	}
> +
>   	if (sctx->chip_class >= GFX9) {
>   		sctx->wait_mem_scratch = si_resource(
>   			pipe_buffer_create(screen, 0, PIPE_USAGE_DEFAULT, 4));
>   		if (!sctx->wait_mem_scratch)
>   			goto fail;
>   
>   		/* Initialize the memory. */
>   		si_cp_write_data(sctx, sctx->wait_mem_scratch, 0, 4,
>   				 V_370_MEM, V_370_ME, &sctx->wait_mem_number);
>   	}
> @@ -544,21 +556,22 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
>   	if (sctx->chip_class == CIK) {
>   		sctx->null_const_buf.buffer =
>   			pipe_aligned_buffer_create(screen,
>   						   SI_RESOURCE_FLAG_32BIT,
>   						   PIPE_USAGE_DEFAULT, 16,
>   						   sctx->screen->info.tcc_cache_line_size);
>   		if (!sctx->null_const_buf.buffer)
>   			goto fail;
>   		sctx->null_const_buf.buffer_size = sctx->null_const_buf.buffer->width0;
>   
> -		for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
> +		unsigned start_shader = sctx->has_graphics ? 0 :  PIPE_SHADER_COMPUTE;
> +		for (shader = start_shader; shader < SI_NUM_SHADERS; shader++) {
>   			for (i = 0; i < SI_NUM_CONST_BUFFERS; i++) {
>   				sctx->b.set_constant_buffer(&sctx->b, shader, i,
>   							      &sctx->null_const_buf);
>   			}
>   		}
>   
>   		si_set_rw_buffer(sctx, SI_HS_CONST_DEFAULT_TESS_LEVELS,
>   				 &sctx->null_const_buf);
>   		si_set_rw_buffer(sctx, SI_VS_CONST_INSTANCE_DIVISORS,
>   				 &sctx->null_const_buf);
> diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
> index b01d5744752..348e8e5bd26 100644
> --- a/src/gallium/drivers/radeonsi/si_pipe.h
> +++ b/src/gallium/drivers/radeonsi/si_pipe.h
> @@ -777,21 +777,21 @@ struct si_saved_cs {
>   };
>   
>   struct si_context {
>   	struct pipe_context		b; /* base class */
>   
>   	enum radeon_family		family;
>   	enum chip_class			chip_class;
>   
>   	struct radeon_winsys		*ws;
>   	struct radeon_winsys_ctx	*ctx;
> -	struct radeon_cmdbuf		*gfx_cs;
> +	struct radeon_cmdbuf		*gfx_cs; /* compute IB if graphics is disabled */
>   	struct radeon_cmdbuf		*dma_cs;
>   	struct pipe_fence_handle	*last_gfx_fence;
>   	struct pipe_fence_handle	*last_sdma_fence;
>   	struct si_resource		*eop_bug_scratch;
>   	struct u_upload_mgr		*cached_gtt_allocator;
>   	struct threaded_context		*tc;
>   	struct u_suballocator		*allocator_zeroed_memory;
>   	struct slab_child_pool		pool_transfers;
>   	struct slab_child_pool		pool_transfers_unsync; /* for threaded_context */
>   	struct pipe_device_reset_callback device_reset_callback;
> @@ -815,20 +815,21 @@ struct si_context {
>   	void				*cs_clear_render_target;
>   	void				*cs_clear_render_target_1d_array;
>   	struct si_screen		*screen;
>   	struct pipe_debug_callback	debug;
>   	struct ac_llvm_compiler		compiler; /* only non-threaded compilation */
>   	struct si_shader_ctx_state	fixed_func_tcs_shader;
>   	struct si_resource		*wait_mem_scratch;
>   	unsigned			wait_mem_number;
>   	uint16_t			prefetch_L2_mask;
>   
> +	bool				has_graphics;
>   	bool				gfx_flush_in_progress:1;
>   	bool				gfx_last_ib_is_busy:1;
>   	bool				compute_is_busy:1;
>   
>   	unsigned			num_gfx_cs_flushes;
>   	unsigned			initial_gfx_cs_size;
>   	unsigned			gpu_reset_counter;
>   	unsigned			last_dirty_tex_counter;
>   	unsigned			last_compressed_colortex_counter;
>   	unsigned			last_num_draw_calls;
> diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
> index b49a1b3695e..458b108a7e3 100644
> --- a/src/gallium/drivers/radeonsi/si_state.c
> +++ b/src/gallium/drivers/radeonsi/si_state.c
> @@ -4699,21 +4699,21 @@ static void si_texture_barrier(struct pipe_context *ctx, unsigned flags)
>   
>   	si_update_fb_dirtiness_after_rendering(sctx);
>   
>   	/* Multisample surfaces are flushed in si_decompress_textures. */
>   	if (sctx->framebuffer.uncompressed_cb_mask)
>   		si_make_CB_shader_coherent(sctx, sctx->framebuffer.nr_samples,
>   					   sctx->framebuffer.CB_has_shader_readable_metadata);
>   }
>   
>   /* This only ensures coherency for shader image/buffer stores. */
> -static void si_memory_barrier(struct pipe_context *ctx, unsigned flags)
> +void si_memory_barrier(struct pipe_context *ctx, unsigned flags)
>   {
>   	struct si_context *sctx = (struct si_context *)ctx;
>   
>   	/* Subsequent commands must wait for all shader invocations to
>   	 * complete. */
>   	sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
>   	                 SI_CONTEXT_CS_PARTIAL_FLUSH;
>   
>   	if (flags & PIPE_BARRIER_CONSTANT_BUFFER)
>   		sctx->flags |= SI_CONTEXT_INV_SMEM_L1 |
> @@ -4813,21 +4813,20 @@ void si_init_state_functions(struct si_context *sctx)
>   	sctx->b.sampler_view_destroy = si_sampler_view_destroy;
>   
>   	sctx->b.set_sample_mask = si_set_sample_mask;
>   
>   	sctx->b.create_vertex_elements_state = si_create_vertex_elements;
>   	sctx->b.bind_vertex_elements_state = si_bind_vertex_elements;
>   	sctx->b.delete_vertex_elements_state = si_delete_vertex_element;
>   	sctx->b.set_vertex_buffers = si_set_vertex_buffers;
>   
>   	sctx->b.texture_barrier = si_texture_barrier;
> -	sctx->b.memory_barrier = si_memory_barrier;
>   	sctx->b.set_min_samples = si_set_min_samples;
>   	sctx->b.set_tess_state = si_set_tess_state;
>   
>   	sctx->b.set_active_query_state = si_set_active_query_state;
>   
>   	si_init_config(sctx);
>   }
>   
>   void si_init_screen_state_functions(struct si_screen *sscreen)
>   {
> diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
> index 767e789276a..6faa4c511b1 100644
> --- a/src/gallium/drivers/radeonsi/si_state.h
> +++ b/src/gallium/drivers/radeonsi/si_state.h
> @@ -482,20 +482,21 @@ void si_set_active_descriptors_for_shader(struct si_context *sctx,
>   					  struct si_shader_selector *sel);
>   bool si_bindless_descriptor_can_reclaim_slab(void *priv,
>   					     struct pb_slab_entry *entry);
>   struct pb_slab *si_bindless_descriptor_slab_alloc(void *priv, unsigned heap,
>   						  unsigned entry_size,
>   						  unsigned group_index);
>   void si_bindless_descriptor_slab_free(void *priv, struct pb_slab *pslab);
>   void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf,
>   		      uint64_t old_va);
>   /* si_state.c */
> +void si_memory_barrier(struct pipe_context *ctx, unsigned flags);
>   void si_init_state_functions(struct si_context *sctx);
>   void si_init_screen_state_functions(struct si_screen *sscreen);
>   void
>   si_make_buffer_descriptor(struct si_screen *screen, struct si_resource *buf,
>   			  enum pipe_format format,
>   			  unsigned offset, unsigned size,
>   			  uint32_t *state);
>   void
>   si_make_texture_descriptor(struct si_screen *screen,
>   			   struct si_texture *tex,
> diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
> index 9c968e39c2c..2a514f144b9 100644
> --- a/src/gallium/drivers/radeonsi/si_state_draw.c
> +++ b/src/gallium/drivers/radeonsi/si_state_draw.c
> @@ -872,21 +872,21 @@ static void si_emit_draw_packets(struct si_context *sctx,
>   				        S_0287F0_USE_OPAQUE(!!info->count_from_stream_output));
>   		}
>   	}
>   }
>   
>   static void si_emit_surface_sync(struct si_context *sctx,
>   				 unsigned cp_coher_cntl)
>   {
>   	struct radeon_cmdbuf *cs = sctx->gfx_cs;
>   
> -	if (sctx->chip_class >= GFX9) {
> +	if (sctx->chip_class >= GFX9 || !sctx->has_graphics) {
>   		/* Flush caches and wait for the caches to assert idle. */
>   		radeon_emit(cs, PKT3(PKT3_ACQUIRE_MEM, 5, 0));
>   		radeon_emit(cs, cp_coher_cntl);	/* CP_COHER_CNTL */
>   		radeon_emit(cs, 0xffffffff);	/* CP_COHER_SIZE */
>   		radeon_emit(cs, 0xffffff);	/* CP_COHER_SIZE_HI */
>   		radeon_emit(cs, 0);		/* CP_COHER_BASE */
>   		radeon_emit(cs, 0);		/* CP_COHER_BASE_HI */
>   		radeon_emit(cs, 0x0000000A);	/* POLL_INTERVAL */
>   	} else {
>   		/* ACQUIRE_MEM is only required on a compute ring. */
> @@ -895,20 +895,32 @@ static void si_emit_surface_sync(struct si_context *sctx,
>   		radeon_emit(cs, 0xffffffff);      /* CP_COHER_SIZE */
>   		radeon_emit(cs, 0);               /* CP_COHER_BASE */
>   		radeon_emit(cs, 0x0000000A);      /* POLL_INTERVAL */
>   	}
>   }
>   
>   void si_emit_cache_flush(struct si_context *sctx)
>   {
>   	struct radeon_cmdbuf *cs = sctx->gfx_cs;
>   	uint32_t flags = sctx->flags;
> +
> +	if (!sctx->has_graphics) {
> +		/* Only process compute flags. */
> +		flags &= SI_CONTEXT_INV_ICACHE |
> +			 SI_CONTEXT_INV_SMEM_L1 |
> +			 SI_CONTEXT_INV_VMEM_L1 |
> +			 SI_CONTEXT_INV_GLOBAL_L2 |
> +			 SI_CONTEXT_WRITEBACK_GLOBAL_L2 |
> +			 SI_CONTEXT_INV_L2_METADATA |
> +			 SI_CONTEXT_CS_PARTIAL_FLUSH;
> +	}
> +
>   	uint32_t cp_coher_cntl = 0;
>   	uint32_t flush_cb_db = flags & (SI_CONTEXT_FLUSH_AND_INV_CB |
>   					SI_CONTEXT_FLUSH_AND_INV_DB);
>   
>   	if (flags & SI_CONTEXT_FLUSH_AND_INV_CB)
>   		sctx->num_cb_cache_flushes++;
>   	if (flags & SI_CONTEXT_FLUSH_AND_INV_DB)
>   		sctx->num_db_cache_flushes++;
>   
>   	/* SI has a bug that it always flushes ICACHE and KCACHE if either
> @@ -1061,25 +1073,26 @@ void si_emit_cache_flush(struct si_context *sctx)
>   				  EOP_DATA_SEL_VALUE_32BIT,
>   				  sctx->wait_mem_scratch, va,
>   				  sctx->wait_mem_number, SI_NOT_QUERY);
>   		si_cp_wait_mem(sctx, cs, va, sctx->wait_mem_number, 0xffffffff,
>   			       WAIT_REG_MEM_EQUAL);
>   	}
>   
>   	/* Make sure ME is idle (it executes most packets) before continuing.
>   	 * This prevents read-after-write hazards between PFP and ME.
>   	 */
> -	if (cp_coher_cntl ||
> -	    (flags & (SI_CONTEXT_CS_PARTIAL_FLUSH |
> -			    SI_CONTEXT_INV_VMEM_L1 |
> -			    SI_CONTEXT_INV_GLOBAL_L2 |
> -			    SI_CONTEXT_WRITEBACK_GLOBAL_L2))) {
> +	if (sctx->has_graphics &&
> +	    (cp_coher_cntl ||
> +	     (flags & (SI_CONTEXT_CS_PARTIAL_FLUSH |
> +		       SI_CONTEXT_INV_VMEM_L1 |
> +		       SI_CONTEXT_INV_GLOBAL_L2 |
> +		       SI_CONTEXT_WRITEBACK_GLOBAL_L2)))) {
>   		radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
>   		radeon_emit(cs, 0);
>   	}
>   
>   	/* SI-CI-VI only:
>   	 *   When one of the CP_COHER_CNTL.DEST_BASE flags is set, SURFACE_SYNC
>   	 *   waits for idle, so it should be last. SURFACE_SYNC is done in PFP.
>   	 *
>   	 * cp_coher_cntl should contain all necessary flags except TC flags
>   	 * at this point.
> diff --git a/src/gallium/drivers/radeonsi/si_texture.c b/src/gallium/drivers/radeonsi/si_texture.c
> index a50088d2d8f..581f90a7b2f 100644
> --- a/src/gallium/drivers/radeonsi/si_texture.c
> +++ b/src/gallium/drivers/radeonsi/si_texture.c
> @@ -457,20 +457,23 @@ static bool si_texture_discard_dcc(struct si_screen *sscreen,
>    *   compressed tiled
>    *
>    * \param sctx  the current context if you have one, or sscreen->aux_context
>    *              if you don't.
>    */
>   bool si_texture_disable_dcc(struct si_context *sctx,
>   			    struct si_texture *tex)
>   {
>   	struct si_screen *sscreen = sctx->screen;
>   
> +	if (!sctx->has_graphics)
> +		return si_texture_discard_dcc(sscreen, tex);
> +
>   	if (!si_can_disable_dcc(tex))
>   		return false;
>   
>   	if (&sctx->b == sscreen->aux_context)
>   		mtx_lock(&sscreen->aux_context_lock);
>   
>   	/* Decompress DCC. */
>   	si_decompress_dcc(sctx, tex);
>   	sctx->b.flush(&sctx->b, NULL, 0);
>   
>