[3/7] radv: implement all depth/stencil resolve modes using compute

Submitted by Samuel Pitoiset on May 27, 2019, 3:41 p.m.

Details

Message ID 20190527154150.23144-4-samuel.pitoiset@gmail.com
State New
Headers show
Series "radv: implement VK_KHR_depth_stencil_resolve" ( rev: 1 ) in Mesa

Commit Message

Samuel Pitoiset May 27, 2019, 3:41 p.m.
This path supports layers but it requires to decompress HTILE
before resolving. The driver also needs to fixup HTILE after
the resolve. This path is probably slower than the graphics one.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
---
 src/amd/vulkan/radv_meta_resolve_cs.c | 491 ++++++++++++++++++++++++++
 src/amd/vulkan/radv_private.h         |  16 +
 2 files changed, 507 insertions(+)

Patch hide | download patch | download mbox

diff --git a/src/amd/vulkan/radv_meta_resolve_cs.c b/src/amd/vulkan/radv_meta_resolve_cs.c
index 1b786526cf2..c0091bd9292 100644
--- a/src/amd/vulkan/radv_meta_resolve_cs.c
+++ b/src/amd/vulkan/radv_meta_resolve_cs.c
@@ -139,6 +139,165 @@  build_resolve_compute_shader(struct radv_device *dev, bool is_integer, bool is_s
 	return b.shader;
 }
 
+enum {
+	DEPTH_RESOLVE,
+	STENCIL_RESOLVE,
+};
+
+static const char *
+get_resolve_mode_str(VkResolveModeFlagBitsKHR resolve_mode)
+{
+	switch (resolve_mode) {
+	case VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR:
+		return "zero";
+	case VK_RESOLVE_MODE_AVERAGE_BIT_KHR:
+		return "average";
+	case VK_RESOLVE_MODE_MIN_BIT_KHR:
+		return "min";
+	case VK_RESOLVE_MODE_MAX_BIT_KHR:
+		return "max";
+	default:
+		unreachable("invalid resolve mode");
+	}
+}
+
+static nir_shader *
+build_depth_stencil_resolve_compute_shader(struct radv_device *dev, int samples,
+					   int index,
+					   VkResolveModeFlagBitsKHR resolve_mode)
+{
+	nir_builder b;
+	char name[64];
+	const struct glsl_type *sampler_type = glsl_sampler_type(GLSL_SAMPLER_DIM_MS,
+								 false,
+								 false,
+								 GLSL_TYPE_FLOAT);
+	const struct glsl_type *img_type = glsl_sampler_type(GLSL_SAMPLER_DIM_2D,
+							     false,
+							     false,
+							     GLSL_TYPE_FLOAT);
+	snprintf(name, 64, "meta_resolve_cs_%s-%s-%d",
+		 index == DEPTH_RESOLVE ? "depth" : "stencil",
+		 get_resolve_mode_str(resolve_mode), samples);
+
+	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
+	b.shader->info.name = ralloc_strdup(b.shader, name);
+	b.shader->info.cs.local_size[0] = 16;
+	b.shader->info.cs.local_size[1] = 16;
+	b.shader->info.cs.local_size[2] = 1;
+
+	nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
+						      sampler_type, "s_tex");
+	input_img->data.descriptor_set = 0;
+	input_img->data.binding = 0;
+
+	nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
+						       img_type, "out_img");
+	output_img->data.descriptor_set = 0;
+	output_img->data.binding = 1;
+	nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
+	nir_ssa_def *wg_id = nir_load_work_group_id(&b);
+	nir_ssa_def *block_size = nir_imm_ivec4(&b,
+						b.shader->info.cs.local_size[0],
+						b.shader->info.cs.local_size[1],
+						b.shader->info.cs.local_size[2], 0);
+
+	nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
+
+	nir_intrinsic_instr *src_offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
+	nir_intrinsic_set_base(src_offset, 0);
+	nir_intrinsic_set_range(src_offset, 16);
+	src_offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
+	src_offset->num_components = 2;
+	nir_ssa_dest_init(&src_offset->instr, &src_offset->dest, 2, 32, "src_offset");
+	nir_builder_instr_insert(&b, &src_offset->instr);
+
+	nir_intrinsic_instr *dst_offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
+	nir_intrinsic_set_base(dst_offset, 0);
+	nir_intrinsic_set_range(dst_offset, 16);
+	dst_offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 8));
+	dst_offset->num_components = 2;
+	nir_ssa_dest_init(&dst_offset->instr, &dst_offset->dest, 2, 32, "dst_offset");
+	nir_builder_instr_insert(&b, &dst_offset->instr);
+
+	nir_ssa_def *img_coord = nir_channels(&b, nir_iadd(&b, global_id, &src_offset->dest.ssa), 0x3);
+
+	nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;
+
+	nir_alu_type type = index == DEPTH_RESOLVE ? nir_type_float : nir_type_uint;
+
+	nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
+	tex->sampler_dim = GLSL_SAMPLER_DIM_MS;
+	tex->op = nir_texop_txf_ms;
+	tex->src[0].src_type = nir_tex_src_coord;
+	tex->src[0].src = nir_src_for_ssa(img_coord);
+	tex->src[1].src_type = nir_tex_src_ms_index;
+	tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
+	tex->src[2].src_type = nir_tex_src_texture_deref;
+	tex->src[2].src = nir_src_for_ssa(input_img_deref);
+	tex->dest_type = type;
+	tex->is_array = false;
+	tex->coord_components = 2;
+
+	nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
+	nir_builder_instr_insert(&b, &tex->instr);
+
+	nir_ssa_def *outval = &tex->dest.ssa;
+
+	if (resolve_mode != VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR) {
+		for (int i = 1; i < samples; i++) {
+			nir_tex_instr *tex_add = nir_tex_instr_create(b.shader, 3);
+			tex_add->sampler_dim = GLSL_SAMPLER_DIM_MS;
+			tex_add->op = nir_texop_txf_ms;
+			tex_add->src[0].src_type = nir_tex_src_coord;
+			tex_add->src[0].src = nir_src_for_ssa(img_coord);
+			tex_add->src[1].src_type = nir_tex_src_ms_index;
+			tex_add->src[1].src = nir_src_for_ssa(nir_imm_int(&b, i));
+			tex_add->src[2].src_type = nir_tex_src_texture_deref;
+			tex_add->src[2].src = nir_src_for_ssa(input_img_deref);
+			tex_add->dest_type = type;
+			tex_add->is_array = false;
+			tex_add->coord_components = 2;
+
+			nir_ssa_dest_init(&tex_add->instr, &tex_add->dest, 4, 32, "tex");
+			nir_builder_instr_insert(&b, &tex_add->instr);
+
+			switch (resolve_mode) {
+			case VK_RESOLVE_MODE_AVERAGE_BIT_KHR:
+				assert(index == DEPTH_RESOLVE);
+				outval = nir_fadd(&b, outval, &tex_add->dest.ssa);
+				break;
+			case VK_RESOLVE_MODE_MIN_BIT_KHR:
+				if (index == DEPTH_RESOLVE)
+					outval = nir_fmin(&b, outval, &tex_add->dest.ssa);
+				else
+					outval = nir_umin(&b, outval, &tex_add->dest.ssa);
+				break;
+			case VK_RESOLVE_MODE_MAX_BIT_KHR:
+				if (index == DEPTH_RESOLVE)
+					outval = nir_fmax(&b, outval, &tex_add->dest.ssa);
+				else
+					outval = nir_umax(&b, outval, &tex_add->dest.ssa);
+				break;
+			default:
+				unreachable("invalid resolve mode");
+			}
+		}
+
+		if (resolve_mode == VK_RESOLVE_MODE_AVERAGE_BIT_KHR)
+			outval = nir_fdiv(&b, outval, nir_imm_float(&b, samples));
+	}
+
+	nir_ssa_def *coord = nir_iadd(&b, global_id, &dst_offset->dest.ssa);
+	nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_deref_store);
+	store->num_components = 4;
+	store->src[0] = nir_src_for_ssa(&nir_build_deref_var(&b, output_img)->dest.ssa);
+	store->src[1] = nir_src_for_ssa(coord);
+	store->src[2] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
+	store->src[3] = nir_src_for_ssa(outval);
+	nir_builder_instr_insert(&b, &store->instr);
+	return b.shader;
+}
 
 static VkResult
 create_layout(struct radv_device *device)
@@ -248,6 +407,57 @@  fail:
 	return result;
 }
 
+static VkResult
+create_depth_stencil_resolve_pipeline(struct radv_device *device,
+				      int samples,
+				      int index,
+				      VkResolveModeFlagBitsKHR resolve_mode,
+				      VkPipeline *pipeline)
+{
+	VkResult result;
+	struct radv_shader_module cs = { .nir = NULL };
+
+	mtx_lock(&device->meta_state.mtx);
+	if (*pipeline) {
+		mtx_unlock(&device->meta_state.mtx);
+		return VK_SUCCESS;
+	}
+
+	cs.nir = build_depth_stencil_resolve_compute_shader(device, samples,
+							    index, resolve_mode);
+
+	/* compute shader */
+	VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
+		.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+		.stage = VK_SHADER_STAGE_COMPUTE_BIT,
+		.module = radv_shader_module_to_handle(&cs),
+		.pName = "main",
+		.pSpecializationInfo = NULL,
+	};
+
+	VkComputePipelineCreateInfo vk_pipeline_info = {
+		.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+		.stage = pipeline_shader_stage,
+		.flags = 0,
+		.layout = device->meta_state.resolve_compute.p_layout,
+	};
+
+	result = radv_CreateComputePipelines(radv_device_to_handle(device),
+					     radv_pipeline_cache_to_handle(&device->meta_state.cache),
+					     1, &vk_pipeline_info, NULL,
+					     pipeline);
+	if (result != VK_SUCCESS)
+		goto fail;
+
+	ralloc_free(cs.nir);
+	mtx_unlock(&device->meta_state.mtx);
+	return VK_SUCCESS;
+fail:
+	ralloc_free(cs.nir);
+	mtx_unlock(&device->meta_state.mtx);
+	return result;
+}
+
 VkResult
 radv_device_init_meta_resolve_compute_state(struct radv_device *device, bool on_demand)
 {
@@ -279,8 +489,56 @@  radv_device_init_meta_resolve_compute_state(struct radv_device *device, bool on_
 		if (res != VK_SUCCESS)
 			goto fail;
 
+		res = create_depth_stencil_resolve_pipeline(device, samples,
+							    DEPTH_RESOLVE,
+							    VK_RESOLVE_MODE_AVERAGE_BIT_KHR,
+							    &state->resolve_compute.depth[i].average_pipeline);
+		if (res != VK_SUCCESS)
+			goto fail;
+
+		res = create_depth_stencil_resolve_pipeline(device, samples,
+							    DEPTH_RESOLVE,
+							    VK_RESOLVE_MODE_MAX_BIT_KHR,
+							    &state->resolve_compute.depth[i].max_pipeline);
+		if (res != VK_SUCCESS)
+			goto fail;
+
+		res = create_depth_stencil_resolve_pipeline(device, samples,
+							    DEPTH_RESOLVE,
+							    VK_RESOLVE_MODE_MIN_BIT_KHR,
+							    &state->resolve_compute.depth[i].min_pipeline);
+		if (res != VK_SUCCESS)
+			goto fail;
+
+		res = create_depth_stencil_resolve_pipeline(device, samples,
+							    STENCIL_RESOLVE,
+							    VK_RESOLVE_MODE_MAX_BIT_KHR,
+							    &state->resolve_compute.stencil[i].max_pipeline);
+		if (res != VK_SUCCESS)
+			goto fail;
+
+		res = create_depth_stencil_resolve_pipeline(device, samples,
+							    STENCIL_RESOLVE,
+							    VK_RESOLVE_MODE_MIN_BIT_KHR,
+							    &state->resolve_compute.stencil[i].min_pipeline);
+		if (res != VK_SUCCESS)
+			goto fail;
 	}
 
+	res = create_depth_stencil_resolve_pipeline(device, 0,
+						    DEPTH_RESOLVE,
+						    VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR,
+						    &state->resolve_compute.depth_zero_pipeline);
+	if (res != VK_SUCCESS)
+		goto fail;
+
+	res = create_depth_stencil_resolve_pipeline(device, 0,
+						    STENCIL_RESOLVE,
+						    VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR,
+						    &state->resolve_compute.stencil_zero_pipeline);
+	if (res != VK_SUCCESS)
+		goto fail;
+
 	return VK_SUCCESS;
 fail:
 	radv_device_finish_meta_resolve_compute_state(device);
@@ -303,8 +561,36 @@  radv_device_finish_meta_resolve_compute_state(struct radv_device *device)
 		radv_DestroyPipeline(radv_device_to_handle(device),
 				     state->resolve_compute.rc[i].srgb_pipeline,
 				     &state->alloc);
+
+		radv_DestroyPipeline(radv_device_to_handle(device),
+				     state->resolve_compute.depth[i].average_pipeline,
+				     &state->alloc);
+
+		radv_DestroyPipeline(radv_device_to_handle(device),
+				     state->resolve_compute.depth[i].max_pipeline,
+				     &state->alloc);
+
+		radv_DestroyPipeline(radv_device_to_handle(device),
+				     state->resolve_compute.depth[i].min_pipeline,
+				     &state->alloc);
+
+		radv_DestroyPipeline(radv_device_to_handle(device),
+				     state->resolve_compute.stencil[i].max_pipeline,
+				     &state->alloc);
+
+		radv_DestroyPipeline(radv_device_to_handle(device),
+				     state->resolve_compute.stencil[i].min_pipeline,
+				     &state->alloc);
 	}
 
+	radv_DestroyPipeline(radv_device_to_handle(device),
+			     state->resolve_compute.depth_zero_pipeline,
+			     &state->alloc);
+
+	radv_DestroyPipeline(radv_device_to_handle(device),
+			     state->resolve_compute.stencil_zero_pipeline,
+			     &state->alloc);
+
 	radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
 					state->resolve_compute.ds_layout,
 					&state->alloc);
@@ -395,6 +681,113 @@  emit_resolve(struct radv_cmd_buffer *cmd_buffer,
 
 }
 
+static void
+emit_depth_stencil_resolve(struct radv_cmd_buffer *cmd_buffer,
+			   struct radv_image_view *src_iview,
+			   struct radv_image_view *dest_iview,
+			   const VkOffset2D *src_offset,
+			   const VkOffset2D *dest_offset,
+			   const VkExtent2D *resolve_extent,
+			   VkImageAspectFlags aspects,
+			   VkResolveModeFlagBitsKHR resolve_mode)
+{
+	struct radv_device *device = cmd_buffer->device;
+	const uint32_t samples = src_iview->image->info.samples;
+	const uint32_t samples_log2 = ffs(samples) - 1;
+	VkPipeline *pipeline;
+
+	radv_meta_push_descriptor_set(cmd_buffer,
+				      VK_PIPELINE_BIND_POINT_COMPUTE,
+				      device->meta_state.resolve_compute.p_layout,
+				      0, /* set */
+				      2, /* descriptorWriteCount */
+				      (VkWriteDescriptorSet[]) {
+					{
+						.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+						.dstBinding = 0,
+						.dstArrayElement = 0,
+						.descriptorCount = 1,
+						.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
+			                      .pImageInfo = (VkDescriptorImageInfo[]) {
+		                              {
+	                                      .sampler = VK_NULL_HANDLE,
+					      .imageView = radv_image_view_to_handle(src_iview),
+	                                      .imageLayout = VK_IMAGE_LAYOUT_GENERAL	                              },
+	                      }
+		              },
+		              {
+		                      .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+		                      .dstBinding = 1,
+		                      .dstArrayElement = 0,
+				      .descriptorCount = 1,
+				      .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+	                      .pImageInfo = (VkDescriptorImageInfo[]) {
+                              {
+                                      .sampler = VK_NULL_HANDLE,
+                                     .imageView = radv_image_view_to_handle(dest_iview),
+                                     .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
+                              },
+                      }
+			      }
+				      });
+
+	switch (resolve_mode) {
+	case VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR:
+		if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT)
+			pipeline = &device->meta_state.resolve_compute.depth_zero_pipeline;
+		else
+			pipeline = &device->meta_state.resolve_compute.stencil_zero_pipeline;
+		break;
+	case VK_RESOLVE_MODE_AVERAGE_BIT_KHR:
+		assert(aspects == VK_IMAGE_ASPECT_DEPTH_BIT);
+		pipeline = &device->meta_state.resolve_compute.depth[samples_log2].average_pipeline;
+		break;
+	case VK_RESOLVE_MODE_MIN_BIT_KHR:
+		if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT)
+			pipeline = &device->meta_state.resolve_compute.depth[samples_log2].min_pipeline;
+		else
+			pipeline = &device->meta_state.resolve_compute.stencil[samples_log2].min_pipeline;
+		break;
+	case VK_RESOLVE_MODE_MAX_BIT_KHR:
+		if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT)
+			pipeline = &device->meta_state.resolve_compute.depth[samples_log2].max_pipeline;
+		else
+			pipeline = &device->meta_state.resolve_compute.stencil[samples_log2].max_pipeline;
+		break;
+	default:
+		unreachable("invalid resolve mode");
+	}
+
+	if (!*pipeline) {
+		int index = aspects == VK_IMAGE_ASPECT_DEPTH_BIT ? DEPTH_RESOLVE : STENCIL_RESOLVE;
+		VkResult ret;
+
+		ret = create_depth_stencil_resolve_pipeline(device, samples,
+							    index, resolve_mode,
+							    pipeline);
+		if (ret != VK_SUCCESS) {
+			cmd_buffer->record_result = ret;
+			return;
+		}
+	}
+
+	radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
+			     VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
+
+	unsigned push_constants[4] = {
+		src_offset->x,
+		src_offset->y,
+		dest_offset->x,
+		dest_offset->y,
+	};
+	radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
+			      device->meta_state.resolve_compute.p_layout,
+			      VK_SHADER_STAGE_COMPUTE_BIT, 0, 16,
+			      push_constants);
+	radv_unaligned_dispatch(cmd_buffer, resolve_extent->width, resolve_extent->height, 1);
+
+}
+
 void radv_meta_resolve_compute_image(struct radv_cmd_buffer *cmd_buffer,
 				     struct radv_image *src_image,
 				     VkImageLayout src_image_layout,
@@ -564,3 +957,101 @@  radv_cmd_buffer_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer)
 
 	radv_meta_restore(&saved_state, cmd_buffer);
 }
+
+void
+radv_depth_stencil_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer,
+				      VkImageAspectFlags aspects,
+				      VkResolveModeFlagBitsKHR resolve_mode)
+{
+	struct radv_framebuffer *fb = cmd_buffer->state.framebuffer;
+	const struct radv_subpass *subpass = cmd_buffer->state.subpass;
+	struct radv_meta_saved_state saved_state;
+	struct radv_subpass_barrier barrier;
+
+	/* Resolves happen before the end-of-subpass barriers get executed, so
+	 * we have to make the attachment shader-readable.
+	 */
+	barrier.src_stage_mask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
+	barrier.src_access_mask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
+	barrier.dst_access_mask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT;
+	radv_subpass_barrier(cmd_buffer, &barrier);
+
+	radv_decompress_resolve_subpass_src(cmd_buffer);
+
+	radv_meta_save(&saved_state, cmd_buffer,
+		       RADV_META_SAVE_COMPUTE_PIPELINE |
+		       RADV_META_SAVE_CONSTANTS |
+		       RADV_META_SAVE_DESCRIPTORS);
+
+	struct radv_subpass_attachment src_att = *subpass->depth_stencil_attachment;
+	struct radv_subpass_attachment dest_att = *subpass->ds_resolve_attachment;
+
+	struct radv_image_view *src_iview =
+		cmd_buffer->state.framebuffer->attachments[src_att.attachment].attachment;
+	struct radv_image_view *dst_iview =
+		cmd_buffer->state.framebuffer->attachments[dest_att.attachment].attachment;
+
+	struct radv_image *src_image = src_iview->image;
+	struct radv_image *dst_image = dst_iview->image;
+
+	for (uint32_t layer = 0; layer < src_image->info.array_size; layer++) {
+		struct radv_image_view tsrc_iview;
+		radv_image_view_init(&tsrc_iview, cmd_buffer->device,
+				     &(VkImageViewCreateInfo) {
+					.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+					.image = radv_image_to_handle(src_image),
+					.viewType = radv_meta_get_view_type(src_image),
+					.format = src_image->vk_format,
+					.subresourceRange = {
+						.aspectMask = aspects,
+						.baseMipLevel = src_iview->base_mip,
+						.levelCount = 1,
+						.baseArrayLayer = layer,
+						.layerCount = 1,
+					},
+				     });
+
+		struct radv_image_view tdst_iview;
+		radv_image_view_init(&tdst_iview, cmd_buffer->device,
+				     &(VkImageViewCreateInfo) {
+					.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+					.image = radv_image_to_handle(dst_image),
+					.viewType = radv_meta_get_view_type(dst_image),
+					.format = dst_image->vk_format,
+					.subresourceRange = {
+						.aspectMask = aspects,
+						.baseMipLevel = dst_iview->base_mip,
+						.levelCount = 1,
+						.baseArrayLayer = layer,
+						.layerCount = 1,
+					},
+				     });
+
+		emit_depth_stencil_resolve(cmd_buffer, &tsrc_iview, &tdst_iview,
+					   &(VkOffset2D) { 0, 0 },
+					   &(VkOffset2D) { 0, 0 },
+					   &(VkExtent2D) { fb->width, fb->height },
+					   aspects,
+					   resolve_mode);
+	}
+
+	if (radv_image_has_htile(dst_image)) {
+		VkImageSubresourceRange range = {};
+		range.aspectMask = aspects;
+		range.baseMipLevel = 0;
+		range.levelCount = 1;
+		range.baseArrayLayer = 0;
+		range.layerCount = dst_image->info.array_size;
+
+		/* Recalculate the HTILE data of the resolve image based on the
+		 * Z-buffer values.
+		 */
+		radv_resummarize_depth_image_inplace(cmd_buffer, dst_image,
+						     &range);
+	}
+
+	cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
+	                                RADV_CMD_FLAG_INV_VMEM_L1;
+
+	radv_meta_restore(&saved_state, cmd_buffer);
+}
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index b006ba4d771..350df91682a 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -573,6 +573,19 @@  struct radv_meta_state {
 			VkPipeline                                i_pipeline;
 			VkPipeline                                srgb_pipeline;
 		} rc[MAX_SAMPLES_LOG2];
+
+		VkPipeline depth_zero_pipeline;
+		struct {
+			VkPipeline average_pipeline;
+			VkPipeline max_pipeline;
+			VkPipeline min_pipeline;
+		} depth[MAX_SAMPLES_LOG2];
+
+		VkPipeline stencil_zero_pipeline;
+		struct {
+			VkPipeline max_pipeline;
+			VkPipeline min_pipeline;
+		} stencil[MAX_SAMPLES_LOG2];
 	} resolve_compute;
 
 	struct {
@@ -1226,6 +1239,9 @@  radv_cmd_buffer_upload_data(struct radv_cmd_buffer *cmd_buffer,
 void radv_cmd_buffer_clear_subpass(struct radv_cmd_buffer *cmd_buffer);
 void radv_cmd_buffer_resolve_subpass(struct radv_cmd_buffer *cmd_buffer);
 void radv_cmd_buffer_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer);
+void radv_depth_stencil_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer,
+					   VkImageAspectFlags aspects,
+					   VkResolveModeFlagBitsKHR resolve_mode);
 void radv_cmd_buffer_resolve_subpass_fs(struct radv_cmd_buffer *cmd_buffer);
 void radv_depth_stencil_resolve_subpass_fs(struct radv_cmd_buffer *cmd_buffer,
 					   VkImageAspectFlags aspects,