[2/3] gallium/u_blitter: implement copying from ZS to color and vice versa

Submitted by Marek Olšák on June 21, 2019, 5:02 p.m.

Details

Message ID 20190621170250.27794-2-maraeo@gmail.com
State New
Headers show
Series "Series without cover letter" ( rev: 1 ) in Mesa

Not browsing as part of any series.

Commit Message

Marek Olšák June 21, 2019, 5:02 p.m.
From: Marek Olšák <marek.olsak@amd.com>

This is for drivers that can't map depth and stencil and need to blit
them to a color texture for CPU access.

This also useful for drivers using separate depth and stencil.
---
 src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h  |   6 +
 src/gallium/auxiliary/util/u_blitter.c        | 147 ++++++++++++----
 src/gallium/auxiliary/util/u_blitter.h        |  29 ++++
 src/gallium/auxiliary/util/u_simple_shaders.c | 161 ++++++++++++++++++
 src/gallium/auxiliary/util/u_simple_shaders.h |   6 +
 5 files changed, 314 insertions(+), 35 deletions(-)

Patch hide | download patch | download mbox

diff --git a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h
index 54a1ee15b68..2ade618db00 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h
@@ -158,20 +158,26 @@  OP11(UARL)
 
 OP13(UCMP)
 OP11(IABS)
 OP11(ISSG)
 
 OP11(IMG2HND)
 OP11(SAMP2HND)
 
 OP12(IMUL_HI)
 OP12(UMUL_HI)
+OP13(UBFE)
+OP11(F2D)
+OP11(D2F)
+OP11(U2D)
+OP11(D2U)
+OP12(DMUL)
 
 #undef OP00
 #undef OP01
 #undef OP10
 #undef OP11
 #undef OP12
 #undef OP13
 
 #undef OP00_LBL
 #undef OP01_LBL
diff --git a/src/gallium/auxiliary/util/u_blitter.c b/src/gallium/auxiliary/util/u_blitter.c
index 8e4807ec670..18a5c272454 100644
--- a/src/gallium/auxiliary/util/u_blitter.c
+++ b/src/gallium/auxiliary/util/u_blitter.c
@@ -95,20 +95,25 @@  struct blitter_context_priv
 
    /* FS which outputs one sample from a multisample texture. */
    void *fs_texfetch_col_msaa[5][PIPE_MAX_TEXTURE_TYPES];
    void *fs_texfetch_depth_msaa[PIPE_MAX_TEXTURE_TYPES];
    void *fs_texfetch_depthstencil_msaa[PIPE_MAX_TEXTURE_TYPES];
    void *fs_texfetch_stencil_msaa[PIPE_MAX_TEXTURE_TYPES];
 
    /* FS which outputs an average of all samples. */
    void *fs_resolve[PIPE_MAX_TEXTURE_TYPES][NUM_RESOLVE_FRAG_SHADERS][2];
 
+   /* FS which unpacks color to ZS or packs ZS to color, matching
+    * the ZS format. See util_blitter_get_color_format_for_zs().
+    */
+   void *fs_pack_color_zs[TGSI_TEXTURE_COUNT][10];
+
    /* Blend state. */
    void *blend[PIPE_MASK_RGBA+1][2]; /**< blend state with writemask */
    void *blend_clear[GET_CLEAR_BLEND_STATE_IDX(PIPE_CLEAR_COLOR)+1];
 
    /* Depth stencil alpha state. */
    void *dsa_write_depth_stencil;
    void *dsa_write_depth_keep_stencil;
    void *dsa_keep_depth_stencil;
    void *dsa_keep_depth_write_stencil;
 
@@ -504,20 +509,27 @@  void util_blitter_destroy(struct blitter_context *blitter)
          ctx->delete_fs_state(pipe, ctx->fs_texfetch_depthstencil_msaa[i]);
       if (ctx->fs_texfetch_stencil_msaa[i])
          ctx->delete_fs_state(pipe, ctx->fs_texfetch_stencil_msaa[i]);
 
       for (j = 0; j< ARRAY_SIZE(ctx->fs_resolve[i]); j++)
          for (f = 0; f < 2; f++)
             if (ctx->fs_resolve[i][j][f])
                ctx->delete_fs_state(pipe, ctx->fs_resolve[i][j][f]);
    }
 
+   for (i = 0; i < ARRAY_SIZE(ctx->fs_pack_color_zs); i++) {
+      for (j = 0; j < ARRAY_SIZE(ctx->fs_pack_color_zs[0]); j++) {
+         if (ctx->fs_pack_color_zs[i][j])
+            ctx->delete_fs_state(pipe, ctx->fs_pack_color_zs[i][j]);
+      }
+   }
+
    if (ctx->fs_empty)
       ctx->delete_fs_state(pipe, ctx->fs_empty);
    if (ctx->fs_write_one_cbuf)
       ctx->delete_fs_state(pipe, ctx->fs_write_one_cbuf);
    if (ctx->fs_write_all_cbufs)
       ctx->delete_fs_state(pipe, ctx->fs_write_all_cbufs);
 
    pipe->delete_sampler_state(pipe, ctx->sampler_state_rect_linear);
    pipe->delete_sampler_state(pipe, ctx->sampler_state_rect);
    pipe->delete_sampler_state(pipe, ctx->sampler_state_linear);
@@ -975,20 +987,58 @@  static void *blitter_get_fs_texfetch_col(struct blitter_context_priv *ctx,
          *shader = util_make_fragment_tex_shader(pipe, tgsi_tex,
                                                  TGSI_INTERPOLATE_LINEAR,
                                                  stype, dtype,
                                                  ctx->has_tex_lz, use_txf);
       }
 
       return *shader;
    }
 }
 
+static inline
+void *blitter_get_fs_pack_color_zs(struct blitter_context_priv *ctx,
+                                   enum pipe_texture_target target,
+                                   unsigned nr_samples,
+                                   enum pipe_format zs_format,
+                                   bool dst_is_color)
+{
+   struct pipe_context *pipe = ctx->base.pipe;
+   enum tgsi_texture_type tgsi_tex =
+      util_pipe_tex_to_tgsi_tex(target, nr_samples);
+   int format_index = zs_format == PIPE_FORMAT_Z24_UNORM_S8_UINT ? 0 :
+                      zs_format == PIPE_FORMAT_S8_UINT_Z24_UNORM ? 1 :
+                      zs_format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT ? 2 :
+                      zs_format == PIPE_FORMAT_Z24X8_UNORM ? 3 :
+                      zs_format == PIPE_FORMAT_X8Z24_UNORM ? 4 : -1;
+
+   if (format_index == -1) {
+      assert(0);
+      return NULL;
+   }
+
+   /* The first 5 shaders pack ZS to color, the last 5 shaders unpack color
+    * to ZS.
+    */
+   if (dst_is_color)
+      format_index += 5;
+
+   void **shader = &ctx->fs_pack_color_zs[tgsi_tex][format_index];
+
+   /* Create the fragment shader on-demand. */
+   if (!*shader) {
+      assert(!ctx->cached_all_shaders);
+      *shader = util_make_fs_pack_color_zs(pipe, tgsi_tex, zs_format,
+                                           dst_is_color);
+   }
+   return *shader;
+}
+
 static inline
 void *blitter_get_fs_texfetch_depth(struct blitter_context_priv *ctx,
                                     enum pipe_texture_target target,
                                     unsigned nr_samples,
                                     bool use_txf)
 {
    struct pipe_context *pipe = ctx->base.pipe;
 
    assert(target < PIPE_MAX_TEXTURE_TYPES);
 
@@ -1806,48 +1856,47 @@  void util_blitter_blit_generic(struct blitter_context *blitter,
                                unsigned src_width0, unsigned src_height0,
                                unsigned mask, unsigned filter,
                                const struct pipe_scissor_state *scissor,
                                bool alpha_blend)
 {
    struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter;
    struct pipe_context *pipe = ctx->base.pipe;
    enum pipe_texture_target src_target = src->target;
    unsigned src_samples = src->texture->nr_samples;
    unsigned dst_samples = dst->texture->nr_samples;
-   bool has_depth, has_stencil, has_color;
-   bool blit_stencil, blit_depth, blit_color;
    void *sampler_state;
    const struct util_format_description *src_desc =
          util_format_description(src->format);
    const struct util_format_description *dst_desc =
          util_format_description(dst->format);
 
-   has_color = src_desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS &&
-               dst_desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS;
-   has_depth = util_format_has_depth(src_desc) &&
-               util_format_has_depth(dst_desc);
-   has_stencil = util_format_has_stencil(src_desc) &&
-                 util_format_has_stencil(dst_desc);
+   bool src_has_color = src_desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS;
+   bool src_has_depth = util_format_has_depth(src_desc);
+   bool src_has_stencil = util_format_has_stencil(src_desc);
 
-   blit_color = has_color && (mask & PIPE_MASK_RGBA);
-   blit_depth = has_depth && (mask & PIPE_MASK_Z);
-   blit_stencil = has_stencil && (mask & PIPE_MASK_S) &&
-                  ctx->has_stencil_export;
+   bool dst_has_color = mask & PIPE_MASK_RGBA &&
+                        dst_desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS;
+   bool dst_has_depth = mask & PIPE_MASK_Z &&
+                        util_format_has_depth(dst_desc);
+   bool dst_has_stencil = ctx->has_stencil_export &&
+                          mask & PIPE_MASK_S &&
+                          util_format_has_stencil(dst_desc);
 
-   if (!blit_stencil && !blit_depth && !blit_color) {
+   /* Return if there is nothing to do. */
+   if (!dst_has_color && !dst_has_depth && !dst_has_stencil) {
       return;
    }
 
    bool is_scaled = dstbox->width != abs(srcbox->width) ||
                     dstbox->height != abs(srcbox->height);
 
-   if (blit_stencil || !is_scaled)
+   if (src_has_stencil || !is_scaled)
       filter = PIPE_TEX_FILTER_NEAREST;
 
    bool use_txf = false;
 
    /* Don't support scaled blits. The TXF shader uses F2I for rounding. */
    if (ctx->has_txf &&
        !is_scaled &&
        filter == PIPE_TEX_FILTER_NEAREST &&
        src->target != PIPE_TEXTURE_CUBE &&
        src->target != PIPE_TEXTURE_CUBE_ARRAY) {
@@ -1881,88 +1930,116 @@  void util_blitter_blit_generic(struct blitter_context *blitter,
    }
 
    /* Check whether the states are properly saved. */
    util_blitter_set_running_flag(blitter);
    blitter_check_saved_vertex_states(ctx);
    blitter_check_saved_fragment_states(ctx);
    blitter_check_saved_textures(ctx);
    blitter_check_saved_fb_state(ctx);
    blitter_disable_render_cond(ctx);
 
-   if (blit_depth || blit_stencil) {
+   /* Blend, DSA, fragment shader. */
+   if (dst_has_depth && dst_has_stencil) {
       pipe->bind_blend_state(pipe, ctx->blend[0][0]);
-
-      if (blit_depth && blit_stencil) {
-         pipe->bind_depth_stencil_alpha_state(pipe,
-                                              ctx->dsa_write_depth_stencil);
+      pipe->bind_depth_stencil_alpha_state(pipe,
+                                           ctx->dsa_write_depth_stencil);
+      if (src_has_color) {
+         assert(use_txf);
          ctx->bind_fs_state(pipe,
-               blitter_get_fs_texfetch_depthstencil(ctx, src_target,
-                                                    src_samples, use_txf));
-      } else if (blit_depth) {
-         pipe->bind_depth_stencil_alpha_state(pipe,
-                                              ctx->dsa_write_depth_keep_stencil);
+            blitter_get_fs_pack_color_zs(ctx, src_target,
+                                         src_samples, dst->format, false));
+      } else {
          ctx->bind_fs_state(pipe,
-               blitter_get_fs_texfetch_depth(ctx, src_target,
-                                             src_samples, use_txf));
-      } else { /* is_stencil */
-         pipe->bind_depth_stencil_alpha_state(pipe,
-                                              ctx->dsa_keep_depth_write_stencil);
+            blitter_get_fs_texfetch_depthstencil(ctx, src_target,
+                                                 src_samples, use_txf));
+      }
+   } else if (dst_has_depth) {
+      pipe->bind_blend_state(pipe, ctx->blend[0][0]);
+      pipe->bind_depth_stencil_alpha_state(pipe,
+                                           ctx->dsa_write_depth_keep_stencil);
+      if (src_has_color &&
+          (src->format == PIPE_FORMAT_R32_UINT ||
+           src->format == PIPE_FORMAT_R32G32_UINT)) {
+         assert(use_txf);
+         ctx->bind_fs_state(pipe,
+            blitter_get_fs_pack_color_zs(ctx, src_target,
+                                         src_samples, dst->format, false));
+      } else {
          ctx->bind_fs_state(pipe,
-               blitter_get_fs_texfetch_stencil(ctx, src_target,
-                                               src_samples, use_txf));
+            blitter_get_fs_texfetch_depth(ctx, src_target,
+                                          src_samples, use_txf));
       }
+   } else if (dst_has_stencil) {
+      pipe->bind_blend_state(pipe, ctx->blend[0][0]);
+      pipe->bind_depth_stencil_alpha_state(pipe,
+                                           ctx->dsa_keep_depth_write_stencil);
 
+      assert(src_has_stencil); /* unpacking from color is unsupported */
+      ctx->bind_fs_state(pipe,
+         blitter_get_fs_texfetch_stencil(ctx, src_target,
+                                         src_samples, use_txf));
    } else {
       unsigned colormask = mask & PIPE_MASK_RGBA;
 
       pipe->bind_blend_state(pipe, ctx->blend[colormask][alpha_blend]);
       pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_keep_depth_stencil);
-      ctx->bind_fs_state(pipe,
+
+      if (src_has_depth &&
+          (dst->format == PIPE_FORMAT_R32_UINT ||
+           dst->format == PIPE_FORMAT_R32G32_UINT)) {
+         assert(use_txf);
+         ctx->bind_fs_state(pipe,
+            blitter_get_fs_pack_color_zs(ctx, src_target,
+                                         src_samples, src->format, true));
+      } else {
+         ctx->bind_fs_state(pipe,
             blitter_get_fs_texfetch_col(ctx, src->format, dst->format, src_target,
                                         src_samples, dst_samples, filter,
                                         use_txf));
+      }
    }
 
    /* Set the linear filter only for scaled color non-MSAA blits. */
    if (filter == PIPE_TEX_FILTER_LINEAR) {
       if (src_target == PIPE_TEXTURE_RECT) {
          sampler_state = ctx->sampler_state_rect_linear;
       } else {
          sampler_state = ctx->sampler_state_linear;
       }
    } else {
       if (src_target == PIPE_TEXTURE_RECT) {
          sampler_state = ctx->sampler_state_rect;
       } else {
          sampler_state = ctx->sampler_state;
       }
    }
 
    /* Set samplers. */
-   if (blit_depth && blit_stencil) {
+   if (src_has_depth && src_has_stencil &&
+       (dst_has_color || (dst_has_depth && dst_has_stencil))) {
       /* Setup two samplers, one for depth and the other one for stencil. */
       struct pipe_sampler_view templ;
       struct pipe_sampler_view *views[2];
       void *samplers[2] = {sampler_state, sampler_state};
 
       templ = *src;
       templ.format = util_format_stencil_only(templ.format);
       assert(templ.format != PIPE_FORMAT_NONE);
 
       views[0] = src;
       views[1] = pipe->create_sampler_view(pipe, src->texture, &templ);
 
       pipe->set_sampler_views(pipe, PIPE_SHADER_FRAGMENT, 0, 2, views);
       pipe->bind_sampler_states(pipe, PIPE_SHADER_FRAGMENT, 0, 2, samplers);
 
       pipe_sampler_view_reference(&views[1], NULL);
-   } else if (blit_stencil) {
+   } else if (src_has_stencil && dst_has_stencil) {
       /* Set a stencil-only sampler view for it not to sample depth instead. */
       struct pipe_sampler_view templ;
       struct pipe_sampler_view *view;
 
       templ = *src;
       templ.format = util_format_stencil_only(templ.format);
       assert(templ.format != PIPE_FORMAT_NONE);
 
       view = pipe->create_sampler_view(pipe, src->texture, &templ);
 
@@ -1977,21 +2054,21 @@  void util_blitter_blit_generic(struct blitter_context *blitter,
                                 0, 1, &sampler_state);
    }
 
    if (scissor) {
       pipe->set_scissor_states(pipe, 0, 1, scissor);
    }
 
    blitter_set_common_draw_rect_state(ctx, scissor != NULL);
 
    do_blits(ctx, dst, dstbox, src, src_width0, src_height0,
-            srcbox, blit_depth || blit_stencil, use_txf);
+            srcbox, dst_has_depth || dst_has_stencil, use_txf);
 
    util_blitter_restore_vertex_states(blitter);
    util_blitter_restore_fragment_states(blitter);
    util_blitter_restore_textures(blitter);
    util_blitter_restore_fb_state(blitter);
    if (scissor) {
       pipe->set_scissor_states(pipe, 0, 1, &ctx->base.saved_scissor);
    }
    util_blitter_restore_render_cond(blitter);
    util_blitter_unset_running_flag(blitter);
diff --git a/src/gallium/auxiliary/util/u_blitter.h b/src/gallium/auxiliary/util/u_blitter.h
index c1f1ae47443..9e3fa55e648 100644
--- a/src/gallium/auxiliary/util/u_blitter.h
+++ b/src/gallium/auxiliary/util/u_blitter.h
@@ -590,15 +590,44 @@  void util_blitter_common_clear_setup(struct blitter_context *blitter,
 void util_blitter_set_running_flag(struct blitter_context *blitter);
 void util_blitter_unset_running_flag(struct blitter_context *blitter);
 
 void util_blitter_restore_vertex_states(struct blitter_context *blitter);
 void util_blitter_restore_fragment_states(struct blitter_context *blitter);
 void util_blitter_restore_render_cond(struct blitter_context *blitter);
 void util_blitter_restore_fb_state(struct blitter_context *blitter);
 void util_blitter_restore_textures(struct blitter_context *blitter);
 void util_blitter_restore_constant_buffer_state(struct blitter_context *blitter);
 
+/* These are supported combinations of blits from ZS to color and vice versa.
+ * The blitter will do the packing/unpacking of depth and stencil
+ * in the fragment shader.
+ */
+static inline enum pipe_format
+util_blitter_get_color_format_for_zs(enum pipe_format format)
+{
+   switch (format) {
+   case PIPE_FORMAT_Z16_UNORM:
+      return PIPE_FORMAT_R16_UNORM;
+
+   case PIPE_FORMAT_Z32_FLOAT:
+      return PIPE_FORMAT_R32_FLOAT;
+
+   case PIPE_FORMAT_Z24_UNORM_S8_UINT:
+   case PIPE_FORMAT_Z24X8_UNORM:
+   case PIPE_FORMAT_S8_UINT_Z24_UNORM:
+   case PIPE_FORMAT_X8Z24_UNORM:
+      return PIPE_FORMAT_R32_UINT;
+
+   case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
+      return PIPE_FORMAT_R32G32_UINT;
+
+   case PIPE_FORMAT_Z32_UNORM:
+   default:
+      assert(0);
+   }
+}
+
 #ifdef __cplusplus
 }
 #endif
 
 #endif
diff --git a/src/gallium/auxiliary/util/u_simple_shaders.c b/src/gallium/auxiliary/util/u_simple_shaders.c
index c111eaf1db5..2fdd60b0bb3 100644
--- a/src/gallium/auxiliary/util/u_simple_shaders.c
+++ b/src/gallium/auxiliary/util/u_simple_shaders.c
@@ -888,10 +888,171 @@  util_make_geometry_passthrough_shader(struct pipe_context *pipe,
 
    /* EMIT IMM[0] */
    ureg_insn(ureg, TGSI_OPCODE_EMIT, NULL, 0, &imm, 1, 0);
 
    /* END */
    ureg_END(ureg);
 
    return ureg_create_shader_and_destroy(ureg, pipe);
 }
 
+/**
+ * Blit from color to ZS or from ZS to color in a manner that is equivalent
+ * to memcpy.
+ *
+ * Color is either R32_UINT (for Z24S8 / S8Z24) or R32G32_UINT (Z32_S8X24).
+ *
+ * Depth and stencil samplers are used to load depth and stencil,
+ * and they are packed and the result is written to a color output.
+ *   OR
+ * A color sampler is used to load a color value, which is unpacked and
+ * written to depth and stencil shader outputs.
+ */
+void *
+util_make_fs_pack_color_zs(struct pipe_context *pipe,
+                           enum tgsi_texture_type tex_target,
+                           enum pipe_format zs_format,
+                           bool dst_is_color)
+{
+   struct ureg_program *ureg;
+   struct ureg_src depth_sampler, stencil_sampler, color_sampler, coord;
+   struct ureg_dst out, depth, depth_x, stencil, out_depth, out_stencil, color;
+
+   assert(zs_format == PIPE_FORMAT_Z24_UNORM_S8_UINT || /* color is R32_UINT */
+          zs_format == PIPE_FORMAT_S8_UINT_Z24_UNORM || /* color is R32_UINT */
+          zs_format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT || /* color is R32G32_UINT */
+          zs_format == PIPE_FORMAT_Z24X8_UNORM || /* color is R32_UINT */
+          zs_format == PIPE_FORMAT_X8Z24_UNORM); /* color is R32_UINT */
+
+   bool has_stencil = zs_format != PIPE_FORMAT_Z24X8_UNORM &&
+                      zs_format != PIPE_FORMAT_X8Z24_UNORM;
+   bool is_z24 = zs_format != PIPE_FORMAT_Z32_FLOAT_S8X24_UINT;
+   bool z24_is_high = zs_format == PIPE_FORMAT_S8_UINT_Z24_UNORM ||
+                      zs_format == PIPE_FORMAT_X8Z24_UNORM;
+
+   ureg = ureg_create(PIPE_SHADER_FRAGMENT);
+   if (!ureg)
+      return NULL;
+
+   coord = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_GENERIC, 0,
+                              TGSI_INTERPOLATE_LINEAR);
+
+   if (dst_is_color) {
+      /* Load depth. */
+      depth_sampler = ureg_DECL_sampler(ureg, 0);
+      ureg_DECL_sampler_view(ureg, 0, tex_target,
+                             TGSI_RETURN_TYPE_FLOAT,
+                             TGSI_RETURN_TYPE_FLOAT,
+                             TGSI_RETURN_TYPE_FLOAT,
+                             TGSI_RETURN_TYPE_FLOAT);
+
+      depth = ureg_DECL_temporary(ureg);
+      depth_x = ureg_writemask(depth, TGSI_WRITEMASK_X);
+      ureg_load_tex(ureg, depth_x, coord, depth_sampler, tex_target, true, true);
+
+      /* Pack to Z24. */
+      if (is_z24) {
+         double imm = 0xffffff;
+         struct ureg_src imm_f64 = ureg_DECL_immediate_f64(ureg, &imm, 2);
+         struct ureg_dst tmp_xy = ureg_writemask(ureg_DECL_temporary(ureg),
+                                                 TGSI_WRITEMASK_XY);
+
+         ureg_F2D(ureg, tmp_xy, ureg_src(depth));
+         ureg_DMUL(ureg, tmp_xy, ureg_src(tmp_xy), imm_f64);
+         ureg_D2U(ureg, depth_x, ureg_src(tmp_xy));
+
+         if (z24_is_high)
+            ureg_SHL(ureg, depth_x, ureg_src(depth), ureg_imm1u(ureg, 8));
+         else
+            ureg_AND(ureg, depth_x, ureg_src(depth), ureg_imm1u(ureg, 0xffffff));
+      }
+
+      if (has_stencil) {
+         /* Load stencil. */
+         stencil_sampler = ureg_DECL_sampler(ureg, 1);
+         ureg_DECL_sampler_view(ureg, 0, tex_target,
+                                TGSI_RETURN_TYPE_UINT,
+                                TGSI_RETURN_TYPE_UINT,
+                                TGSI_RETURN_TYPE_UINT,
+                                TGSI_RETURN_TYPE_UINT);
+
+         stencil = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_X);
+         ureg_load_tex(ureg, stencil, coord, stencil_sampler, tex_target,
+                       true, true);
+
+         /* Pack stencil into depth. */
+         if (is_z24) {
+            if (!z24_is_high)
+               ureg_SHL(ureg, stencil, ureg_src(stencil), ureg_imm1u(ureg, 24));
+
+            ureg_OR(ureg, depth_x, ureg_src(depth), ureg_src(stencil));
+         }
+      }
+
+      out = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0);
+
+      if (is_z24) {
+         ureg_MOV(ureg, ureg_writemask(out, TGSI_WRITEMASK_X), ureg_src(depth));
+      } else {
+         /* Z32_S8X24 */
+         ureg_MOV(ureg, ureg_writemask(depth, TGSI_WRITEMASK_Y),
+                  ureg_scalar(ureg_src(stencil), TGSI_SWIZZLE_X));
+         ureg_MOV(ureg, ureg_writemask(out, TGSI_WRITEMASK_XY), ureg_src(depth));
+      }
+   } else {
+      color_sampler = ureg_DECL_sampler(ureg, 0);
+      ureg_DECL_sampler_view(ureg, 0, tex_target,
+                             TGSI_RETURN_TYPE_UINT,
+                             TGSI_RETURN_TYPE_UINT,
+                             TGSI_RETURN_TYPE_UINT,
+                             TGSI_RETURN_TYPE_UINT);
+
+      color = ureg_DECL_temporary(ureg);
+      ureg_load_tex(ureg, color, coord, color_sampler, tex_target, true, true);
+
+      depth = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_X);
+      stencil = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_X);
+
+      if (is_z24) {
+         double imm = 1.0 / 0xffffff;
+         struct ureg_src imm_f64 = ureg_DECL_immediate_f64(ureg, &imm, 2);
+         struct ureg_dst tmp_xy = ureg_writemask(ureg_DECL_temporary(ureg),
+                                                 TGSI_WRITEMASK_XY);
+
+         ureg_UBFE(ureg, depth, ureg_src(color),
+                   ureg_imm1u(ureg, z24_is_high ? 8 : 0),
+                   ureg_imm1u(ureg, 24));
+         ureg_U2D(ureg, tmp_xy, ureg_src(depth));
+         ureg_DMUL(ureg, tmp_xy, ureg_src(tmp_xy), imm_f64);
+         ureg_D2F(ureg, depth, ureg_src(tmp_xy));
+      } else {
+         /* depth = color.x; (Z32_S8X24) */
+         ureg_MOV(ureg, depth, ureg_src(color));
+      }
+
+      out_depth = ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 0);
+      ureg_MOV(ureg, ureg_writemask(out_depth, TGSI_WRITEMASK_Z),
+               ureg_scalar(ureg_src(depth), TGSI_SWIZZLE_X));
+
+      if (has_stencil) {
+         if (is_z24) {
+            ureg_UBFE(ureg, stencil, ureg_src(color),
+                      ureg_imm1u(ureg, z24_is_high ? 0 : 24),
+                      ureg_imm1u(ureg, 8));
+         } else {
+            /* stencil = color.y[0:7]; (Z32_S8X24) */
+            ureg_UBFE(ureg, stencil,
+                      ureg_scalar(ureg_src(color), TGSI_SWIZZLE_Y),
+                      ureg_imm1u(ureg, 0),
+                      ureg_imm1u(ureg, 8));
+         }
+
+         out_stencil = ureg_DECL_output(ureg, TGSI_SEMANTIC_STENCIL, 0);
+         ureg_MOV(ureg, ureg_writemask(out_stencil, TGSI_WRITEMASK_Y),
+                  ureg_scalar(ureg_src(stencil), TGSI_SWIZZLE_X));
+      }
+   }
+
+   ureg_END(ureg);
+
+   return ureg_create_shader_and_destroy(ureg, pipe);
+}
diff --git a/src/gallium/auxiliary/util/u_simple_shaders.h b/src/gallium/auxiliary/util/u_simple_shaders.h
index 4d4f5e97f5b..501906d6fd3 100644
--- a/src/gallium/auxiliary/util/u_simple_shaders.h
+++ b/src/gallium/auxiliary/util/u_simple_shaders.h
@@ -146,16 +146,22 @@  util_make_fs_msaa_resolve_bilinear(struct pipe_context *pipe,
                                    enum tgsi_texture_type tgsi_tex,
                                    unsigned nr_samples,
                                    enum tgsi_return_type stype);
 
 extern void *
 util_make_geometry_passthrough_shader(struct pipe_context *pipe,
                                       uint num_attribs,
                                       const ubyte *semantic_names,
                                       const ubyte *semantic_indexes);
 
+void *
+util_make_fs_pack_color_zs(struct pipe_context *pipe,
+                           enum tgsi_texture_type tex_target,
+                           enum pipe_format zs_format,
+                           bool dst_is_color);
+
 #ifdef __cplusplus
 }
 #endif
 
 
 #endif