[Spice-devel,v6,12/26] server: Avoid copying the input frame in the GStreamer encoder

Submitted by Francois Gouget on Oct. 14, 2015, 3:33 p.m.

Details

Message ID alpine.DEB.2.20.1510141418290.17509@amboise
State New
Headers show

Not browsing as part of any series.

Commit Message

Francois Gouget Oct. 14, 2015, 3:33 p.m.
This relies on the GStreamer buffer's lifetime being short enough which
it is because:
 - We encode frames one by one.
 - For all encoders but MJPEG, the first element of the pipeline will
   convert the bitmap to another image format which entails copying it.
   So by the time the encoder starts its work, this buffer will not be
   needed anymore.
 - The MJPEG encoder does not perform inter-frame compression and thus
   does not need to keep hold of this buffer once it has processed it.

Note that we can only avoid copies for the first 1 Mpixels or so.
That's because Spice splits larger frames into more chunks than we can
fit GstMemory fragments in a GStreamer buffer. So if there are more
pixels we will avoid copies for the first 3840 KB and copy the rest.

Signed-off-by: Francois Gouget <fgouget@codeweavers.com>
---
 server/gstreamer_encoder.c | 131 ++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 124 insertions(+), 7 deletions(-)


Changes since the previous version:
 * Fixed an empty if() block in zero_copy().
 * Renamed chunk to chunk_index.
 * Remove the b pointer and tweaked dst handling a bit.
 * Added a spice_assert() around gst_memory_map() as it's really 
   not supposed to fail given the way we call it.
 * Added a note about GST_MAP_INFO_INIT.

Patch hide | download patch | download mbox

diff --git a/server/gstreamer_encoder.c b/server/gstreamer_encoder.c
index a32e9e2..e31b7c1 100644
--- a/server/gstreamer_encoder.c
+++ b/server/gstreamer_encoder.c
@@ -30,6 +30,8 @@ 
 
 #define GSTE_DEFAULT_FPS 30
 
+#define DO_ZERO_COPY
+
 
 typedef struct {
     SpiceBitmapFmt spice_format;
@@ -71,6 +73,11 @@  typedef struct SpiceGstEncoder {
     GstElement *gstenc;
     GstAppSink *appsink;
 
+#ifdef DO_ZERO_COPY
+    /* Set to TRUE until GStreamer no longer needs the raw bitmap buffer. */
+    gboolean needs_bitmap;
+#endif
+
     /* The frame counter for GStreamer buffers */
     uint32_t frame;
 
@@ -347,6 +354,79 @@  static inline int line_copy(SpiceGstEncoder *encoder, const SpiceBitmap *bitmap,
      return TRUE;
 }
 
+#ifdef DO_ZERO_COPY
+/* A helper for zero_copy() */
+static void unref_bitmap(gpointer mem)
+{
+    SpiceGstEncoder *encoder = (SpiceGstEncoder*)mem;
+    encoder->needs_bitmap = FALSE;
+}
+
+/* A helper for push_raw_frame(). */
+static inline int zero_copy(SpiceGstEncoder *encoder, const SpiceBitmap *bitmap,
+                            GstBuffer *buffer, uint32_t *chunk_index,
+                            uint32_t *chunk_offset, uint32_t *len)
+{
+    /* We cannot control the lifetime of the bitmap but we can wrap it in
+     * the buffer anyway because:
+     * - Before returning from gst_encoder_encode_frame() we wait for the
+     *   pipeline to have converted this frame into a compressed buffer.
+     *   So it has to have gone through the frame at least once.
+     * - For all encoders but MJPEG, the first element of the pipeline will
+     *   convert the bitmap to another image format which entails copying
+     *   it. So by the time the encoder starts its work, this buffer will
+     *   not be needed anymore.
+     * - The MJPEG encoder does not perform inter-frame compression and thus
+     *   does not need to keep hold of this buffer once it has processed it.
+     * encoder->needs_bitmap lets us verify that these conditions still hold
+     * true through an assert.
+     */
+    SpiceChunks *chunks = bitmap->data;
+    while (*chunk_offset >= chunks->chunk[*chunk_index].len) {
+        /* Make sure that the chunk is not padded */
+        if (chunks->chunk[*chunk_index].len % bitmap->stride != 0) {
+            return FALSE;
+        }
+        *chunk_offset -= chunks->chunk[*chunk_index].len;
+        (*chunk_index)++;
+    }
+
+    int max_mem = gst_buffer_get_max_memory();
+    if (chunks->num_chunks - *chunk_index > max_mem) {
+        /* There are more chunks than we can fit memory objects in a
+         * buffer. This will cause the buffer to merge memory objects to
+         * fit the extra chunks, which means doing wasteful memory copies.
+         * So use the zero-copy approach for the first max_mem-1 chunks, and
+         * let push_raw_frame() add another memory object to copy the rest.
+         */
+        max_mem = *chunk_index + max_mem - 1;
+    } else {
+        max_mem = chunks->num_chunks;
+    }
+
+    while (*len && *chunk_index < max_mem) {
+        /* Make sure that the chunk is not padded */
+        if (chunks->chunk[*chunk_index].len % bitmap->stride != 0) {
+            spice_warning("chunk %d/%d is padded, cannot zero-copy", *chunk_index,
+                          chunks->num_chunks);
+            return FALSE;
+        }
+        uint32_t thislen = MIN(chunks->chunk[*chunk_index].len - *chunk_offset, *len);
+        GstMemory *mem = gst_memory_new_wrapped(GST_MEMORY_FLAG_READONLY,
+                                                chunks->chunk[*chunk_index].data,
+                                                chunks->chunk[*chunk_index].len,
+                                                *chunk_offset, thislen,
+                                                encoder, &unref_bitmap);
+        gst_buffer_append_memory(buffer, mem);
+        *len -= thislen;
+        *chunk_offset = 0;
+        (*chunk_index)++;
+    }
+    encoder->needs_bitmap = TRUE;
+    return TRUE;
+}
+#endif
+
 /* A helper for gst_encoder_encode_frame(). */
 static int push_raw_frame(SpiceGstEncoder *encoder, const SpiceBitmap *bitmap,
                           const SpiceRect *src, int top_down)
@@ -354,10 +434,9 @@  static int push_raw_frame(SpiceGstEncoder *encoder, const SpiceBitmap *bitmap,
     const uint32_t height = src->bottom - src->top;
     const uint32_t stream_stride = (src->right - src->left) * encoder->format->bpp / 8;
     uint32_t len = stream_stride * height;
-    GstBuffer *buffer = gst_buffer_new_and_alloc(len);
-    GstMapInfo map;
-    gst_buffer_map(buffer, &map, GST_MAP_WRITE);
-    uint8_t *dst = map.data;
+    GstBuffer *buffer = gst_buffer_new();
+    /* TODO Use GST_MAP_INFO_INIT once GStreamer 1.4.5 is no longer relevant */
+    GstMapInfo map = { .memory = NULL };
 
     /* Note that we should not reorder the lines, even if top_down is false.
      * It just changes the number of lines to skip at the start of the bitmap.
@@ -369,9 +448,18 @@  static int push_raw_frame(SpiceGstEncoder *encoder, const SpiceBitmap *bitmap,
         /* We have to do a line-by-line copy because for each we have to leave
          * out pixels on the left or right.
          */
+        GstMemory *mem = gst_allocator_alloc(NULL, len, NULL);
+        if (!mem) {
+            gst_buffer_unref(buffer);
+            return VIDEO_ENCODER_FRAME_UNSUPPORTED;
+        }
+        spice_assert(gst_memory_map(mem, &map, GST_MAP_WRITE));
+        uint8_t *dst = map.data;
+
         chunk_offset += src->left * encoder->format->bpp / 8;
         if (!line_copy(encoder, bitmap, chunk_offset, stream_stride, height, dst)) {
-            gst_buffer_unmap(buffer, &map);
+            gst_memory_unmap(map.memory, &map);
+            gst_memory_unref(map.memory);
             gst_buffer_unref(buffer);
             return VIDEO_ENCODER_FRAME_UNSUPPORTED;
         }
@@ -380,10 +468,32 @@  static int push_raw_frame(SpiceGstEncoder *encoder, const SpiceBitmap *bitmap,
         SpiceChunks *chunks = bitmap->data;
         uint32_t chunk_index = 0;
         /* We can copy the bitmap chunk by chunk */
+#ifdef DO_ZERO_COPY
+        if (!zero_copy(encoder, bitmap, buffer, &chunk_index, &chunk_offset, &len)) {
+            gst_buffer_unref(buffer);
+            return VIDEO_ENCODER_FRAME_UNSUPPORTED;
+        }
+        /* Now we must avoid any write to the GstBuffer object as it would
+         * cause a copy of the read-only memory objects we just added.
+         * Fortunately we can append extra writable memory objects instead.
+         */
+#endif
+
+        if (len) {
+            GstMemory *mem = gst_allocator_alloc(NULL, len, NULL);
+            if (!mem) {
+                gst_buffer_unref(buffer);
+                return VIDEO_ENCODER_FRAME_UNSUPPORTED;
+            }
+            spice_assert(gst_memory_map(mem, &map, GST_MAP_WRITE));
+        }
+        uint8_t *dst = map.data;
+
         while (len && chunk_index < chunks->num_chunks) {
             /* Make sure that the chunk is not padded */
             if (chunks->chunk[chunk_index].len % bitmap->stride != 0) {
-                gst_buffer_unmap(buffer, &map);
+                gst_memory_unmap(map.memory, &map);
+                gst_memory_unref(map.memory);
                 gst_buffer_unref(buffer);
                 spice_warning("chunk %d/%d is padded, cannot copy it",
                               chunk_index, chunks->num_chunks);
@@ -405,7 +515,10 @@  static int push_raw_frame(SpiceGstEncoder *encoder, const SpiceBitmap *bitmap,
         }
         spice_assert(len == 0);
     }
-    gst_buffer_unmap(buffer, &map);
+    if (map.memory) {
+        gst_memory_unmap(map.memory, &map);
+        gst_buffer_append_memory(buffer, map.memory);
+    }
     GST_BUFFER_OFFSET(buffer) = encoder->frame++;
 
     GstFlowReturn ret = gst_app_src_push_buffer(encoder->appsrc, buffer);
@@ -483,6 +596,10 @@  static int gst_encoder_encode_frame(VideoEncoder *video_encoder,
     int rc = push_raw_frame(encoder, bitmap, src, top_down);
     if (rc == VIDEO_ENCODER_FRAME_ENCODE_DONE) {
         rc = pull_compressed_buffer(encoder, video_buffer);
+#ifdef DO_ZERO_COPY
+        /* GStreamer should have released the source frame buffer by now */
+        spice_assert(!encoder->needs_bitmap);
+#endif
     }
     return rc;
 }

Comments

On Wed, Oct 14, 2015 at 05:33:45PM +0200, Francois Gouget wrote:
> This relies on the GStreamer buffer's lifetime being short enough which
> it is because:
>  - We encode frames one by one.
>  - For all encoders but MJPEG, the first element of the pipeline will
>    convert the bitmap to another image format which entails copying it.
>    So by the time the encoder starts its work, this buffer will not be
>    needed anymore.
>  - The MJPEG encoder does not perform inter-frame compression and thus
>    does not need to keep hold of this buffer once it has processed it.
> 
> Note that we can only avoid copies for the first 1 Mpixels or so.
> That's because Spice splits larger frames into more chunks than we can
> fit GstMemory fragments in a GStreamer buffer. So if there are more
> pixels we will avoid copies for the first 3840 KB and copy the rest.
> 
> Signed-off-by: Francois Gouget <fgouget@codeweavers.com>
> ---
>  server/gstreamer_encoder.c | 131 ++++++++++++++++++++++++++++++++++++++++++---
>  1 file changed, 124 insertions(+), 7 deletions(-)
> 
> 
> Changes since the previous version:
>  * Fixed an empty if() block in zero_copy().
>  * Renamed chunk to chunk_index.
>  * Remove the b pointer and tweaked dst handling a bit.
>  * Added a spice_assert() around gst_memory_map() as it's really 
>    not supposed to fail given the way we call it.
>  * Added a note about GST_MAP_INFO_INIT.
> 
> 
> diff --git a/server/gstreamer_encoder.c b/server/gstreamer_encoder.c
> index a32e9e2..e31b7c1 100644
> --- a/server/gstreamer_encoder.c
> +++ b/server/gstreamer_encoder.c
> @@ -30,6 +30,8 @@
>  
>  #define GSTE_DEFAULT_FPS 30
>  
> +#define DO_ZERO_COPY

Do we need this #ifdef magic btw?

> +
>  
>  typedef struct {
>      SpiceBitmapFmt spice_format;
> @@ -71,6 +73,11 @@ typedef struct SpiceGstEncoder {
>      GstElement *gstenc;
>      GstAppSink *appsink;
>  
> +#ifdef DO_ZERO_COPY
> +    /* Set to TRUE until GStreamer no longer needs the raw bitmap buffer. */
> +    gboolean needs_bitmap;
> +#endif
> +
>      /* The frame counter for GStreamer buffers */
>      uint32_t frame;
>  
> @@ -347,6 +354,79 @@ static inline int line_copy(SpiceGstEncoder *encoder, const SpiceBitmap *bitmap,
>       return TRUE;
>  }
>  
> +#ifdef DO_ZERO_COPY
> +/* A helper for zero_copy() */
> +static void unref_bitmap(gpointer mem)
> +{
> +    SpiceGstEncoder *encoder = (SpiceGstEncoder*)mem;
> +    encoder->needs_bitmap = FALSE;
> +}
> +
> +/* A helper for push_raw_frame(). */
> +static inline int zero_copy(SpiceGstEncoder *encoder, const SpiceBitmap *bitmap,
> +                            GstBuffer *buffer, uint32_t *chunk_index,
> +                            uint32_t *chunk_offset, uint32_t *len)
> +{
> +    /* We cannot control the lifetime of the bitmap but we can wrap it in
> +     * the buffer anyway because:
> +     * - Before returning from gst_encoder_encode_frame() we wait for the
> +     *   pipeline to have converted this frame into a compressed buffer.
> +     *   So it has to have gone through the frame at least once.
> +     * - For all encoders but MJPEG, the first element of the pipeline will
> +     *   convert the bitmap to another image format which entails copying
> +     *   it. So by the time the encoder starts its work, this buffer will
> +     *   not be needed anymore.
> +     * - The MJPEG encoder does not perform inter-frame compression and thus
> +     *   does not need to keep hold of this buffer once it has processed it.
> +     * encoder->needs_bitmap lets us verify that these conditions still hold
> +     * true through an assert.
> +     */
> +    SpiceChunks *chunks = bitmap->data;
> +    while (*chunk_offset >= chunks->chunk[*chunk_index].len) {
> +        /* Make sure that the chunk is not padded */
> +        if (chunks->chunk[*chunk_index].len % bitmap->stride != 0) {

This deserves a helper function rather than comment + convoluted code
every time.

> +            return FALSE;
> +        }
> +        *chunk_offset -= chunks->chunk[*chunk_index].len;
> +        (*chunk_index)++;
> +    }
> +
> +    int max_mem = gst_buffer_get_max_memory();
> +    if (chunks->num_chunks - *chunk_index > max_mem) {
> +        /* There are more chunks than we can fit memory objects in a
> +         * buffer. This will cause the buffer to merge memory objects to
> +         * fit the extra chunks, which means doing wasteful memory copies.
> +         * So use the zero-copy approach for the first max_mem-1 chunks, and
> +         * let push_raw_frame() add another memory object to copy the rest.
> +         */
> +        max_mem = *chunk_index + max_mem - 1;
> +    } else {
> +        max_mem = chunks->num_chunks;
> +    }
> +
> +    while (*len && *chunk_index < max_mem) {
> +        /* Make sure that the chunk is not padded */
> +        if (chunks->chunk[*chunk_index].len % bitmap->stride != 0) {
> +            spice_warning("chunk %d/%d is padded, cannot zero-copy", *chunk_index,
> +                          chunks->num_chunks);
> +            return FALSE;
> +        }
> +        uint32_t thislen = MIN(chunks->chunk[*chunk_index].len - *chunk_offset, *len);
> +        GstMemory *mem = gst_memory_new_wrapped(GST_MEMORY_FLAG_READONLY,
> +                                                chunks->chunk[*chunk_index].data,
> +                                                chunks->chunk[*chunk_index].len,
> +                                                *chunk_offset, thislen,
> +                                                encoder, &unref_bitmap);

unref_bitmap is goin to get called once per chunk, is this intentional?
(I don't mind if it's called just once or once per chunk, but I prefer
to check you considered this).

> +        gst_buffer_append_memory(buffer, mem);
> +        *len -= thislen;
> +        *chunk_offset = 0;
> +        (*chunk_index)++;
> +    }
> +    encoder->needs_bitmap = TRUE;
> +    return TRUE;
> +}
> +#endif
> +
>  /* A helper for gst_encoder_encode_frame(). */
>  static int push_raw_frame(SpiceGstEncoder *encoder, const SpiceBitmap *bitmap,
>                            const SpiceRect *src, int top_down)
> @@ -354,10 +434,9 @@ static int push_raw_frame(SpiceGstEncoder *encoder, const SpiceBitmap *bitmap,
>      const uint32_t height = src->bottom - src->top;
>      const uint32_t stream_stride = (src->right - src->left) * encoder->format->bpp / 8;
>      uint32_t len = stream_stride * height;
> -    GstBuffer *buffer = gst_buffer_new_and_alloc(len);
> -    GstMapInfo map;
> -    gst_buffer_map(buffer, &map, GST_MAP_WRITE);
> -    uint8_t *dst = map.data;
> +    GstBuffer *buffer = gst_buffer_new();
> +    /* TODO Use GST_MAP_INFO_INIT once GStreamer 1.4.5 is no longer relevant */
> +    GstMapInfo map = { .memory = NULL };
>  
>      /* Note that we should not reorder the lines, even if top_down is false.
>       * It just changes the number of lines to skip at the start of the bitmap.
> @@ -369,9 +448,18 @@ static int push_raw_frame(SpiceGstEncoder *encoder, const SpiceBitmap *bitmap,
>          /* We have to do a line-by-line copy because for each we have to leave
>           * out pixels on the left or right.
>           */
> +        GstMemory *mem = gst_allocator_alloc(NULL, len, NULL);
> +        if (!mem) {
> +            gst_buffer_unref(buffer);
> +            return VIDEO_ENCODER_FRAME_UNSUPPORTED;
> +        }
> +        spice_assert(gst_memory_map(mem, &map, GST_MAP_WRITE));

I think this could be gst_buffer_set_size + regular gst_buffer_map?

> +        uint8_t *dst = map.data;
> +
>          chunk_offset += src->left * encoder->format->bpp / 8;
>          if (!line_copy(encoder, bitmap, chunk_offset, stream_stride, height, dst)) {
> -            gst_buffer_unmap(buffer, &map);
> +            gst_memory_unmap(map.memory, &map);
> +            gst_memory_unref(map.memory);
>              gst_buffer_unref(buffer);
>              return VIDEO_ENCODER_FRAME_UNSUPPORTED;
>          }
> @@ -380,10 +468,32 @@ static int push_raw_frame(SpiceGstEncoder *encoder, const SpiceBitmap *bitmap,
>          SpiceChunks *chunks = bitmap->data;
>          uint32_t chunk_index = 0;
>          /* We can copy the bitmap chunk by chunk */
> +#ifdef DO_ZERO_COPY
> +        if (!zero_copy(encoder, bitmap, buffer, &chunk_index, &chunk_offset, &len)) {
> +            gst_buffer_unref(buffer);
> +            return VIDEO_ENCODER_FRAME_UNSUPPORTED;
> +        }
> +        /* Now we must avoid any write to the GstBuffer object as it would
> +         * cause a copy of the read-only memory objects we just added.
> +         * Fortunately we can append extra writable memory objects instead.
> +         */
> +#endif
> +

Can you add a note that after calling zero_copy(), 'len' is the amount
of bytes that could not be zero-copied and remain to be copied? This was
initially quite confusing for me.

> +        if (len) {
> +            GstMemory *mem = gst_allocator_alloc(NULL, len, NULL);
> +            if (!mem) {
> +                gst_buffer_unref(buffer);
> +                return VIDEO_ENCODER_FRAME_UNSUPPORTED;
> +            }
> +            spice_assert(gst_memory_map(mem, &map, GST_MAP_WRITE));
> +        }
> +        uint8_t *dst = map.data;
> +
>          while (len && chunk_index < chunks->num_chunks) {
>              /* Make sure that the chunk is not padded */

This copying could probably go to a helper function as well?
All in all, looks good!

Christophe

>              if (chunks->chunk[chunk_index].len % bitmap->stride != 0) {
> -                gst_buffer_unmap(buffer, &map);
> +                gst_memory_unmap(map.memory, &map);
> +                gst_memory_unref(map.memory);
>                  gst_buffer_unref(buffer);
>                  spice_warning("chunk %d/%d is padded, cannot copy it",
>                                chunk_index, chunks->num_chunks);
> @@ -405,7 +515,10 @@ static int push_raw_frame(SpiceGstEncoder *encoder, const SpiceBitmap *bitmap,
>          }
>          spice_assert(len == 0);
>      }
> -    gst_buffer_unmap(buffer, &map);
> +    if (map.memory) {
> +        gst_memory_unmap(map.memory, &map);
> +        gst_buffer_append_memory(buffer, map.memory);
> +    }
>      GST_BUFFER_OFFSET(buffer) = encoder->frame++;
>  
>      GstFlowReturn ret = gst_app_src_push_buffer(encoder->appsrc, buffer);
> @@ -483,6 +596,10 @@ static int gst_encoder_encode_frame(VideoEncoder *video_encoder,
>      int rc = push_raw_frame(encoder, bitmap, src, top_down);
>      if (rc == VIDEO_ENCODER_FRAME_ENCODE_DONE) {
>          rc = pull_compressed_buffer(encoder, video_buffer);
> +#ifdef DO_ZERO_COPY
> +        /* GStreamer should have released the source frame buffer by now */
> +        spice_assert(!encoder->needs_bitmap);
> +#endif
>      }
>      return rc;
>  }
> -- 
> 2.6.1
> 
> _______________________________________________
> Spice-devel mailing list
> Spice-devel@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/spice-devel