[04/10] radeon/vcn: implement jpeg decode functions

Submitted by Zhang, Boyuan on Aug. 2, 2018, 7:44 p.m.

Details

Message ID 1533239090-1865-4-git-send-email-boyuan.zhang@amd.com
State New
Headers show
Series "Series without cover letter" ( rev: 1 ) in Mesa

Not browsing as part of any series.

Commit Message

Zhang, Boyuan Aug. 2, 2018, 7:44 p.m.
From: Boyuan Zhang <boyuan.zhang@amd.com>

Signed-off-by: Boyuan Zhang <boyuan.zhang@amd.com>
---
 src/gallium/drivers/radeon/radeon_vcn_jpeg.c | 286 ++++++++++++++++++++++++++-
 1 file changed, 281 insertions(+), 5 deletions(-)

Patch hide | download patch | download mbox

diff --git a/src/gallium/drivers/radeon/radeon_vcn_jpeg.c b/src/gallium/drivers/radeon/radeon_vcn_jpeg.c
index c078131..72dff57 100644
--- a/src/gallium/drivers/radeon/radeon_vcn_jpeg.c
+++ b/src/gallium/drivers/radeon/radeon_vcn_jpeg.c
@@ -63,12 +63,195 @@  struct radeon_jpeg_decoder {
 	unsigned			dt_chroma_top_offset;
 };
 
+static void radeon_jpeg_destroy_associated_data(void *data)
+{
+	/* No-op: the associated data is only a frame number cast to a pointer, so there is nothing to free */
+}
+
+static struct pb_buffer *radeon_jpeg_get_decode_param(struct radeon_jpeg_decoder *dec,
+					struct pipe_video_buffer *target,
+					struct pipe_picture_desc *picture)
+{	/* Fills dec's size/offset/pitch fields from target and returns the luma plane's buffer; picture is not read. */
+	struct r600_texture *luma = (struct r600_texture *)
+				((struct vl_video_buffer *)target)->resources[0];
+	struct r600_texture *chroma = (struct r600_texture *)
+				((struct vl_video_buffer *)target)->resources[1];
+
+	dec->bsd_size = align(dec->bs_size, 128);	/* same 128 alignment that end_frame zero-pads the bitstream to */
+	dec->dt_luma_top_offset = luma->surface.u.gfx9.surf_offset;
+	if (target->buffer_format == PIPE_FORMAT_NV12) {	/* chroma is only dereferenced on this path */
+		dec->dt_chroma_top_offset = chroma->surface.u.gfx9.surf_offset;
+		dec->dt_pitch = luma->surface.u.gfx9.surf_pitch * luma->surface.blk_w;
+	}
+	else if (target->buffer_format == PIPE_FORMAT_YUYV)
+		dec->dt_pitch = luma->surface.u.gfx9.surf_pitch;
+	dec->dt_uv_pitch = dec->dt_pitch / 2;	/* NOTE(review): dt_pitch keeps its previous value for formats other than NV12/YUYV */
+
+	return luma->resource.buf;
+}
+
+static void get_mjpeg_slice_header(struct radeon_jpeg_decoder *dec, struct pipe_mjpeg_picture_desc *pic)
+{	/* Writes SOI, DQT, DHT, optional DRI, SOF0 and SOS headers built from pic at dec->bs_ptr, then advances bs_ptr/bs_size. */
+	int size = 0, saved_size, len_pos, i;
+	uint16_t val;	/* 16-bit field that is stored byte by byte, most significant byte first */
+	uint8_t *buf = dec->bs_ptr;
+
+	/* SOI: start of image marker */
+	buf[size++] = 0xff;
+	buf[size++] = 0xd8;
+
+	/* DQT: one id byte plus 64 table bytes per loaded quantiser table */
+	buf[size++] = 0xff;
+	buf[size++] = 0xdb;
+
+	len_pos = size++;	/* reserve two bytes for the segment length, filled in below */
+	size++;
+
+	for (i = 0; i < 4; ++i) {
+		if (pic->quantization_table.load_quantiser_table[i] == 0)
+			continue;
+
+		buf[size++] = i;
+		memcpy((buf + size), &pic->quantization_table.quantiser_table[i], 64);
+		size += 64;
+	}
+
+	buf[len_pos] = ((size - 4) >> 8) & 0xff;	/* length skips SOI and the DQT marker; no uint16_t cast, offset may be unaligned */
+	buf[len_pos + 1] = (size - 4) & 0xff;
+
+	saved_size = size;
+
+	/* DHT: all loaded DC tables first, then all loaded AC tables */
+	buf[size++] = 0xff;
+	buf[size++] = 0xc4;
+
+	len_pos = size++;
+	size++;
+
+	for (i = 0; i < 2; ++i) {
+		if (pic->huffman_table.load_huffman_table[i] == 0)
+			continue;
+
+		buf[size++] = 0x00 | i;
+		memcpy((buf + size), &pic->huffman_table.table[i].num_dc_codes, 16);
+		size += 16;
+		memcpy((buf + size), &pic->huffman_table.table[i].dc_values, 12);
+		size += 12;
+	}
+
+	for (i = 0; i < 2; ++i) {
+		if (pic->huffman_table.load_huffman_table[i] == 0)
+			continue;
+
+		buf[size++] = 0x10 | i;
+		memcpy((buf + size), &pic->huffman_table.table[i].num_ac_codes, 16);
+		size += 16;
+		memcpy((buf + size), &pic->huffman_table.table[i].ac_values, 162);
+		size += 162;
+	}
+
+	buf[len_pos] = ((size - saved_size - 2) >> 8) & 0xff;	/* segment length, two marker bytes excluded */
+	buf[len_pos + 1] = (size - saved_size - 2) & 0xff;
+
+	saved_size = size;
+
+	/* DRI: only emitted when a restart interval is set */
+	if (pic->slice_parameter.restart_interval) {
+		buf[size++] = 0xff;
+		buf[size++] = 0xdd;
+		buf[size++] = 0x00;
+		buf[size++] = 0x04;
+		buf[size++] = (pic->slice_parameter.restart_interval >> 8) & 0xff;
+		buf[size++] = pic->slice_parameter.restart_interval & 0xff;
+		saved_size = size;	/* the SOF length below is measured from here */
+	}
+
+	/* SOF: frame header (marker 0xffc0) */
+	buf[size++] = 0xff;
+	buf[size++] = 0xc0;
+
+	len_pos = size++;
+	size++;
+
+	buf[size++] = 0x08;	/* sample precision in bits */
+
+	val = pic->picture_parameter.picture_height;
+	buf[size++] = val >> 8;
+	buf[size++] = val & 0xff;
+
+	val = pic->picture_parameter.picture_width;
+	buf[size++] = val >> 8;
+	buf[size++] = val & 0xff;
+
+	buf[size++] = pic->picture_parameter.num_components;
+
+	for (i = 0; i < pic->picture_parameter.num_components; ++i) {
+		buf[size++] = pic->picture_parameter.components[i].component_id;
+		buf[size++] = pic->picture_parameter.components[i].h_sampling_factor << 4 |
+					  pic->picture_parameter.components[i].v_sampling_factor;
+		buf[size++] = pic->picture_parameter.components[i].quantiser_table_selector;
+	}
+
+	buf[len_pos] = ((size - saved_size - 2) >> 8) & 0xff;
+	buf[len_pos + 1] = (size - saved_size - 2) & 0xff;
+
+	saved_size = size;
+
+	/* SOS: scan header, the entropy-coded data follows it */
+	buf[size++] = 0xff;
+	buf[size++] = 0xda;
+
+	len_pos = size++;
+	size++;
+
+	buf[size++] = pic->slice_parameter.num_components;
+
+	for (i = 0; i < pic->slice_parameter.num_components; ++i) {
+		buf[size++] = pic->slice_parameter.components[i].component_selector;
+		buf[size++] = pic->slice_parameter.components[i].dc_table_selector << 4 |
+					  pic->slice_parameter.components[i].ac_table_selector;
+	}
+
+	buf[size++] = 0x00;	/* start of spectral selection */
+	buf[size++] = 0x3f;	/* end of spectral selection (63) */
+	buf[size++] = 0x00;	/* successive approximation bits */
+
+	buf[len_pos] = ((size - saved_size - 2) >> 8) & 0xff;
+	buf[len_pos + 1] = (size - saved_size - 2) & 0xff;
+
+	dec->bs_ptr += size;	/* the caller appends the scan data right after the headers */
+	dec->bs_size += size;
+}
+
 /* flush IB to the hardware */
 static int flush(struct radeon_jpeg_decoder *dec, unsigned flags)
 {
 	return dec->ws->cs_flush(dec->cs, flags, NULL);
 }
 
+/* add a new set register command to the IB */
+static void set_reg_jpeg(struct radeon_jpeg_decoder *dec, unsigned reg,
+					unsigned cond, unsigned type, uint32_t val)
+{
+	/* TODO: not implemented yet; the body is empty, so nothing is added to the IB */
+}
+
+/* send a bitstream buffer command */
+static void send_cmd_bitstream(struct radeon_jpeg_decoder *dec,
+		     struct pb_buffer* buf, uint32_t off,
+		     enum radeon_bo_usage usage, enum radeon_bo_domain domain)
+{
+	/* TODO: not implemented yet; all arguments are ignored and no command is emitted */
+}
+
+/* send a target buffer command */
+static void send_cmd_target(struct radeon_jpeg_decoder *dec,
+		     struct pb_buffer* buf, uint32_t off,
+		     enum radeon_bo_usage usage, enum radeon_bo_domain domain)
+{
+	/* TODO: not implemented yet; all arguments are ignored and no command is emitted */
+}
+
 /* cycle to the next set of buffers */
 static void next_buffer(struct radeon_jpeg_decoder *dec)
 {
@@ -81,7 +264,20 @@  static void next_buffer(struct radeon_jpeg_decoder *dec)
  */
 static void radeon_jpeg_destroy(struct pipe_video_codec *decoder)
 {
-	/* TODO */
+	struct radeon_jpeg_decoder *dec = (struct radeon_jpeg_decoder*)decoder;
+	unsigned i;
+
+	assert(decoder);
+
+	flush(dec, 0);	/* flush the command stream before it is destroyed below */
+
+	dec->ws->cs_destroy(dec->cs);
+
+	for (i = 0; i < NUM_BUFFERS; ++i) {	/* release every bitstream buffer */
+		si_vid_destroy_buffer(&dec->bs_buffers[i]);
+	}
+
+	FREE(dec);	/* dec is the same pointer as decoder, so decoder is invalid after this */
 }
 
 /**
@@ -91,7 +287,19 @@  static void radeon_jpeg_begin_frame(struct pipe_video_codec *decoder,
 			     struct pipe_video_buffer *target,
 			     struct pipe_picture_desc *picture)
 {
-	/* TODO */
+	struct radeon_jpeg_decoder *dec = (struct radeon_jpeg_decoder*)decoder;
+	uintptr_t frame;
+
+	assert(decoder);
+
+	frame = ++dec->frame_number;
+	vl_video_buffer_set_associated_data(target, decoder, (void *)frame,
+					    &radeon_jpeg_destroy_associated_data);
+
+	dec->bs_size = 0;
+	dec->bs_ptr = dec->ws->buffer_map(
+		dec->bs_buffers[dec->cur_buffer].res->buf,
+		dec->cs, PIPE_TRANSFER_WRITE);
 }
 
 /**
@@ -103,7 +311,8 @@  static void radeon_jpeg_decode_macroblock(struct pipe_video_codec *decoder,
 				   const struct pipe_macroblock *macroblocks,
 				   unsigned num_macroblocks)
 {
-	/* TODO */
+	/* not supported (yet) */
+	assert(0);
 }
 
 /**
@@ -116,7 +325,51 @@  static void radeon_jpeg_decode_bitstream(struct pipe_video_codec *decoder,
 				  const void * const *buffers,
 				  const unsigned *sizes)
 {
-	/* TODO */
+	struct radeon_jpeg_decoder *dec = (struct radeon_jpeg_decoder*)decoder;
+	enum pipe_video_format format = u_reduce_video_profile(picture->profile);
+	unsigned i;
+
+	assert(decoder);
+
+	if (!dec->bs_ptr)
+		return;
+
+	if (format == PIPE_VIDEO_FORMAT_JPEG)
+		get_mjpeg_slice_header(dec, (struct pipe_mjpeg_picture_desc*)picture);
+
+	for (i = 0; i < num_buffers; ++i) {
+		struct rvid_buffer *buf = &dec->bs_buffers[dec->cur_buffer];
+		unsigned new_size = dec->bs_size + sizes[i];
+
+		if (format == PIPE_VIDEO_FORMAT_JPEG)
+			new_size += 2; /* save for EOI */
+
+		if (new_size > buf->res->buf->size) {
+			dec->ws->buffer_unmap(buf->res->buf);
+			if (!si_vid_resize_buffer(dec->screen, dec->cs, buf, new_size)) {
+				RVID_ERR("Can't resize bitstream buffer!");
+				return;
+			}
+
+			dec->bs_ptr = dec->ws->buffer_map(buf->res->buf, dec->cs,
+							  PIPE_TRANSFER_WRITE);
+			if (!dec->bs_ptr)
+				return;
+
+			dec->bs_ptr += dec->bs_size;
+		}
+
+		memcpy(dec->bs_ptr, buffers[i], sizes[i]);
+		dec->bs_size += sizes[i];
+		dec->bs_ptr += sizes[i];
+	}
+
+	if (format == PIPE_VIDEO_FORMAT_JPEG) {
+		((uint8_t *)dec->bs_ptr)[0] = 0xff;	/* EOI */
+		((uint8_t *)dec->bs_ptr)[1] = 0xd9;
+		dec->bs_size += 2;
+		dec->bs_ptr += 2;
+	}
 }
 
 /**
@@ -126,7 +379,30 @@  static void radeon_jpeg_end_frame(struct pipe_video_codec *decoder,
 			   struct pipe_video_buffer *target,
 			   struct pipe_picture_desc *picture)
 {
-	/* TODO */
+	struct radeon_jpeg_decoder *dec = (struct radeon_jpeg_decoder*)decoder;
+	struct pb_buffer *dt;
+	struct rvid_buffer *bs_buf;
+
+	assert(decoder);
+
+	if (!dec->bs_ptr)
+		return;
+
+	bs_buf = &dec->bs_buffers[dec->cur_buffer];
+
+	memset(dec->bs_ptr, 0, align(dec->bs_size, 128) - dec->bs_size);
+	dec->ws->buffer_unmap(bs_buf->res->buf);
+
+	dt = radeon_jpeg_get_decode_param(dec, target, picture);
+
+	send_cmd_bitstream(dec, bs_buf->res->buf,
+		 0, RADEON_USAGE_READ, RADEON_DOMAIN_GTT);
+	send_cmd_target(dec, dt, 0,
+		 RADEON_USAGE_WRITE, RADEON_DOMAIN_VRAM);
+
+	flush(dec, PIPE_FLUSH_ASYNC);
+
+	next_buffer(dec);
 }
 
 /**

Comments

On 08/02/2018 03:44 PM, boyuan.zhang@amd.com wrote:
> From: Boyuan Zhang <boyuan.zhang@amd.com>
>
> Signed-off-by: Boyuan Zhang <boyuan.zhang@amd.com>
> ---
>   src/gallium/drivers/radeon/radeon_vcn_jpeg.c | 286 ++++++++++++++++++++++++++-
>   1 file changed, 281 insertions(+), 5 deletions(-)
>
> diff --git a/src/gallium/drivers/radeon/radeon_vcn_jpeg.c b/src/gallium/drivers/radeon/radeon_vcn_jpeg.c
> index c078131..72dff57 100644
> --- a/src/gallium/drivers/radeon/radeon_vcn_jpeg.c
> +++ b/src/gallium/drivers/radeon/radeon_vcn_jpeg.c
> @@ -63,12 +63,195 @@ struct radeon_jpeg_decoder {
>   	unsigned			dt_chroma_top_offset;
>   };
>   
> +static void radeon_jpeg_destroy_associated_data(void *data)
> +{
> +	/* NOOP, since we only use an intptr */
> +}
> +
> +static struct pb_buffer *radeon_jpeg_get_decode_param(struct radeon_jpeg_decoder *dec,
> +					struct pipe_video_buffer *target,
> +					struct pipe_picture_desc *picture)
> +{
> +	struct r600_texture *luma = (struct r600_texture *)
> +				((struct vl_video_buffer *)target)->resources[0];
> +	struct r600_texture *chroma = (struct r600_texture *)
> +				((struct vl_video_buffer *)target)->resources[1];
> +
> +	dec->bsd_size = align(dec->bs_size, 128);
> +	dec->dt_luma_top_offset = luma->surface.u.gfx9.surf_offset;
> +	if (target->buffer_format == PIPE_FORMAT_NV12) {
> +		dec->dt_chroma_top_offset = chroma->surface.u.gfx9.surf_offset;
> +		dec->dt_pitch = luma->surface.u.gfx9.surf_pitch * luma->surface.blk_w;
> +	}
> +	else if (target->buffer_format == PIPE_FORMAT_YUYV)
> +		dec->dt_pitch = luma->surface.u.gfx9.surf_pitch;
> +	dec->dt_uv_pitch = dec->dt_pitch / 2;
> +
> +	return luma->resource.buf;
> +}
> +
> +static void get_mjpeg_slice_header(struct radeon_jpeg_decoder *dec, struct pipe_mjpeg_picture_desc *pic)
It looks like the bitstream reconstruction is still required for VCN
JPEG. When UVD JPEG decode was implemented, it was thought to be needed
only for UVD FW JPEG decode, which is why it was kept in the driver.
Since both UVD and VCN need this bitstream (BS) reconstruction, it would
be better to move it to the state tracker.

Regards,
Leo

> +{
> +	int size = 0, saved_size, len_pos, i;
> +	uint16_t *bs;
> +	uint8_t *buf = dec->bs_ptr;
> +
> +	/* SOI */
> +	buf[size++] = 0xff;
> +	buf[size++] = 0xd8;
> +
> +	/* DQT */
> +	buf[size++] = 0xff;
> +	buf[size++] = 0xdb;
> +
> +	len_pos = size++;
> +	size++;
> +
> +	for (i = 0; i < 4; ++i) {
> +		if (pic->quantization_table.load_quantiser_table[i] == 0)
> +			continue;
> +
> +		buf[size++] = i;
> +		memcpy((buf + size), &pic->quantization_table.quantiser_table[i], 64);
> +		size += 64;
> +	}
> +
> +	bs = (uint16_t*)&buf[len_pos];
> +	*bs = util_bswap16(size - 4);
> +
> +	saved_size = size;
> +
> +	/* DHT */
> +	buf[size++] = 0xff;
> +	buf[size++] = 0xc4;
> +
> +	len_pos = size++;
> +	size++;
> +
> +	for (i = 0; i < 2; ++i) {
> +		if (pic->huffman_table.load_huffman_table[i] == 0)
> +			continue;
> +
> +		buf[size++] = 0x00 | i;
> +		memcpy((buf + size), &pic->huffman_table.table[i].num_dc_codes, 16);
> +		size += 16;
> +		memcpy((buf + size), &pic->huffman_table.table[i].dc_values, 12);
> +		size += 12;
> +	}
> +
> +	for (i = 0; i < 2; ++i) {
> +		if (pic->huffman_table.load_huffman_table[i] == 0)
> +			continue;
> +
> +		buf[size++] = 0x10 | i;
> +		memcpy((buf + size), &pic->huffman_table.table[i].num_ac_codes, 16);
> +		size += 16;
> +		memcpy((buf + size), &pic->huffman_table.table[i].ac_values, 162);
> +		size += 162;
> +	}
> +
> +	bs = (uint16_t*)&buf[len_pos];
> +	*bs = util_bswap16(size - saved_size - 2);
> +
> +	saved_size = size;
> +
> +	/* DRI */
> +	if (pic->slice_parameter.restart_interval) {
> +		buf[size++] = 0xff;
> +		buf[size++] = 0xdd;
> +		buf[size++] = 0x00;
> +		buf[size++] = 0x04;
> +		bs = (uint16_t*)&buf[size++];
> +		*bs = util_bswap16(pic->slice_parameter.restart_interval);
> +		saved_size = ++size;
> +	}
> +
> +	/* SOF */
> +	buf[size++] = 0xff;
> +	buf[size++] = 0xc0;
> +
> +	len_pos = size++;
> +	size++;
> +
> +	buf[size++] = 0x08;
> +
> +	bs = (uint16_t*)&buf[size++];
> +	*bs = util_bswap16(pic->picture_parameter.picture_height);
> +	size++;
> +
> +	bs = (uint16_t*)&buf[size++];
> +	*bs = util_bswap16(pic->picture_parameter.picture_width);
> +	size++;
> +
> +	buf[size++] = pic->picture_parameter.num_components;
> +
> +	for (i = 0; i < pic->picture_parameter.num_components; ++i) {
> +		buf[size++] = pic->picture_parameter.components[i].component_id;
> +		buf[size++] = pic->picture_parameter.components[i].h_sampling_factor << 4 |
> +					  pic->picture_parameter.components[i].v_sampling_factor;
> +		buf[size++] = pic->picture_parameter.components[i].quantiser_table_selector;
> +	}
> +
> +	bs = (uint16_t*)&buf[len_pos];
> +	*bs = util_bswap16(size - saved_size - 2);
> +
> +	saved_size = size;
> +
> +	/* SOS */
> +	buf[size++] = 0xff;
> +	buf[size++] = 0xda;
> +
> +	len_pos = size++;
> +	size++;
> +
> +	buf[size++] = pic->slice_parameter.num_components;
> +
> +	for (i = 0; i < pic->slice_parameter.num_components; ++i) {
> +		buf[size++] = pic->slice_parameter.components[i].component_selector;
> +		buf[size++] = pic->slice_parameter.components[i].dc_table_selector << 4 |
> +					  pic->slice_parameter.components[i].ac_table_selector;
> +	}
> +
> +	buf[size++] = 0x00;
> +	buf[size++] = 0x3f;
> +	buf[size++] = 0x00;
> +
> +	bs = (uint16_t*)&buf[len_pos];
> +	*bs = util_bswap16(size - saved_size - 2);
> +
> +	dec->bs_ptr += size;
> +	dec->bs_size += size;
> +}
> +
>   /* flush IB to the hardware */
>   static int flush(struct radeon_jpeg_decoder *dec, unsigned flags)
>   {
>   	return dec->ws->cs_flush(dec->cs, flags, NULL);
>   }
>   
> +/* add a new set register command to the IB */
> +static void set_reg_jpeg(struct radeon_jpeg_decoder *dec, unsigned reg,
> +					unsigned cond, unsigned type, uint32_t val)
> +{
> +	/* TODO */
> +}
> +
> +/* send a bitstream buffer command */
> +static void send_cmd_bitstream(struct radeon_jpeg_decoder *dec,
> +		     struct pb_buffer* buf, uint32_t off,
> +		     enum radeon_bo_usage usage, enum radeon_bo_domain domain)
> +{
> +	/* TODO */
> +}
> +
> +/* send a target buffer command */
> +static void send_cmd_target(struct radeon_jpeg_decoder *dec,
> +		     struct pb_buffer* buf, uint32_t off,
> +		     enum radeon_bo_usage usage, enum radeon_bo_domain domain)
> +{
> +	/* TODO */
> +}
> +
>   /* cycle to the next set of buffers */
>   static void next_buffer(struct radeon_jpeg_decoder *dec)
>   {
> @@ -81,7 +264,20 @@ static void next_buffer(struct radeon_jpeg_decoder *dec)
>    */
>   static void radeon_jpeg_destroy(struct pipe_video_codec *decoder)
>   {
> -	/* TODO */
> +	struct radeon_jpeg_decoder *dec = (struct radeon_jpeg_decoder*)decoder;
> +	unsigned i;
> +
> +	assert(decoder);
> +
> +	flush(dec, 0);
> +
> +	dec->ws->cs_destroy(dec->cs);
> +
> +	for (i = 0; i < NUM_BUFFERS; ++i) {
> +		si_vid_destroy_buffer(&dec->bs_buffers[i]);
> +	}
> +
> +	FREE(dec);
>   }
>   
>   /**
> @@ -91,7 +287,19 @@ static void radeon_jpeg_begin_frame(struct pipe_video_codec *decoder,
>   			     struct pipe_video_buffer *target,
>   			     struct pipe_picture_desc *picture)
>   {
> -	/* TODO */
> +	struct radeon_jpeg_decoder *dec = (struct radeon_jpeg_decoder*)decoder;
> +	uintptr_t frame;
> +
> +	assert(decoder);
> +
> +	frame = ++dec->frame_number;
> +	vl_video_buffer_set_associated_data(target, decoder, (void *)frame,
> +					    &radeon_jpeg_destroy_associated_data);
> +
> +	dec->bs_size = 0;
> +	dec->bs_ptr = dec->ws->buffer_map(
> +		dec->bs_buffers[dec->cur_buffer].res->buf,
> +		dec->cs, PIPE_TRANSFER_WRITE);
>   }
>   
>   /**
> @@ -103,7 +311,8 @@ static void radeon_jpeg_decode_macroblock(struct pipe_video_codec *decoder,
>   				   const struct pipe_macroblock *macroblocks,
>   				   unsigned num_macroblocks)
>   {
> -	/* TODO */
> +	/* not supported (yet) */
> +	assert(0);
>   }
>   
>   /**
> @@ -116,7 +325,51 @@ static void radeon_jpeg_decode_bitstream(struct pipe_video_codec *decoder,
>   				  const void * const *buffers,
>   				  const unsigned *sizes)
>   {
> -	/* TODO */
> +	struct radeon_jpeg_decoder *dec = (struct radeon_jpeg_decoder*)decoder;
> +	enum pipe_video_format format = u_reduce_video_profile(picture->profile);
> +	unsigned i;
> +
> +	assert(decoder);
> +
> +	if (!dec->bs_ptr)
> +		return;
> +
> +	if (format == PIPE_VIDEO_FORMAT_JPEG)
> +		get_mjpeg_slice_header(dec, (struct pipe_mjpeg_picture_desc*)picture);
> +
> +	for (i = 0; i < num_buffers; ++i) {
> +		struct rvid_buffer *buf = &dec->bs_buffers[dec->cur_buffer];
> +		unsigned new_size = dec->bs_size + sizes[i];
> +
> +		if (format == PIPE_VIDEO_FORMAT_JPEG)
> +			new_size += 2; /* save for EOI */
> +
> +		if (new_size > buf->res->buf->size) {
> +			dec->ws->buffer_unmap(buf->res->buf);
> +			if (!si_vid_resize_buffer(dec->screen, dec->cs, buf, new_size)) {
> +				RVID_ERR("Can't resize bitstream buffer!");
> +				return;
> +			}
> +
> +			dec->bs_ptr = dec->ws->buffer_map(buf->res->buf, dec->cs,
> +							  PIPE_TRANSFER_WRITE);
> +			if (!dec->bs_ptr)
> +				return;
> +
> +			dec->bs_ptr += dec->bs_size;
> +		}
> +
> +		memcpy(dec->bs_ptr, buffers[i], sizes[i]);
> +		dec->bs_size += sizes[i];
> +		dec->bs_ptr += sizes[i];
> +	}
> +
> +	if (format == PIPE_VIDEO_FORMAT_JPEG) {
> +		((uint8_t *)dec->bs_ptr)[0] = 0xff;	/* EOI */
> +		((uint8_t *)dec->bs_ptr)[1] = 0xd9;
> +		dec->bs_size += 2;
> +		dec->bs_ptr += 2;
> +	}
>   }
>   
>   /**
> @@ -126,7 +379,30 @@ static void radeon_jpeg_end_frame(struct pipe_video_codec *decoder,
>   			   struct pipe_video_buffer *target,
>   			   struct pipe_picture_desc *picture)
>   {
> -	/* TODO */
> +	struct radeon_jpeg_decoder *dec = (struct radeon_jpeg_decoder*)decoder;
> +	struct pb_buffer *dt;
> +	struct rvid_buffer *bs_buf;
> +
> +	assert(decoder);
> +
> +	if (!dec->bs_ptr)
> +		return;
> +
> +	bs_buf = &dec->bs_buffers[dec->cur_buffer];
> +
> +	memset(dec->bs_ptr, 0, align(dec->bs_size, 128) - dec->bs_size);
> +	dec->ws->buffer_unmap(bs_buf->res->buf);
> +
> +	dt = radeon_jpeg_get_decode_param(dec, target, picture);
> +
> +	send_cmd_bitstream(dec, bs_buf->res->buf,
> +		 0, RADEON_USAGE_READ, RADEON_DOMAIN_GTT);
> +	send_cmd_target(dec, dt, 0,
> +		 RADEON_USAGE_WRITE, RADEON_DOMAIN_VRAM);
> +
> +	flush(dec, PIPE_FLUSH_ASYNC);
> +
> +	next_buffer(dec);
>   }
>   
>   /**