[10/18] radeon/vcn: add encode header implementations

Submitted by Zhang, Boyuan on Nov. 8, 2017, 6:08 p.m.

Details

Message ID 1510164503-13038-10-git-send-email-boyuan.zhang@amd.com
State New
Headers show
Series "Series without cover letter" ( rev: 1 ) in Mesa

Not browsing as part of any series.

Commit Message

Zhang, Boyuan Nov. 8, 2017, 6:08 p.m.
From: Boyuan Zhang <boyuan.zhang@amd.com>

Implement encoding of sps, pps, and silce headers using the newly added h.264
header coding descriptors functions based on h.264 specs.

Signed-off-by: Boyuan Zhang <boyuan.zhang@amd.com>
---
 src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c | 234 ++++++++++++++++++++++++
 1 file changed, 234 insertions(+)

Patch hide | download patch | download mbox

diff --git a/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c b/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c
index 5170c67..c6dc420 100644
--- a/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c
+++ b/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c
@@ -362,6 +362,233 @@  static void radeon_enc_quality_params(struct radeon_encoder *enc)
 	RADEON_ENC_END();
 }
 
+static void radeon_enc_nalu_sps(struct radeon_encoder *enc)
+{
+	RADEON_ENC_BEGIN(RENCODE_IB_PARAM_DIRECT_OUTPUT_NALU);
+	RADEON_ENC_CS(RENCODE_DIRECT_OUTPUT_NALU_TYPE_SPS);
+	uint32_t *size_in_bytes = &enc->cs->current.buf[enc->cs->current.cdw++];
+	radeon_enc_reset(enc);
+	radeon_enc_set_emulation_prevention(enc, false);
+	radeon_enc_code_fixed_bits(enc, 0x00000001, 32);
+	radeon_enc_code_fixed_bits(enc, 0x67, 8);
+	radeon_enc_byte_align(enc);
+	radeon_enc_set_emulation_prevention(enc, true);
+	radeon_enc_code_fixed_bits(enc, enc->enc_pic.spec_misc.profile_idc, 8);
+	radeon_enc_code_fixed_bits(enc, 0x04, 8);
+	radeon_enc_code_fixed_bits(enc, enc->enc_pic.spec_misc.level_idc, 8);
+	radeon_enc_code_ue(enc, 0x0);
+
+	if(enc->enc_pic.spec_misc.profile_idc == 100 || enc->enc_pic.spec_misc.profile_idc == 110 || enc->enc_pic.spec_misc.profile_idc == 122 ||
+			enc->enc_pic.spec_misc.profile_idc == 244 || enc->enc_pic.spec_misc.profile_idc == 44 || enc->enc_pic.spec_misc.profile_idc == 83 ||
+			enc->enc_pic.spec_misc.profile_idc == 86 || enc->enc_pic.spec_misc.profile_idc == 118 || enc->enc_pic.spec_misc.profile_idc == 128 ||
+			enc->enc_pic.spec_misc.profile_idc == 138) {
+		radeon_enc_code_ue(enc, 0x1);
+		radeon_enc_code_ue(enc, 0x0);
+		radeon_enc_code_ue(enc, 0x0);
+		radeon_enc_code_fixed_bits(enc, 0x0, 2);
+	}
+
+	radeon_enc_code_ue(enc, 1);
+	radeon_enc_code_ue(enc, enc->enc_pic.pic_order_cnt_type);
+
+	if (enc->enc_pic.pic_order_cnt_type == 0)
+		radeon_enc_code_ue(enc, 1);
+
+	radeon_enc_code_ue(enc, (enc->base.max_references + 1));
+	radeon_enc_code_fixed_bits(enc, enc->enc_pic.layer_ctrl.max_num_temporal_layers > 1 ? 0x1 : 0x0, 1);
+	radeon_enc_code_ue(enc, (enc->enc_pic.session_init.aligned_picture_width / 16 - 1));
+	radeon_enc_code_ue(enc, (enc->enc_pic.session_init.aligned_picture_height / 16 - 1));
+	bool progressive_only = true;
+	radeon_enc_code_fixed_bits(enc, progressive_only ? 0x1 : 0x0, 1);
+
+	if (!progressive_only)
+		radeon_enc_code_fixed_bits(enc, 0x0, 1);
+
+	radeon_enc_code_fixed_bits(enc, 0x1, 1);
+
+	if ((enc->enc_pic.crop_left != 0) || (enc->enc_pic.crop_right != 0) ||
+			(enc->enc_pic.crop_top != 0) || (enc->enc_pic.crop_bottom != 0)) {
+		radeon_enc_code_fixed_bits(enc, 0x1, 1);
+		radeon_enc_code_ue(enc, enc->enc_pic.crop_left);
+		radeon_enc_code_ue(enc, enc->enc_pic.crop_right);
+		radeon_enc_code_ue(enc, enc->enc_pic.crop_top);
+		radeon_enc_code_ue(enc, enc->enc_pic.crop_bottom);
+	} else
+		radeon_enc_code_fixed_bits(enc, 0x0, 1);
+
+	radeon_enc_code_fixed_bits(enc, 0x1, 1);
+	radeon_enc_code_fixed_bits(enc, 0x0, 1);
+	radeon_enc_code_fixed_bits(enc, 0x0, 1);
+	radeon_enc_code_fixed_bits(enc, 0x0, 1);
+	radeon_enc_code_fixed_bits(enc, 0x0, 1);
+	radeon_enc_code_fixed_bits(enc, 0x0, 1);
+	radeon_enc_code_fixed_bits(enc, 0x0, 1);
+	radeon_enc_code_fixed_bits(enc, 0x0, 1);
+	radeon_enc_code_fixed_bits(enc, 0x0, 1);
+	radeon_enc_code_fixed_bits(enc, 0x1, 1);
+	radeon_enc_code_fixed_bits(enc, 0x1, 1);
+	radeon_enc_code_ue(enc, 0x0);
+	radeon_enc_code_ue(enc, 0x0);
+	radeon_enc_code_ue(enc, 16);
+	radeon_enc_code_ue(enc, 16);
+	radeon_enc_code_ue(enc, 0x0);
+	radeon_enc_code_ue(enc, (enc->base.max_references + 1));
+	radeon_enc_code_fixed_bits(enc, 0x1, 1);
+	radeon_enc_byte_align(enc);
+	radeon_enc_flush_headers(enc);
+	*size_in_bytes = (enc->bits_output + 7) / 8;
+	RADEON_ENC_END();
+}
+
+static void radeon_enc_nalu_pps(struct radeon_encoder *enc)
+{
+	RADEON_ENC_BEGIN(RENCODE_IB_PARAM_DIRECT_OUTPUT_NALU);
+	RADEON_ENC_CS(RENCODE_DIRECT_OUTPUT_NALU_TYPE_PPS);
+	uint32_t *size_in_bytes = &enc->cs->current.buf[enc->cs->current.cdw++];
+	radeon_enc_reset(enc);
+	radeon_enc_set_emulation_prevention(enc, false);
+	radeon_enc_code_fixed_bits(enc, 0x00000001, 32);
+	radeon_enc_code_fixed_bits(enc, 0x68, 8);
+	radeon_enc_byte_align(enc);
+	radeon_enc_set_emulation_prevention(enc, true);
+	radeon_enc_code_ue(enc, 0x0);
+	radeon_enc_code_ue(enc, 0x0);
+	radeon_enc_code_fixed_bits(enc, (enc->enc_pic.spec_misc.cabac_enable ? 0x1 : 0x0), 1);
+	radeon_enc_code_fixed_bits(enc, 0x0, 1);
+	radeon_enc_code_ue(enc, 0x0);
+	radeon_enc_code_ue(enc, 0x0);
+	radeon_enc_code_ue(enc, 0x0);
+	radeon_enc_code_fixed_bits(enc, 0x0, 1);
+	radeon_enc_code_fixed_bits(enc, 0x0, 2);
+	radeon_enc_code_se(enc, 0x0);
+	radeon_enc_code_se(enc, 0x0);
+	radeon_enc_code_se(enc, 0x0);
+	radeon_enc_code_fixed_bits(enc, 0x1, 1);
+	radeon_enc_code_fixed_bits(enc, 0x0, 1);
+	radeon_enc_code_fixed_bits(enc, 0x0, 1);
+	radeon_enc_code_fixed_bits(enc, 0x1, 1);
+	radeon_enc_byte_align(enc);
+	radeon_enc_flush_headers(enc);
+	*size_in_bytes = (enc->bits_output + 7) / 8;
+	RADEON_ENC_END();
+}
+
+static void radeon_enc_slice_header(struct radeon_encoder *enc)
+{
+	uint32_t instruction[RENCODE_SLICE_HEADER_TEMPLATE_MAX_NUM_INSTRUCTIONS] = {0};
+	uint32_t num_bits[RENCODE_SLICE_HEADER_TEMPLATE_MAX_NUM_INSTRUCTIONS] = {0};
+	unsigned int inst_index = 0;
+	unsigned int bit_index = 0;
+	unsigned int bits_copied = 0;
+	RADEON_ENC_BEGIN(RENCODE_IB_PARAM_SLICE_HEADER);
+	radeon_enc_reset(enc);
+	radeon_enc_set_emulation_prevention(enc, false);
+
+	if (enc->enc_pic.is_idr)
+		radeon_enc_code_fixed_bits(enc, 0x65, 8);
+	else if (enc->enc_pic.not_referenced)
+		radeon_enc_code_fixed_bits(enc, 0x01, 8);
+	else
+		radeon_enc_code_fixed_bits(enc, 0x41, 8);
+
+	radeon_enc_flush_headers(enc);
+	bit_index ++;
+	instruction[inst_index] = RENCODE_HEADER_INSTRUCTION_COPY;
+	num_bits[inst_index] = enc->bits_output - bits_copied;
+	bits_copied = enc->bits_output;
+	inst_index++;
+
+	instruction[inst_index] = RENCODE_H264_HEADER_INSTRUCTION_FIRST_MB;
+	inst_index++;
+
+	switch(enc->enc_pic.picture_type) {
+		case PIPE_H264_ENC_PICTURE_TYPE_I:
+		case PIPE_H264_ENC_PICTURE_TYPE_IDR:
+			radeon_enc_code_fixed_bits(enc, 0x08, 7);
+			break;
+		case PIPE_H264_ENC_PICTURE_TYPE_P:
+		case PIPE_H264_ENC_PICTURE_TYPE_SKIP:
+			radeon_enc_code_fixed_bits(enc, 0x06, 5);
+			break;
+		case PIPE_H264_ENC_PICTURE_TYPE_B:
+			radeon_enc_code_fixed_bits(enc, 0x07, 5);
+			break;
+		default:
+			radeon_enc_code_fixed_bits(enc, 0x08, 7);
+	}
+
+	radeon_enc_code_ue(enc, 0x0);
+	radeon_enc_code_fixed_bits(enc, enc->enc_pic.frame_num % 32, 5);
+
+	if (enc->enc_pic.h264_enc_params.input_picture_structure != RENCODE_H264_PICTURE_STRUCTURE_FRAME) {
+		radeon_enc_code_fixed_bits(enc, 0x1, 1);
+		radeon_enc_code_fixed_bits(enc, enc->enc_pic.h264_enc_params.input_picture_structure == RENCODE_H264_PICTURE_STRUCTURE_BOTTOM_FIELD ? 1 : 0, 1);
+	}
+
+	if (enc->enc_pic.is_idr)
+		radeon_enc_code_ue(enc, 0x0);
+
+	if (enc->enc_pic.pic_order_cnt_type == 0)
+		radeon_enc_code_fixed_bits(enc, enc->enc_pic.pic_order_cnt % 32, 5);
+
+	if (enc->enc_pic.picture_type != PIPE_H264_ENC_PICTURE_TYPE_IDR) {
+		radeon_enc_code_fixed_bits(enc, 0x0, 1);
+
+		if (enc->enc_pic.frame_num - enc->enc_pic.ref_idx_l0 > 1) {
+			radeon_enc_code_fixed_bits(enc, 0x1, 1);
+			radeon_enc_code_ue(enc, 0x0);
+			radeon_enc_code_ue(enc, (enc->enc_pic.frame_num - enc->enc_pic.ref_idx_l0 - 1));
+			radeon_enc_code_ue(enc, 0x3);
+		} else
+			radeon_enc_code_fixed_bits(enc, 0x0, 1);
+	}
+
+	if (enc->enc_pic.is_idr) {
+		radeon_enc_code_fixed_bits(enc, 0x0, 1);
+		radeon_enc_code_fixed_bits(enc, 0x0, 1);
+	} else
+		radeon_enc_code_fixed_bits(enc, 0x0, 1);
+
+	if ((enc->enc_pic.picture_type != PIPE_H264_ENC_PICTURE_TYPE_IDR) && (enc->enc_pic.spec_misc.cabac_enable))
+		radeon_enc_code_ue(enc, enc->enc_pic.spec_misc.cabac_init_idc);
+
+	radeon_enc_flush_headers(enc);
+	bit_index ++;
+	instruction[inst_index] = RENCODE_HEADER_INSTRUCTION_COPY;
+	num_bits[inst_index] = enc->bits_output - bits_copied;
+	bits_copied = enc->bits_output;
+	inst_index++;
+
+	instruction[inst_index] = RENCODE_H264_HEADER_INSTRUCTION_SLICE_QP_DELTA;
+	inst_index++;
+
+	radeon_enc_code_ue(enc, enc->enc_pic.h264_deblock.disable_deblocking_filter_idc ? 1: 0);
+
+	if (!enc->enc_pic.h264_deblock.disable_deblocking_filter_idc) {
+		radeon_enc_code_se(enc, enc->enc_pic.h264_deblock.alpha_c0_offset_div2);
+		radeon_enc_code_se(enc, enc->enc_pic.h264_deblock.beta_offset_div2);
+	}
+
+	radeon_enc_flush_headers(enc);
+	bit_index ++;
+	instruction[inst_index] = RENCODE_HEADER_INSTRUCTION_COPY;
+	num_bits[inst_index] = enc->bits_output - bits_copied;
+	bits_copied = enc->bits_output;
+	inst_index++;
+
+	instruction[inst_index] = RENCODE_HEADER_INSTRUCTION_END;
+
+	for (int i = bit_index; i < RENCODE_SLICE_HEADER_TEMPLATE_MAX_TEMPLATE_SIZE_IN_DWORDS; i++)
+		RADEON_ENC_CS(0x00000000);
+
+	for (int j = 0; j < RENCODE_SLICE_HEADER_TEMPLATE_MAX_NUM_INSTRUCTIONS; j++) {
+		RADEON_ENC_CS(instruction[j]);
+		RADEON_ENC_CS(num_bits[j]);
+	}
+
+	RADEON_ENC_END();
+}
+
 static void radeon_enc_ctx(struct radeon_encoder *enc)
 {
 	enc->enc_pic.ctx_buf.swizzle_mode = 0;
@@ -574,6 +801,13 @@  static void encode(struct radeon_encoder *enc)
 	radeon_enc_session_info(enc);
 	enc->total_task_size = 0;
 	radeon_enc_task_info(enc, enc->need_feedback);
+
+	if (enc->enc_pic.is_idr) {
+		radeon_enc_nalu_sps(enc);
+		radeon_enc_nalu_pps(enc);
+	}
+
+	radeon_enc_slice_header(enc);
 	radeon_enc_ctx(enc);
 	radeon_enc_bitstream(enc);
 	radeon_enc_feedback(enc);

Comments

On 08/11/17 18:08, boyuan.zhang@amd.com wrote:
> From: Boyuan Zhang <boyuan.zhang@amd.com>
> 
> Implement encoding of sps, pps, and silce headers using the newly added h.264
> header coding descriptors functions based on h.264 specs.
> 
> Signed-off-by: Boyuan Zhang <boyuan.zhang@amd.com>
> ---
>  src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c | 234 ++++++++++++++++++++++++
>  1 file changed, 234 insertions(+)

So, does this mean we could actually implement VAAPI encode properly with packed headers now rather than hard-coding all of this in the driver?

> 
> diff --git a/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c b/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c
> index 5170c67..c6dc420 100644
> --- a/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c
> +++ b/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c
> @@ -362,6 +362,233 @@ static void radeon_enc_quality_params(struct radeon_encoder *enc)
>  	RADEON_ENC_END();
>  }
>  
> +static void radeon_enc_nalu_sps(struct radeon_encoder *enc)
> +{
> +	RADEON_ENC_BEGIN(RENCODE_IB_PARAM_DIRECT_OUTPUT_NALU);
> +	RADEON_ENC_CS(RENCODE_DIRECT_OUTPUT_NALU_TYPE_SPS);
> +	uint32_t *size_in_bytes = &enc->cs->current.buf[enc->cs->current.cdw++];
> +	radeon_enc_reset(enc);
> +	radeon_enc_set_emulation_prevention(enc, false);
> +	radeon_enc_code_fixed_bits(enc, 0x00000001, 32);
> +	radeon_enc_code_fixed_bits(enc, 0x67, 8);
> +	radeon_enc_byte_align(enc);
> +	radeon_enc_set_emulation_prevention(enc, true);
> +	radeon_enc_code_fixed_bits(enc, enc->enc_pic.spec_misc.profile_idc, 8);
> +	radeon_enc_code_fixed_bits(enc, 0x04, 8);

Please always set constraint_set1_flag when profile_idc is 66.  There are enough actually-constrained-baseline-but-not-marked-as-such streams in the world already to catch out decoders without full baseline support (that is, all of them).

Also, "constraint_set5_flag shall be equal to 0 when profile_idc is not equal to 77, 88, 100, or 118".

> +	radeon_enc_code_fixed_bits(enc, enc->enc_pic.spec_misc.level_idc, 8);
> +	radeon_enc_code_ue(enc, 0x0);
> +
> +	if(enc->enc_pic.spec_misc.profile_idc == 100 || enc->enc_pic.spec_misc.profile_idc == 110 || enc->enc_pic.spec_misc.profile_idc == 122 ||
> +			enc->enc_pic.spec_misc.profile_idc == 244 || enc->enc_pic.spec_misc.profile_idc == 44 || enc->enc_pic.spec_misc.profile_idc == 83 ||
> +			enc->enc_pic.spec_misc.profile_idc == 86 || enc->enc_pic.spec_misc.profile_idc == 118 || enc->enc_pic.spec_misc.profile_idc == 128 ||
> +			enc->enc_pic.spec_misc.profile_idc == 138) {
> +		radeon_enc_code_ue(enc, 0x1);
> +		radeon_enc_code_ue(enc, 0x0);
> +		radeon_enc_code_ue(enc, 0x0);
> +		radeon_enc_code_fixed_bits(enc, 0x0, 2);
> +	}
> +
> +	radeon_enc_code_ue(enc, 1);
> +	radeon_enc_code_ue(enc, enc->enc_pic.pic_order_cnt_type);
> +
> +	if (enc->enc_pic.pic_order_cnt_type == 0)
> +		radeon_enc_code_ue(enc, 1);

POC type can be 1 as well, which has more associated syntax.

> +
> +	radeon_enc_code_ue(enc, (enc->base.max_references + 1));
> +	radeon_enc_code_fixed_bits(enc, enc->enc_pic.layer_ctrl.max_num_temporal_layers > 1 ? 0x1 : 0x0, 1);
> +	radeon_enc_code_ue(enc, (enc->enc_pic.session_init.aligned_picture_width / 16 - 1));
> +	radeon_enc_code_ue(enc, (enc->enc_pic.session_init.aligned_picture_height / 16 - 1));
> +	bool progressive_only = true;
> +	radeon_enc_code_fixed_bits(enc, progressive_only ? 0x1 : 0x0, 1);
> +
> +	if (!progressive_only)
> +		radeon_enc_code_fixed_bits(enc, 0x0, 1);
> +
> +	radeon_enc_code_fixed_bits(enc, 0x1, 1);
> +
> +	if ((enc->enc_pic.crop_left != 0) || (enc->enc_pic.crop_right != 0) ||
> +			(enc->enc_pic.crop_top != 0) || (enc->enc_pic.crop_bottom != 0)) {
> +		radeon_enc_code_fixed_bits(enc, 0x1, 1);
> +		radeon_enc_code_ue(enc, enc->enc_pic.crop_left);
> +		radeon_enc_code_ue(enc, enc->enc_pic.crop_right);
> +		radeon_enc_code_ue(enc, enc->enc_pic.crop_top);
> +		radeon_enc_code_ue(enc, enc->enc_pic.crop_bottom);
> +	} else
> +		radeon_enc_code_fixed_bits(enc, 0x0, 1);
> +
> +	radeon_enc_code_fixed_bits(enc, 0x1, 1);
> +	radeon_enc_code_fixed_bits(enc, 0x0, 1);
> +	radeon_enc_code_fixed_bits(enc, 0x0, 1);
> +	radeon_enc_code_fixed_bits(enc, 0x0, 1);
> +	radeon_enc_code_fixed_bits(enc, 0x0, 1);
> +	radeon_enc_code_fixed_bits(enc, 0x0, 1);

Including the tick rate would be nice even if none of the other metadata is available; I think you do know it here.

> +	radeon_enc_code_fixed_bits(enc, 0x0, 1);
> +	radeon_enc_code_fixed_bits(enc, 0x0, 1);
> +	radeon_enc_code_fixed_bits(enc, 0x0, 1);
> +	radeon_enc_code_fixed_bits(enc, 0x1, 1);
> +	radeon_enc_code_fixed_bits(enc, 0x1, 1);
> +	radeon_enc_code_ue(enc, 0x0);

Note that level limits imply the default value (2) of max_bytes_per_pic_denom.

> +	radeon_enc_code_ue(enc, 0x0);
> +	radeon_enc_code_ue(enc, 16);
> +	radeon_enc_code_ue(enc, 16);
> +	radeon_enc_code_ue(enc, 0x0);
> +	radeon_enc_code_ue(enc, (enc->base.max_references + 1));
> +	radeon_enc_code_fixed_bits(enc, 0x1, 1);

Maybe separate the rbsp_trailing_bits to make it clearer?

> +	radeon_enc_byte_align(enc);
> +	radeon_enc_flush_headers(enc);
> +	*size_in_bytes = (enc->bits_output + 7) / 8;
> +	RADEON_ENC_END();
> +}
> +
> +static void radeon_enc_nalu_pps(struct radeon_encoder *enc)
> +{
> +	RADEON_ENC_BEGIN(RENCODE_IB_PARAM_DIRECT_OUTPUT_NALU);
> +	RADEON_ENC_CS(RENCODE_DIRECT_OUTPUT_NALU_TYPE_PPS);
> +	uint32_t *size_in_bytes = &enc->cs->current.buf[enc->cs->current.cdw++];
> +	radeon_enc_reset(enc);
> +	radeon_enc_set_emulation_prevention(enc, false);
> +	radeon_enc_code_fixed_bits(enc, 0x00000001, 32);
> +	radeon_enc_code_fixed_bits(enc, 0x68, 8);
> +	radeon_enc_byte_align(enc);
> +	radeon_enc_set_emulation_prevention(enc, true);
> +	radeon_enc_code_ue(enc, 0x0);
> +	radeon_enc_code_ue(enc, 0x0);
> +	radeon_enc_code_fixed_bits(enc, (enc->enc_pic.spec_misc.cabac_enable ? 0x1 : 0x0), 1);
> +	radeon_enc_code_fixed_bits(enc, 0x0, 1);
> +	radeon_enc_code_ue(enc, 0x0);
> +	radeon_enc_code_ue(enc, 0x0);
> +	radeon_enc_code_ue(enc, 0x0);
> +	radeon_enc_code_fixed_bits(enc, 0x0, 1);
> +	radeon_enc_code_fixed_bits(enc, 0x0, 2);
> +	radeon_enc_code_se(enc, 0x0);
> +	radeon_enc_code_se(enc, 0x0);
> +	radeon_enc_code_se(enc, 0x0);
> +	radeon_enc_code_fixed_bits(enc, 0x1, 1);
> +	radeon_enc_code_fixed_bits(enc, 0x0, 1);
> +	radeon_enc_code_fixed_bits(enc, 0x0, 1);

No 8x8 transform?

> +	radeon_enc_code_fixed_bits(enc, 0x1, 1);
> +	radeon_enc_byte_align(enc);
> +	radeon_enc_flush_headers(enc);
> +	*size_in_bytes = (enc->bits_output + 7) / 8;
> +	RADEON_ENC_END();
> +}
> +
> +static void radeon_enc_slice_header(struct radeon_encoder *enc)
> +{
> +	uint32_t instruction[RENCODE_SLICE_HEADER_TEMPLATE_MAX_NUM_INSTRUCTIONS] = {0};
> +	uint32_t num_bits[RENCODE_SLICE_HEADER_TEMPLATE_MAX_NUM_INSTRUCTIONS] = {0};
> +	unsigned int inst_index = 0;
> +	unsigned int bit_index = 0;
> +	unsigned int bits_copied = 0;
> +	RADEON_ENC_BEGIN(RENCODE_IB_PARAM_SLICE_HEADER);
> +	radeon_enc_reset(enc);
> +	radeon_enc_set_emulation_prevention(enc, false);
> +
> +	if (enc->enc_pic.is_idr)
> +		radeon_enc_code_fixed_bits(enc, 0x65, 8);
> +	else if (enc->enc_pic.not_referenced)
> +		radeon_enc_code_fixed_bits(enc, 0x01, 8);
> +	else
> +		radeon_enc_code_fixed_bits(enc, 0x41, 8);
> +
> +	radeon_enc_flush_headers(enc);
> +	bit_index ++;
> +	instruction[inst_index] = RENCODE_HEADER_INSTRUCTION_COPY;
> +	num_bits[inst_index] = enc->bits_output - bits_copied;
> +	bits_copied = enc->bits_output;
> +	inst_index++;
> +
> +	instruction[inst_index] = RENCODE_H264_HEADER_INSTRUCTION_FIRST_MB;
> +	inst_index++;
Who fills first_mb_in_slice?  Is this dynamic because there might be multiple slices with boundaries unknown at this point?

> +
> +	switch(enc->enc_pic.picture_type) {
> +		case PIPE_H264_ENC_PICTURE_TYPE_I:
> +		case PIPE_H264_ENC_PICTURE_TYPE_IDR:
> +			radeon_enc_code_fixed_bits(enc, 0x08, 7);
> +			break;
> +		case PIPE_H264_ENC_PICTURE_TYPE_P:
> +		case PIPE_H264_ENC_PICTURE_TYPE_SKIP:
> +			radeon_enc_code_fixed_bits(enc, 0x06, 5);
> +			break;
> +		case PIPE_H264_ENC_PICTURE_TYPE_B:
> +			radeon_enc_code_fixed_bits(enc, 0x07, 5);
> +			break;
> +		default:
> +			radeon_enc_code_fixed_bits(enc, 0x08, 7);
> +	}

Can you explain what this is doing?  It's slice_type, which is normally unsigned exp-golomb, in some strange form?

> +
> +	radeon_enc_code_ue(enc, 0x0);
> +	radeon_enc_code_fixed_bits(enc, enc->enc_pic.frame_num % 32, 5);

If the user had log2_max_frame_num_minus4 == 0 then the truncation doesn't work.  Might be better to use the user value of log2_max_frame_num_minus4 in the SPS?

> +
> +	if (enc->enc_pic.h264_enc_params.input_picture_structure != RENCODE_H264_PICTURE_STRUCTURE_FRAME) {
> +		radeon_enc_code_fixed_bits(enc, 0x1, 1);
> +		radeon_enc_code_fixed_bits(enc, enc->enc_pic.h264_enc_params.input_picture_structure == RENCODE_H264_PICTURE_STRUCTURE_BOTTOM_FIELD ? 1 : 0, 1);
> +	}
> +
> +	if (enc->enc_pic.is_idr)
> +		radeon_enc_code_ue(enc, 0x0);

Please make idr_pic_id change between successive IDR pictures.  This is required if they are adjacent, and is generally a good idea even if they aren't.

(Or, better yet, use the value from the user.)

> +
> +	if (enc->enc_pic.pic_order_cnt_type == 0)
> +		radeon_enc_code_fixed_bits(enc, enc->enc_pic.pic_order_cnt % 32, 5);

Same problem as frame_num with log2_max_pic_order_cnt_lsb_minus4.

> +

B-frames aren't actually supported yet, right?

(direct_spatial_mv_pred_flag is missing here.)

> +	if (enc->enc_pic.picture_type != PIPE_H264_ENC_PICTURE_TYPE_IDR) {
> +		radeon_enc_code_fixed_bits(enc, 0x0, 1);
> +
> +		if (enc->enc_pic.frame_num - enc->enc_pic.ref_idx_l0 > 1) {
> +			radeon_enc_code_fixed_bits(enc, 0x1, 1);
> +			radeon_enc_code_ue(enc, 0x0);
> +			radeon_enc_code_ue(enc, (enc->enc_pic.frame_num - enc->enc_pic.ref_idx_l0 - 1));

Does the rest of the code support using this to pick an earlier reference frame?

> +			radeon_enc_code_ue(enc, 0x3);
> +		} else
> +			radeon_enc_code_fixed_bits(enc, 0x0, 1);
> +	}
> +
> +	if (enc->enc_pic.is_idr) {
> +		radeon_enc_code_fixed_bits(enc, 0x0, 1);
> +		radeon_enc_code_fixed_bits(enc, 0x0, 1);
> +	} else
> +		radeon_enc_code_fixed_bits(enc, 0x0, 1);
> +
> +	if ((enc->enc_pic.picture_type != PIPE_H264_ENC_PICTURE_TYPE_IDR) && (enc->enc_pic.spec_misc.cabac_enable))
> +		radeon_enc_code_ue(enc, enc->enc_pic.spec_misc.cabac_init_idc);
> +
> +	radeon_enc_flush_headers(enc);
> +	bit_index ++;
> +	instruction[inst_index] = RENCODE_HEADER_INSTRUCTION_COPY;
> +	num_bits[inst_index] = enc->bits_output - bits_copied;
> +	bits_copied = enc->bits_output;
> +	inst_index++;
> +
> +	instruction[inst_index] = RENCODE_H264_HEADER_INSTRUCTION_SLICE_QP_DELTA;
> +	inst_index++;

Who is going to decide the slice_qp_delta value to go here?

> +
> +	radeon_enc_code_ue(enc, enc->enc_pic.h264_deblock.disable_deblocking_filter_idc ? 1: 0);

Can also be 2.

> +
> +	if (!enc->enc_pic.h264_deblock.disable_deblocking_filter_idc) {
> +		radeon_enc_code_se(enc, enc->enc_pic.h264_deblock.alpha_c0_offset_div2);
> +		radeon_enc_code_se(enc, enc->enc_pic.h264_deblock.beta_offset_div2);
> +	}
> +
> +	radeon_enc_flush_headers(enc);
> +	bit_index ++;
> +	instruction[inst_index] = RENCODE_HEADER_INSTRUCTION_COPY;
> +	num_bits[inst_index] = enc->bits_output - bits_copied;
> +	bits_copied = enc->bits_output;
> +	inst_index++;
> +
> +	instruction[inst_index] = RENCODE_HEADER_INSTRUCTION_END;
> +
> +	for (int i = bit_index; i < RENCODE_SLICE_HEADER_TEMPLATE_MAX_TEMPLATE_SIZE_IN_DWORDS; i++)
> +		RADEON_ENC_CS(0x00000000);
> +
> +	for (int j = 0; j < RENCODE_SLICE_HEADER_TEMPLATE_MAX_NUM_INSTRUCTIONS; j++) {
> +		RADEON_ENC_CS(instruction[j]);
> +		RADEON_ENC_CS(num_bits[j]);
> +	}
> +
> +	RADEON_ENC_END();
> +}
> +
>  static void radeon_enc_ctx(struct radeon_encoder *enc)
>  {
>  	enc->enc_pic.ctx_buf.swizzle_mode = 0;
> @@ -574,6 +801,13 @@ static void encode(struct radeon_encoder *enc)
>  	radeon_enc_session_info(enc);
>  	enc->total_task_size = 0;
>  	radeon_enc_task_info(enc, enc->need_feedback);
> +
> +	if (enc->enc_pic.is_idr) {
> +		radeon_enc_nalu_sps(enc);
> +		radeon_enc_nalu_pps(enc);
> +	}
> +
> +	radeon_enc_slice_header(enc);
>  	radeon_enc_ctx(enc);
>  	radeon_enc_bitstream(enc);
>  	radeon_enc_feedback(enc);
> 

Thanks,

- Mark
Am 09.11.2017 um 17:57 schrieb Mark Thompson:
> On 08/11/17 18:08, boyuan.zhang@amd.com wrote:
>> From: Boyuan Zhang <boyuan.zhang@amd.com>
>>
>> Implement encoding of sps, pps, and silce headers using the newly added h.264
>> header coding descriptors functions based on h.264 specs.
>>
>> Signed-off-by: Boyuan Zhang <boyuan.zhang@amd.com>
>> ---
>>   src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c | 234 ++++++++++++++++++++++++
>>   1 file changed, 234 insertions(+)
> So, does this mean we could actually implement VAAPI encode properly with packed headers now rather than hard-coding all of this in the driver?

At least that's the intention here.

BTW: Boyuan we already have a bitstream write in picture_mpeg4.c in the 
VA state tracker.

Please unify all that stuff in a header in src/gallium/auxiliary/vl/.

Regards,
Christian.

>
>> diff --git a/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c b/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c
>> index 5170c67..c6dc420 100644
>> --- a/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c
>> +++ b/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c
>> @@ -362,6 +362,233 @@ static void radeon_enc_quality_params(struct radeon_encoder *enc)
>>   	RADEON_ENC_END();
>>   }
>>   
>> +static void radeon_enc_nalu_sps(struct radeon_encoder *enc)
>> +{
>> +	RADEON_ENC_BEGIN(RENCODE_IB_PARAM_DIRECT_OUTPUT_NALU);
>> +	RADEON_ENC_CS(RENCODE_DIRECT_OUTPUT_NALU_TYPE_SPS);
>> +	uint32_t *size_in_bytes = &enc->cs->current.buf[enc->cs->current.cdw++];
>> +	radeon_enc_reset(enc);
>> +	radeon_enc_set_emulation_prevention(enc, false);
>> +	radeon_enc_code_fixed_bits(enc, 0x00000001, 32);
>> +	radeon_enc_code_fixed_bits(enc, 0x67, 8);
>> +	radeon_enc_byte_align(enc);
>> +	radeon_enc_set_emulation_prevention(enc, true);
>> +	radeon_enc_code_fixed_bits(enc, enc->enc_pic.spec_misc.profile_idc, 8);
>> +	radeon_enc_code_fixed_bits(enc, 0x04, 8);
> Please always set constraint_set1_flag when profile_idc is 66.  There are enough actually-constrained-baseline-but-not-marked-as-such streams in the world already to catch out decoders without full baseline support (that is, all of them).
>
> Also, "constraint_set5_flag shall be equal to 0 when profile_idc is not equal to 77, 88, 100, or 118".
>
>> +	radeon_enc_code_fixed_bits(enc, enc->enc_pic.spec_misc.level_idc, 8);
>> +	radeon_enc_code_ue(enc, 0x0);
>> +
>> +	if(enc->enc_pic.spec_misc.profile_idc == 100 || enc->enc_pic.spec_misc.profile_idc == 110 || enc->enc_pic.spec_misc.profile_idc == 122 ||
>> +			enc->enc_pic.spec_misc.profile_idc == 244 || enc->enc_pic.spec_misc.profile_idc == 44 || enc->enc_pic.spec_misc.profile_idc == 83 ||
>> +			enc->enc_pic.spec_misc.profile_idc == 86 || enc->enc_pic.spec_misc.profile_idc == 118 || enc->enc_pic.spec_misc.profile_idc == 128 ||
>> +			enc->enc_pic.spec_misc.profile_idc == 138) {
>> +		radeon_enc_code_ue(enc, 0x1);
>> +		radeon_enc_code_ue(enc, 0x0);
>> +		radeon_enc_code_ue(enc, 0x0);
>> +		radeon_enc_code_fixed_bits(enc, 0x0, 2);
>> +	}
>> +
>> +	radeon_enc_code_ue(enc, 1);
>> +	radeon_enc_code_ue(enc, enc->enc_pic.pic_order_cnt_type);
>> +
>> +	if (enc->enc_pic.pic_order_cnt_type == 0)
>> +		radeon_enc_code_ue(enc, 1);
> POC type can be 1 as well, which has more associated syntax.
>
>> +
>> +	radeon_enc_code_ue(enc, (enc->base.max_references + 1));
>> +	radeon_enc_code_fixed_bits(enc, enc->enc_pic.layer_ctrl.max_num_temporal_layers > 1 ? 0x1 : 0x0, 1);
>> +	radeon_enc_code_ue(enc, (enc->enc_pic.session_init.aligned_picture_width / 16 - 1));
>> +	radeon_enc_code_ue(enc, (enc->enc_pic.session_init.aligned_picture_height / 16 - 1));
>> +	bool progressive_only = true;
>> +	radeon_enc_code_fixed_bits(enc, progressive_only ? 0x1 : 0x0, 1);
>> +
>> +	if (!progressive_only)
>> +		radeon_enc_code_fixed_bits(enc, 0x0, 1);
>> +
>> +	radeon_enc_code_fixed_bits(enc, 0x1, 1);
>> +
>> +	if ((enc->enc_pic.crop_left != 0) || (enc->enc_pic.crop_right != 0) ||
>> +			(enc->enc_pic.crop_top != 0) || (enc->enc_pic.crop_bottom != 0)) {
>> +		radeon_enc_code_fixed_bits(enc, 0x1, 1);
>> +		radeon_enc_code_ue(enc, enc->enc_pic.crop_left);
>> +		radeon_enc_code_ue(enc, enc->enc_pic.crop_right);
>> +		radeon_enc_code_ue(enc, enc->enc_pic.crop_top);
>> +		radeon_enc_code_ue(enc, enc->enc_pic.crop_bottom);
>> +	} else
>> +		radeon_enc_code_fixed_bits(enc, 0x0, 1);
>> +
>> +	radeon_enc_code_fixed_bits(enc, 0x1, 1);
>> +	radeon_enc_code_fixed_bits(enc, 0x0, 1);
>> +	radeon_enc_code_fixed_bits(enc, 0x0, 1);
>> +	radeon_enc_code_fixed_bits(enc, 0x0, 1);
>> +	radeon_enc_code_fixed_bits(enc, 0x0, 1);
>> +	radeon_enc_code_fixed_bits(enc, 0x0, 1);
> Including the tick rate would be nice even if none of the other metadata is available; I think you do know it here.
>
>> +	radeon_enc_code_fixed_bits(enc, 0x0, 1);
>> +	radeon_enc_code_fixed_bits(enc, 0x0, 1);
>> +	radeon_enc_code_fixed_bits(enc, 0x0, 1);
>> +	radeon_enc_code_fixed_bits(enc, 0x1, 1);
>> +	radeon_enc_code_fixed_bits(enc, 0x1, 1);
>> +	radeon_enc_code_ue(enc, 0x0);
> Note that level limits imply the default value (2) of max_bytes_per_pic_denom.
>
>> +	radeon_enc_code_ue(enc, 0x0);
>> +	radeon_enc_code_ue(enc, 16);
>> +	radeon_enc_code_ue(enc, 16);
>> +	radeon_enc_code_ue(enc, 0x0);
>> +	radeon_enc_code_ue(enc, (enc->base.max_references + 1));
>> +	radeon_enc_code_fixed_bits(enc, 0x1, 1);
> Maybe separate the rbsp_trailing_bits to make it clearer?
>
>> +	radeon_enc_byte_align(enc);
>> +	radeon_enc_flush_headers(enc);
>> +	*size_in_bytes = (enc->bits_output + 7) / 8;
>> +	RADEON_ENC_END();
>> +}
>> +
>> +static void radeon_enc_nalu_pps(struct radeon_encoder *enc)
>> +{
>> +	RADEON_ENC_BEGIN(RENCODE_IB_PARAM_DIRECT_OUTPUT_NALU);
>> +	RADEON_ENC_CS(RENCODE_DIRECT_OUTPUT_NALU_TYPE_PPS);
>> +	uint32_t *size_in_bytes = &enc->cs->current.buf[enc->cs->current.cdw++];
>> +	radeon_enc_reset(enc);
>> +	radeon_enc_set_emulation_prevention(enc, false);
>> +	radeon_enc_code_fixed_bits(enc, 0x00000001, 32);
>> +	radeon_enc_code_fixed_bits(enc, 0x68, 8);
>> +	radeon_enc_byte_align(enc);
>> +	radeon_enc_set_emulation_prevention(enc, true);
>> +	radeon_enc_code_ue(enc, 0x0);
>> +	radeon_enc_code_ue(enc, 0x0);
>> +	radeon_enc_code_fixed_bits(enc, (enc->enc_pic.spec_misc.cabac_enable ? 0x1 : 0x0), 1);
>> +	radeon_enc_code_fixed_bits(enc, 0x0, 1);
>> +	radeon_enc_code_ue(enc, 0x0);
>> +	radeon_enc_code_ue(enc, 0x0);
>> +	radeon_enc_code_ue(enc, 0x0);
>> +	radeon_enc_code_fixed_bits(enc, 0x0, 1);
>> +	radeon_enc_code_fixed_bits(enc, 0x0, 2);
>> +	radeon_enc_code_se(enc, 0x0);
>> +	radeon_enc_code_se(enc, 0x0);
>> +	radeon_enc_code_se(enc, 0x0);
>> +	radeon_enc_code_fixed_bits(enc, 0x1, 1);
>> +	radeon_enc_code_fixed_bits(enc, 0x0, 1);
>> +	radeon_enc_code_fixed_bits(enc, 0x0, 1);
> No 8x8 transform?
>
>> +	radeon_enc_code_fixed_bits(enc, 0x1, 1);
>> +	radeon_enc_byte_align(enc);
>> +	radeon_enc_flush_headers(enc);
>> +	*size_in_bytes = (enc->bits_output + 7) / 8;
>> +	RADEON_ENC_END();
>> +}
>> +
>> +static void radeon_enc_slice_header(struct radeon_encoder *enc)
>> +{
>> +	uint32_t instruction[RENCODE_SLICE_HEADER_TEMPLATE_MAX_NUM_INSTRUCTIONS] = {0};
>> +	uint32_t num_bits[RENCODE_SLICE_HEADER_TEMPLATE_MAX_NUM_INSTRUCTIONS] = {0};
>> +	unsigned int inst_index = 0;
>> +	unsigned int bit_index = 0;
>> +	unsigned int bits_copied = 0;
>> +	RADEON_ENC_BEGIN(RENCODE_IB_PARAM_SLICE_HEADER);
>> +	radeon_enc_reset(enc);
>> +	radeon_enc_set_emulation_prevention(enc, false);
>> +
>> +	if (enc->enc_pic.is_idr)
>> +		radeon_enc_code_fixed_bits(enc, 0x65, 8);
>> +	else if (enc->enc_pic.not_referenced)
>> +		radeon_enc_code_fixed_bits(enc, 0x01, 8);
>> +	else
>> +		radeon_enc_code_fixed_bits(enc, 0x41, 8);
>> +
>> +	radeon_enc_flush_headers(enc);
>> +	bit_index ++;
>> +	instruction[inst_index] = RENCODE_HEADER_INSTRUCTION_COPY;
>> +	num_bits[inst_index] = enc->bits_output - bits_copied;
>> +	bits_copied = enc->bits_output;
>> +	inst_index++;
>> +
>> +	instruction[inst_index] = RENCODE_H264_HEADER_INSTRUCTION_FIRST_MB;
>> +	inst_index++;
> Who fills first_mb_in_slice?  Is this dynamic because there might be multiple slices with boundaries unknown at this point?
>
>> +
>> +	switch(enc->enc_pic.picture_type) {
>> +		case PIPE_H264_ENC_PICTURE_TYPE_I:
>> +		case PIPE_H264_ENC_PICTURE_TYPE_IDR:
>> +			radeon_enc_code_fixed_bits(enc, 0x08, 7);
>> +			break;
>> +		case PIPE_H264_ENC_PICTURE_TYPE_P:
>> +		case PIPE_H264_ENC_PICTURE_TYPE_SKIP:
>> +			radeon_enc_code_fixed_bits(enc, 0x06, 5);
>> +			break;
>> +		case PIPE_H264_ENC_PICTURE_TYPE_B:
>> +			radeon_enc_code_fixed_bits(enc, 0x07, 5);
>> +			break;
>> +		default:
>> +			radeon_enc_code_fixed_bits(enc, 0x08, 7);
>> +	}
> Can you explain what this is doing?  It's slice_type, which is normally unsigned exp-golomb, in some strange form?
>
>> +
>> +	radeon_enc_code_ue(enc, 0x0);
>> +	radeon_enc_code_fixed_bits(enc, enc->enc_pic.frame_num % 32, 5);
> If the user had log2_max_frame_num_minus4 == 0 then the truncation doesn't work.  Might be better to use the user value of log2_max_frame_num_minus4 in the SPS?
>
>> +
>> +	if (enc->enc_pic.h264_enc_params.input_picture_structure != RENCODE_H264_PICTURE_STRUCTURE_FRAME) {
>> +		radeon_enc_code_fixed_bits(enc, 0x1, 1);
>> +		radeon_enc_code_fixed_bits(enc, enc->enc_pic.h264_enc_params.input_picture_structure == RENCODE_H264_PICTURE_STRUCTURE_BOTTOM_FIELD ? 1 : 0, 1);
>> +	}
>> +
>> +	if (enc->enc_pic.is_idr)
>> +		radeon_enc_code_ue(enc, 0x0);
> Please make idr_pic_id change between successive IDR pictures.  This is required if they are adjacent, and is generally a good idea even if they aren't.
>
> (Or, better yet, use the value from the user.)
>
>> +
>> +	if (enc->enc_pic.pic_order_cnt_type == 0)
>> +		radeon_enc_code_fixed_bits(enc, enc->enc_pic.pic_order_cnt % 32, 5);
> Same problem as frame_num with log2_max_pic_order_cnt_lsb_minus4.
>
>> +
> B-frames aren't actually supported yet, right?
>
> (direct_spatial_mv_pred_flag is missing here.)
>
>> +	if (enc->enc_pic.picture_type != PIPE_H264_ENC_PICTURE_TYPE_IDR) {
>> +		radeon_enc_code_fixed_bits(enc, 0x0, 1);
>> +
>> +		if (enc->enc_pic.frame_num - enc->enc_pic.ref_idx_l0 > 1) {
>> +			radeon_enc_code_fixed_bits(enc, 0x1, 1);
>> +			radeon_enc_code_ue(enc, 0x0);
>> +			radeon_enc_code_ue(enc, (enc->enc_pic.frame_num - enc->enc_pic.ref_idx_l0 - 1));
> Does the rest of the code support using this to pick an earlier reference frame?
>
>> +			radeon_enc_code_ue(enc, 0x3);
>> +		} else
>> +			radeon_enc_code_fixed_bits(enc, 0x0, 1);
>> +	}
>> +
>> +	if (enc->enc_pic.is_idr) {
>> +		radeon_enc_code_fixed_bits(enc, 0x0, 1);
>> +		radeon_enc_code_fixed_bits(enc, 0x0, 1);
>> +	} else
>> +		radeon_enc_code_fixed_bits(enc, 0x0, 1);
>> +
>> +	if ((enc->enc_pic.picture_type != PIPE_H264_ENC_PICTURE_TYPE_IDR) && (enc->enc_pic.spec_misc.cabac_enable))
>> +		radeon_enc_code_ue(enc, enc->enc_pic.spec_misc.cabac_init_idc);
>> +
>> +	radeon_enc_flush_headers(enc);
>> +	bit_index ++;
>> +	instruction[inst_index] = RENCODE_HEADER_INSTRUCTION_COPY;
>> +	num_bits[inst_index] = enc->bits_output - bits_copied;
>> +	bits_copied = enc->bits_output;
>> +	inst_index++;
>> +
>> +	instruction[inst_index] = RENCODE_H264_HEADER_INSTRUCTION_SLICE_QP_DELTA;
>> +	inst_index++;
> Who is going to decide the slice_qp_delta value to go here?
>
>> +
>> +	radeon_enc_code_ue(enc, enc->enc_pic.h264_deblock.disable_deblocking_filter_idc ? 1: 0);
> Can also be 2.
>
>> +
>> +	if (!enc->enc_pic.h264_deblock.disable_deblocking_filter_idc) {
>> +		radeon_enc_code_se(enc, enc->enc_pic.h264_deblock.alpha_c0_offset_div2);
>> +		radeon_enc_code_se(enc, enc->enc_pic.h264_deblock.beta_offset_div2);
>> +	}
>> +
>> +	radeon_enc_flush_headers(enc);
>> +	bit_index ++;
>> +	instruction[inst_index] = RENCODE_HEADER_INSTRUCTION_COPY;
>> +	num_bits[inst_index] = enc->bits_output - bits_copied;
>> +	bits_copied = enc->bits_output;
>> +	inst_index++;
>> +
>> +	instruction[inst_index] = RENCODE_HEADER_INSTRUCTION_END;
>> +
>> +	for (int i = bit_index; i < RENCODE_SLICE_HEADER_TEMPLATE_MAX_TEMPLATE_SIZE_IN_DWORDS; i++)
>> +		RADEON_ENC_CS(0x00000000);
>> +
>> +	for (int j = 0; j < RENCODE_SLICE_HEADER_TEMPLATE_MAX_NUM_INSTRUCTIONS; j++) {
>> +		RADEON_ENC_CS(instruction[j]);
>> +		RADEON_ENC_CS(num_bits[j]);
>> +	}
>> +
>> +	RADEON_ENC_END();
>> +}
>> +
>>   static void radeon_enc_ctx(struct radeon_encoder *enc)
>>   {
>>   	enc->enc_pic.ctx_buf.swizzle_mode = 0;
>> @@ -574,6 +801,13 @@ static void encode(struct radeon_encoder *enc)
>>   	radeon_enc_session_info(enc);
>>   	enc->total_task_size = 0;
>>   	radeon_enc_task_info(enc, enc->need_feedback);
>> +
>> +	if (enc->enc_pic.is_idr) {
>> +		radeon_enc_nalu_sps(enc);
>> +		radeon_enc_nalu_pps(enc);
>> +	}
>> +
>> +	radeon_enc_slice_header(enc);
>>   	radeon_enc_ctx(enc);
>>   	radeon_enc_bitstream(enc);
>>   	radeon_enc_feedback(enc);
>>
> Thanks,
>
> - Mark
> _______________________________________________
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
-----Original Message-----
From: mesa-dev [mailto:mesa-dev-bounces@lists.freedesktop.org] On Behalf Of Christian König

Sent: November-09-17 12:19 PM
To: Mark Thompson; mesa-dev@lists.freedesktop.org
Subject: Re: [Mesa-dev] [PATCH 10/18] radeon/vcn: add encode header implementations

Am 09.11.2017 um 17:57 schrieb Mark Thompson:
>> On 08/11/17 18:08, boyuan.zhang@amd.com wrote:

>>> From: Boyuan Zhang <boyuan.zhang@amd.com>

>>>

>>> Implement encoding of sps, pps, and silce headers using the newly 

>>> added h.264 header coding descriptors functions based on h.264 specs.

>>>

>>> Signed-off-by: Boyuan Zhang <boyuan.zhang@amd.com>

>>> ---

>>>   src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c | 234 ++++++++++++++++++++++++

>>>   1 file changed, 234 insertions(+)

>> So, does this mean we could actually implement VAAPI encode properly with packed headers now rather than hard-coding all of this in the driver?


[Boyuan] Generally speaking, we are doing part of the slice header encoding work in driver side now. Previously for vce encode, all those works are done by firmware, and driver only provides required variables . Now for vcn encode, both driver and firmware do part of the work, and at the end firmware generate the entire slice header. For example, one of the questions you asked below that "who will set the value for slice_qp_delta?" It's actually that firmware will set the value, while driver side needs to instructs it. So in conclusion, we can't encode everything in driver side. All other questions/comments are answered below.

> At least that's the intention here.

> 

> BTW: Boyuan we already have a bitstream write in picture_mpeg4.c in the VA state tracker.

> 

> Please unify all that stuff in a header in src/gallium/auxiliary/vl/.

> 

> Regards,

> Christian.


[Boyuan] I see. I would like to create a separate patch and move those stuff after these bringup patches set, does it sound fine to you? 

>>

>>> diff --git a/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c 

>>> b/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c

>>> index 5170c67..c6dc420 100644

>>> --- a/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c

>>> +++ b/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c

>>> @@ -362,6 +362,233 @@ static void radeon_enc_quality_params(struct radeon_encoder *enc)

>>>   	RADEON_ENC_END();

>>>   }

>>>   

>>> +static void radeon_enc_nalu_sps(struct radeon_encoder *enc) {

>>> +	RADEON_ENC_BEGIN(RENCODE_IB_PARAM_DIRECT_OUTPUT_NALU);

>>> +	RADEON_ENC_CS(RENCODE_DIRECT_OUTPUT_NALU_TYPE_SPS);

>>> +	uint32_t *size_in_bytes = &enc->cs->current.buf[enc->cs->current.cdw++];

>>> +	radeon_enc_reset(enc);

>>> +	radeon_enc_set_emulation_prevention(enc, false);

>>> +	radeon_enc_code_fixed_bits(enc, 0x00000001, 32);

>>> +	radeon_enc_code_fixed_bits(enc, 0x67, 8);

>>> +	radeon_enc_byte_align(enc);

>>> +	radeon_enc_set_emulation_prevention(enc, true);

>>> +	radeon_enc_code_fixed_bits(enc, enc->enc_pic.spec_misc.profile_idc, 8);

>>> +	radeon_enc_code_fixed_bits(enc, 0x04, 8);

> >Please always set constraint_set1_flag when profile_idc is 66.  There are enough actually-constrained-baseline-but-not-marked-as-such streams in the world already to catch out decoders without full baseline support (that is, all of them).

>>

> >Also, "constraint_set5_flag shall be equal to 0 when profile_idc is not equal to 77, 88, 100, or 118".


[Boyuan] Thanks for pointing out. I modified the value to be 0x44 in the new patch (set1=1, and set5=1) since we only support constrained baseline for now.

>>

>>> +	radeon_enc_code_fixed_bits(enc, enc->enc_pic.spec_misc.level_idc, 8);

>>> +	radeon_enc_code_ue(enc, 0x0);

>>> +

>>> +	if(enc->enc_pic.spec_misc.profile_idc == 100 || enc->enc_pic.spec_misc.profile_idc == 110 || enc->enc_pic.spec_misc.profile_idc == 122 ||

>>> +			enc->enc_pic.spec_misc.profile_idc == 244 || enc->enc_pic.spec_misc.profile_idc == 44 || enc->enc_pic.spec_misc.profile_idc == 83 ||

>>> +			enc->enc_pic.spec_misc.profile_idc == 86 || enc->enc_pic.spec_misc.profile_idc == 118 || enc->enc_pic.spec_misc.profile_idc == 128 ||

>>> +			enc->enc_pic.spec_misc.profile_idc == 138) {

>>> +		radeon_enc_code_ue(enc, 0x1);

>>> +		radeon_enc_code_ue(enc, 0x0);

>>> +		radeon_enc_code_ue(enc, 0x0);

>>> +		radeon_enc_code_fixed_bits(enc, 0x0, 2);

>>> +	}

>>> +

>>> +	radeon_enc_code_ue(enc, 1);

>>> +	radeon_enc_code_ue(enc, enc->enc_pic.pic_order_cnt_type);

>>> +

>>> +	if (enc->enc_pic.pic_order_cnt_type == 0)

>>> +		radeon_enc_code_ue(enc, 1);

>> POC type can be 1 as well, which has more associated syntax.


[Boyuan] For now, we only support 0 and 2. And the implicit "else" is for poc_type==2 case.

>>

>>> +

>>> +	radeon_enc_code_ue(enc, (enc->base.max_references + 1));

>>> +	radeon_enc_code_fixed_bits(enc, enc->enc_pic.layer_ctrl.max_num_temporal_layers > 1 ? 0x1 : 0x0, 1);

>>> +	radeon_enc_code_ue(enc, (enc->enc_pic.session_init.aligned_picture_width / 16 - 1));

>>> +	radeon_enc_code_ue(enc, (enc->enc_pic.session_init.aligned_picture_height / 16 - 1));

>>> +	bool progressive_only = true;

>>> +	radeon_enc_code_fixed_bits(enc, progressive_only ? 0x1 : 0x0, 1);

>>> +

>>> +	if (!progressive_only)

>>> +		radeon_enc_code_fixed_bits(enc, 0x0, 1);

>>> +

>>> +	radeon_enc_code_fixed_bits(enc, 0x1, 1);

>>> +

>>> +	if ((enc->enc_pic.crop_left != 0) || (enc->enc_pic.crop_right != 0) ||

>>> +			(enc->enc_pic.crop_top != 0) || (enc->enc_pic.crop_bottom != 0)) {

>>> +		radeon_enc_code_fixed_bits(enc, 0x1, 1);

>>> +		radeon_enc_code_ue(enc, enc->enc_pic.crop_left);

>>> +		radeon_enc_code_ue(enc, enc->enc_pic.crop_right);

>>> +		radeon_enc_code_ue(enc, enc->enc_pic.crop_top);

>>> +		radeon_enc_code_ue(enc, enc->enc_pic.crop_bottom);

>>> +	} else

>>> +		radeon_enc_code_fixed_bits(enc, 0x0, 1);

>>> +

>>> +	radeon_enc_code_fixed_bits(enc, 0x1, 1);

>>> +	radeon_enc_code_fixed_bits(enc, 0x0, 1);

>>> +	radeon_enc_code_fixed_bits(enc, 0x0, 1);

>>> +	radeon_enc_code_fixed_bits(enc, 0x0, 1);

>>> +	radeon_enc_code_fixed_bits(enc, 0x0, 1);

>>> +	radeon_enc_code_fixed_bits(enc, 0x0, 1);

>> Including the tick rate would be nice even if none of the other metadata is available; I think you do know it here.


[Boyuan] We haven't implement it for now. We will discuss about it. Thanks!

>>

>>> +	radeon_enc_code_fixed_bits(enc, 0x0, 1);

>>> +	radeon_enc_code_fixed_bits(enc, 0x0, 1);

>>> +	radeon_enc_code_fixed_bits(enc, 0x0, 1);

>>> +	radeon_enc_code_fixed_bits(enc, 0x1, 1);

>>> +	radeon_enc_code_fixed_bits(enc, 0x1, 1);

>>> +	radeon_enc_code_ue(enc, 0x0);

>> Note that level limits imply the default value (2) of max_bytes_per_pic_denom.


[Boyuan] According to the spec “If max_bytes_per_pic_denom is equal to 0, no limits are indicated.”. We are trying not to set limit for now.

>>

>>> +	radeon_enc_code_ue(enc, 0x0);

>>> +	radeon_enc_code_ue(enc, 16);

>>> +	radeon_enc_code_ue(enc, 16);

>>> +	radeon_enc_code_ue(enc, 0x0);

>>> +	radeon_enc_code_ue(enc, (enc->base.max_references + 1));

>>> +	radeon_enc_code_fixed_bits(enc, 0x1, 1);

>> Maybe separate the rbsp_trailing_bits to make it clearer?


[Boyuan] Totally agree. Fixed in the new patch.

>>

>>> +	radeon_enc_byte_align(enc);

>>> +	radeon_enc_flush_headers(enc);

>>> +	*size_in_bytes = (enc->bits_output + 7) / 8;

>>> +	RADEON_ENC_END();

>>> +}

>>> +

>>> +static void radeon_enc_nalu_pps(struct radeon_encoder *enc) {

>>> +	RADEON_ENC_BEGIN(RENCODE_IB_PARAM_DIRECT_OUTPUT_NALU);

>>> +	RADEON_ENC_CS(RENCODE_DIRECT_OUTPUT_NALU_TYPE_PPS);

>>> +	uint32_t *size_in_bytes = &enc->cs->current.buf[enc->cs->current.cdw++];

>>> +	radeon_enc_reset(enc);

>>> +	radeon_enc_set_emulation_prevention(enc, false);

>>> +	radeon_enc_code_fixed_bits(enc, 0x00000001, 32);

>>> +	radeon_enc_code_fixed_bits(enc, 0x68, 8);

>>> +	radeon_enc_byte_align(enc);

>>> +	radeon_enc_set_emulation_prevention(enc, true);

>>> +	radeon_enc_code_ue(enc, 0x0);

>>> +	radeon_enc_code_ue(enc, 0x0);

>>> +	radeon_enc_code_fixed_bits(enc, (enc->enc_pic.spec_misc.cabac_enable ? 0x1 : 0x0), 1);

>>> +	radeon_enc_code_fixed_bits(enc, 0x0, 1);

>>> +	radeon_enc_code_ue(enc, 0x0);

>>> +	radeon_enc_code_ue(enc, 0x0);

>>> +	radeon_enc_code_ue(enc, 0x0);

>>> +	radeon_enc_code_fixed_bits(enc, 0x0, 1);

>>> +	radeon_enc_code_fixed_bits(enc, 0x0, 2);

>>> +	radeon_enc_code_se(enc, 0x0);

>>> +	radeon_enc_code_se(enc, 0x0);

>>> +	radeon_enc_code_se(enc, 0x0);

>>> +	radeon_enc_code_fixed_bits(enc, 0x1, 1);

>>> +	radeon_enc_code_fixed_bits(enc, 0x0, 1);

>>> +	radeon_enc_code_fixed_bits(enc, 0x0, 1);

>> No 8x8 transform?


[Boyuan] No, our hardware doesn’t support 8x8 transform.

>>

>>> +	radeon_enc_code_fixed_bits(enc, 0x1, 1);

>>> +	radeon_enc_byte_align(enc);

>>> +	radeon_enc_flush_headers(enc);

>>> +	*size_in_bytes = (enc->bits_output + 7) / 8;

>>> +	RADEON_ENC_END();

>>> +}

>>> +

>>> +static void radeon_enc_slice_header(struct radeon_encoder *enc) {

>>> +	uint32_t instruction[RENCODE_SLICE_HEADER_TEMPLATE_MAX_NUM_INSTRUCTIONS] = {0};

>>> +	uint32_t num_bits[RENCODE_SLICE_HEADER_TEMPLATE_MAX_NUM_INSTRUCTIONS] = {0};

>>> +	unsigned int inst_index = 0;

>>> +	unsigned int bit_index = 0;

>>> +	unsigned int bits_copied = 0;

>>> +	RADEON_ENC_BEGIN(RENCODE_IB_PARAM_SLICE_HEADER);

>>> +	radeon_enc_reset(enc);

>>> +	radeon_enc_set_emulation_prevention(enc, false);

>>> +

>>> +	if (enc->enc_pic.is_idr)

>>> +		radeon_enc_code_fixed_bits(enc, 0x65, 8);

>>> +	else if (enc->enc_pic.not_referenced)

>>> +		radeon_enc_code_fixed_bits(enc, 0x01, 8);

>>> +	else

>>> +		radeon_enc_code_fixed_bits(enc, 0x41, 8);

>>> +

>>> +	radeon_enc_flush_headers(enc);

>>> +	bit_index ++;

>>> +	instruction[inst_index] = RENCODE_HEADER_INSTRUCTION_COPY;

>>> +	num_bits[inst_index] = enc->bits_output - bits_copied;

>>> +	bits_copied = enc->bits_output;

>>> +	inst_index++;

>>> +

>>> +	instruction[inst_index] = RENCODE_H264_HEADER_INSTRUCTION_FIRST_MB;

>>> +	inst_index++;

>> Who fills first_mb_in_slice?  Is this dynamic because there might be multiple slices with boundaries unknown at this point?


[Boyuan] Firmware does the job.

>>

>>> +

>>> +	switch(enc->enc_pic.picture_type) {

>>> +		case PIPE_H264_ENC_PICTURE_TYPE_I:

>>> +		case PIPE_H264_ENC_PICTURE_TYPE_IDR:

>>> +			radeon_enc_code_fixed_bits(enc, 0x08, 7);

>>> +			break;

>>> +		case PIPE_H264_ENC_PICTURE_TYPE_P:

>>> +		case PIPE_H264_ENC_PICTURE_TYPE_SKIP:

>>> +			radeon_enc_code_fixed_bits(enc, 0x06, 5);

>>> +			break;

>>> +		case PIPE_H264_ENC_PICTURE_TYPE_B:

>>> +			radeon_enc_code_fixed_bits(enc, 0x07, 5);

>>> +			break;

>>> +		default:

>>> +			radeon_enc_code_fixed_bits(enc, 0x08, 7);

>>> +	}

>> Can you explain what this is doing?  It's slice_type, which is normally unsigned exp-golomb, in some strange form?


[Boyuan] The code takes the “short-cut” by directly coding the pre-calculated ue results of the slice_type constant values.

>>

>>> +

>>> +	radeon_enc_code_ue(enc, 0x0);

>>> +	radeon_enc_code_fixed_bits(enc, enc->enc_pic.frame_num % 32, 5);

>> If the user had log2_max_frame_num_minus4 == 0 then the truncation doesn't work.  Might be better to use the user value of log2_max_frame_num_minus4 in the SPS?


[Boyuan] Right, we hardcoded log2_max_frame_num_minus4 = 1 for now in both sps and slice header, we plan to change it in the future.

>>

>>> +

>>> +	if (enc->enc_pic.h264_enc_params.input_picture_structure != RENCODE_H264_PICTURE_STRUCTURE_FRAME) {

>>> +		radeon_enc_code_fixed_bits(enc, 0x1, 1);

>>> +		radeon_enc_code_fixed_bits(enc, enc->enc_pic.h264_enc_params.input_picture_structure == RENCODE_H264_PICTURE_STRUCTURE_BOTTOM_FIELD ? 1 : 0, 1);

>>> +	}

>>> +

>>> +	if (enc->enc_pic.is_idr)

>>> +		radeon_enc_code_ue(enc, 0x0);

>> Please make idr_pic_id change between successive IDR pictures.  This is required if they are adjacent, and is generally a good idea even if they aren't.


[Boyuan] Thanks for catching this! I fixed it in the new patch.

>>

>> (Or, better yet, use the value from the user.)

>>

>>> +

>>> +	if (enc->enc_pic.pic_order_cnt_type == 0)

>>> +		radeon_enc_code_fixed_bits(enc, enc->enc_pic.pic_order_cnt % 32, 

>>> +5);

>> Same problem as frame_num with log2_max_pic_order_cnt_lsb_minus4.


[Boyuan] Again, we hardcoded log2_max_pic_order_cnt_lsb_minus4 = 1 for now and plan to change it in the future.

>>

>>> +

>> B-frames aren't actually supported yet, right?


[Boyuan] No B-frame support yet.

>>

>> (direct_spatial_mv_pred_flag is missing here.)

>>

>>> +	if (enc->enc_pic.picture_type != PIPE_H264_ENC_PICTURE_TYPE_IDR) {

>>> +		radeon_enc_code_fixed_bits(enc, 0x0, 1);

>>> +

>>> +		if (enc->enc_pic.frame_num - enc->enc_pic.ref_idx_l0 > 1) {

>>> +			radeon_enc_code_fixed_bits(enc, 0x1, 1);

>>> +			radeon_enc_code_ue(enc, 0x0);

>>> +			radeon_enc_code_ue(enc, (enc->enc_pic.frame_num - 

>>> +enc->enc_pic.ref_idx_l0 - 1));

>> Does the rest of the code support using this to pick an earlier reference frame?


[Boyuan] We plan to work on LTR later.

>>

>>> +			radeon_enc_code_ue(enc, 0x3);

>>> +		} else

>>> +			radeon_enc_code_fixed_bits(enc, 0x0, 1);

>>> +	}

>>> +

>>> +	if (enc->enc_pic.is_idr) {

>>> +		radeon_enc_code_fixed_bits(enc, 0x0, 1);

>>> +		radeon_enc_code_fixed_bits(enc, 0x0, 1);

>>> +	} else

>>> +		radeon_enc_code_fixed_bits(enc, 0x0, 1);

>>> +

>>> +	if ((enc->enc_pic.picture_type != PIPE_H264_ENC_PICTURE_TYPE_IDR) && (enc->enc_pic.spec_misc.cabac_enable))

>>> +		radeon_enc_code_ue(enc, enc->enc_pic.spec_misc.cabac_init_idc);

>>> +

>>> +	radeon_enc_flush_headers(enc);

>>> +	bit_index ++;

>>> +	instruction[inst_index] = RENCODE_HEADER_INSTRUCTION_COPY;

>>> +	num_bits[inst_index] = enc->bits_output - bits_copied;

>>> +	bits_copied = enc->bits_output;

>>> +	inst_index++;

>>> +

>>> +	instruction[inst_index] = RENCODE_H264_HEADER_INSTRUCTION_SLICE_QP_DELTA;

>>> +	inst_index++;

>> Who is going to decide the slice_qp_delta value to go here?


[Boyuan] As explained above, Firmware will.

>>

>>> +

>>> +	radeon_enc_code_ue(enc, 

>>> +enc->enc_pic.h264_deblock.disable_deblocking_filter_idc ? 1: 0);

>> Can also be 2.


[Boyuan] VCN support disabling deblocking filter across slice boundary. However, currently our driver doesn’t support it.

Thanks for all the comments, please review the modified the patch for the fixes mentioned above.

Thanks,
Boyuan

>>

>>> +

>>> +	if (!enc->enc_pic.h264_deblock.disable_deblocking_filter_idc) {

>>> +		radeon_enc_code_se(enc, enc->enc_pic.h264_deblock.alpha_c0_offset_div2);

>>> +		radeon_enc_code_se(enc, enc->enc_pic.h264_deblock.beta_offset_div2);

>>> +	}

>>> +

>>> +	radeon_enc_flush_headers(enc);

>>> +	bit_index ++;

>>> +	instruction[inst_index] = RENCODE_HEADER_INSTRUCTION_COPY;

>>> +	num_bits[inst_index] = enc->bits_output - bits_copied;

>>> +	bits_copied = enc->bits_output;

>>> +	inst_index++;

>>> +

>>> +	instruction[inst_index] = RENCODE_HEADER_INSTRUCTION_END;

>>> +

>>> +	for (int i = bit_index; i < RENCODE_SLICE_HEADER_TEMPLATE_MAX_TEMPLATE_SIZE_IN_DWORDS; i++)

>>> +		RADEON_ENC_CS(0x00000000);

>>> +

>>> +	for (int j = 0; j < RENCODE_SLICE_HEADER_TEMPLATE_MAX_NUM_INSTRUCTIONS; j++) {

>>> +		RADEON_ENC_CS(instruction[j]);

>>> +		RADEON_ENC_CS(num_bits[j]);

>>> +	}

>>> +

>>> +	RADEON_ENC_END();

>>> +}

>>> +

>>>   static void radeon_enc_ctx(struct radeon_encoder *enc)

>>>   {

>>>   	enc->enc_pic.ctx_buf.swizzle_mode = 0; @@ -574,6 +801,13 @@ static 

>>> void encode(struct radeon_encoder *enc)

>>>   	radeon_enc_session_info(enc);

>>>   	enc->total_task_size = 0;

>>>   	radeon_enc_task_info(enc, enc->need_feedback);

>>> +

>>> +	if (enc->enc_pic.is_idr) {

>>> +		radeon_enc_nalu_sps(enc);

>>> +		radeon_enc_nalu_pps(enc);

>>> +	}

>>> +

>>> +	radeon_enc_slice_header(enc);

>>>   	radeon_enc_ctx(enc);

>>>   	radeon_enc_bitstream(enc);

>>>   	radeon_enc_feedback(enc);

>>>

> Thanks,

>

> - Mark

> _______________________________________________

> mesa-dev mailing list

> mesa-dev@lists.freedesktop.org

> https://lists.freedesktop.org/mailman/listinfo/mesa-dev



_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Zhang, Boyuan wrote:

>>>> diff --git a/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c
>>>> b/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c
>>>> index 5170c67..c6dc420 100644
>>>> --- a/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c
>>>> +++ b/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c
>>>> @@ -362,6 +362,233 @@ static void radeon_enc_quality_params(struct radeon_encoder *enc)
>>>>    	RADEON_ENC_END();
>>>>    }
>>>>    
>>>> +static void radeon_enc_nalu_sps(struct radeon_encoder *enc) {
>>>> +	RADEON_ENC_BEGIN(RENCODE_IB_PARAM_DIRECT_OUTPUT_NALU);
>>>> +	RADEON_ENC_CS(RENCODE_DIRECT_OUTPUT_NALU_TYPE_SPS);
>>>> +	uint32_t *size_in_bytes = &enc->cs->current.buf[enc->cs->current.cdw++];
>>>> +	radeon_enc_reset(enc);
>>>> +	radeon_enc_set_emulation_prevention(enc, false);
>>>> +	radeon_enc_code_fixed_bits(enc, 0x00000001, 32);
>>>> +	radeon_enc_code_fixed_bits(enc, 0x67, 8);
>>>> +	radeon_enc_byte_align(enc);
>>>> +	radeon_enc_set_emulation_prevention(enc, true);
>>>> +	radeon_enc_code_fixed_bits(enc, enc->enc_pic.spec_misc.profile_idc, 8);
>>>> +	radeon_enc_code_fixed_bits(enc, 0x04, 8);
>>> Please always set constraint_set1_flag when profile_idc is 66.  There are enough actually-constrained-baseline-but-not-marked-as-such streams in the world already to catch out decoders without full baseline support (that is, all of them).
>>>
>>> Also, "constraint_set5_flag shall be equal to 0 when profile_idc is not equal to 77, 88, 100, or 118".
> 
> [Boyuan] Thanks for pointing out. I modified the value to be 0x44 in the new patch (set1=1, and set5=1) since we only support constrained baseline for now.

It's not really with cabac though. I know there was a patch to turn it 
off - but that would have been wasteful and make linux < windows.

Why not use 77 if cabac is on + not set constrained bits, windows seems 
to set main.

Currently with vce trying to set main manually from ffmeg/gst in order 
to get something "correct" still sets flags = something that's not seen 
as main (but works).
Zhang, Boyuan wrote:

>>>>> diff --git a/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c

>>>>> b/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c

>>>>> index 5170c67..c6dc420 100644

>>>>> --- a/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c

>>>>> +++ b/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c

>>>>> @@ -362,6 +362,233 @@ static void radeon_enc_quality_params(struct radeon_encoder *enc)

>>>>>    	RADEON_ENC_END();

>>>>>    }

>>>>>    

>>>>> +static void radeon_enc_nalu_sps(struct radeon_encoder *enc) {

>>>>> +	RADEON_ENC_BEGIN(RENCODE_IB_PARAM_DIRECT_OUTPUT_NALU);

>>>>> +	RADEON_ENC_CS(RENCODE_DIRECT_OUTPUT_NALU_TYPE_SPS);

>>>>> +	uint32_t *size_in_bytes = &enc->cs->current.buf[enc->cs->current.cdw++];

>>>>> +	radeon_enc_reset(enc);

>>>>> +	radeon_enc_set_emulation_prevention(enc, false);

>>>>> +	radeon_enc_code_fixed_bits(enc, 0x00000001, 32);

>>>>> +	radeon_enc_code_fixed_bits(enc, 0x67, 8);

>>>>> +	radeon_enc_byte_align(enc);

>>>>> +	radeon_enc_set_emulation_prevention(enc, true);

>>>>> +	radeon_enc_code_fixed_bits(enc, enc->enc_pic.spec_misc.profile_idc, 8);

>>>>> +	radeon_enc_code_fixed_bits(enc, 0x04, 8);

>>>> Please always set constraint_set1_flag when profile_idc is 66.  There are enough actually-constrained-baseline-but-not-marked-as-such streams in the world already to catch out decoders without full baseline support (that is, all of them).

>>>>

>>>> Also, "constraint_set5_flag shall be equal to 0 when profile_idc is not equal to 77, 88, 100, or 118".

>> 

>> [Boyuan] Thanks for pointing out. I modified the value to be 0x44 in the new patch (set1=1, and set5=1) since we only support constrained baseline for now.

>

>It's not really with cabac though. I know there was a patch to turn it off - but that would have been wasteful and make linux < windows.

>

>Why not use 77 if cabac is on + not set constrained bits, windows seems to set main.

>

>Currently with vce trying to set main manually from ffmeg/gst in order to get something "correct" still sets flags = something that's not seen as main (but works).


Yes, but still the problem is cabac is not allowed for baseline profile and we only support baseline for now. I'm not quite sure about windows test you mentioned, I'm just guessing that we might have some main profile features support in some closed test environment on windows side, but definitely now all main profile features, no b frame support. 
On linux side, we only support baseline profile for this vcn enc.
-----Original Message-----
From: Zhang, Boyuan 

Sent: November-13-17 11:41 AM
To: Andy Furniss; Koenig, Christian; Mark Thompson; mesa-dev@lists.freedesktop.org
Subject: RE: [Mesa-dev] [PATCH 10/18] radeon/vcn: add encode header implementations

Zhang, Boyuan wrote:

>>>>> diff --git a/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c

>>>>> b/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c

>>>>> index 5170c67..c6dc420 100644

>>>>> --- a/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c

>>>>> +++ b/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c

>>>>> @@ -362,6 +362,233 @@ static void radeon_enc_quality_params(struct radeon_encoder *enc)

>>>>>    	RADEON_ENC_END();

>>>>>    }

>>>>>    

>>>>> +static void radeon_enc_nalu_sps(struct radeon_encoder *enc) {

>>>>> +	RADEON_ENC_BEGIN(RENCODE_IB_PARAM_DIRECT_OUTPUT_NALU);

>>>>> +	RADEON_ENC_CS(RENCODE_DIRECT_OUTPUT_NALU_TYPE_SPS);

>>>>> +	uint32_t *size_in_bytes = &enc->cs->current.buf[enc->cs->current.cdw++];

>>>>> +	radeon_enc_reset(enc);

>>>>> +	radeon_enc_set_emulation_prevention(enc, false);

>>>>> +	radeon_enc_code_fixed_bits(enc, 0x00000001, 32);

>>>>> +	radeon_enc_code_fixed_bits(enc, 0x67, 8);

>>>>> +	radeon_enc_byte_align(enc);

>>>>> +	radeon_enc_set_emulation_prevention(enc, true);

>>>>> +	radeon_enc_code_fixed_bits(enc, enc->enc_pic.spec_misc.profile_idc, 8);

>>>>> +	radeon_enc_code_fixed_bits(enc, 0x04, 8);

>>>> Please always set constraint_set1_flag when profile_idc is 66.  There are enough actually-constrained-baseline-but-not-marked-as-such streams in the world already to catch out decoders without full baseline support (that is, all of them).

>>>>

>>>> Also, "constraint_set5_flag shall be equal to 0 when profile_idc is not equal to 77, 88, 100, or 118".

>> 

>> [Boyuan] Thanks for pointing out. I modified the value to be 0x44 in the new patch (set1=1, and set5=1) since we only support constrained baseline for now.

>

>It's not really with cabac though. I know there was a patch to turn it off - but that would have been wasteful and make linux < windows.

>

>Why not use 77 if cabac is on + not set constrained bits, windows seems to set main.

>

>Currently with vce trying to set main manually from ffmeg/gst in order to get something "correct" still sets flags = something that's not seen as main (but works).


Yes, but still the problem is cabac is not allowed for baseline profile and we only support baseline for now. I'm not quite sure about windows test you mentioned, I'm just guessing that we might have some main profile features support in some closed test environment on windows side, but definitely not all main profile features, no b frame support. 
On linux side, we only support baseline profile for this vcn enc.
(fixed a typo)
Zhang, Boyuan wrote:
> 
> 
> -----Original Message-----
> From: Zhang, Boyuan
> Sent: November-13-17 11:41 AM
> To: Andy Furniss; Koenig, Christian; Mark Thompson; mesa-dev@lists.freedesktop.org
> Subject: RE: [Mesa-dev] [PATCH 10/18] radeon/vcn: add encode header implementations
> 
> Zhang, Boyuan wrote:
> 
>>>>>> diff --git a/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c
>>>>>> b/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c
>>>>>> index 5170c67..c6dc420 100644
>>>>>> --- a/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c
>>>>>> +++ b/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c
>>>>>> @@ -362,6 +362,233 @@ static void radeon_enc_quality_params(struct radeon_encoder *enc)
>>>>>>     	RADEON_ENC_END();
>>>>>>     }
>>>>>>     
>>>>>> +static void radeon_enc_nalu_sps(struct radeon_encoder *enc) {
>>>>>> +	RADEON_ENC_BEGIN(RENCODE_IB_PARAM_DIRECT_OUTPUT_NALU);
>>>>>> +	RADEON_ENC_CS(RENCODE_DIRECT_OUTPUT_NALU_TYPE_SPS);
>>>>>> +	uint32_t *size_in_bytes = &enc->cs->current.buf[enc->cs->current.cdw++];
>>>>>> +	radeon_enc_reset(enc);
>>>>>> +	radeon_enc_set_emulation_prevention(enc, false);
>>>>>> +	radeon_enc_code_fixed_bits(enc, 0x00000001, 32);
>>>>>> +	radeon_enc_code_fixed_bits(enc, 0x67, 8);
>>>>>> +	radeon_enc_byte_align(enc);
>>>>>> +	radeon_enc_set_emulation_prevention(enc, true);
>>>>>> +	radeon_enc_code_fixed_bits(enc, enc->enc_pic.spec_misc.profile_idc, 8);
>>>>>> +	radeon_enc_code_fixed_bits(enc, 0x04, 8);
>>>>> Please always set constraint_set1_flag when profile_idc is 66.  There are enough actually-constrained-baseline-but-not-marked-as-such streams in the world already to catch out decoders without full baseline support (that is, all of them).
>>>>>
>>>>> Also, "constraint_set5_flag shall be equal to 0 when profile_idc is not equal to 77, 88, 100, or 118".
>>>
>>> [Boyuan] Thanks for pointing out. I modified the value to be 0x44 in the new patch (set1=1, and set5=1) since we only support constrained baseline for now.
>>
>> It's not really with cabac though. I know there was a patch to turn it off - but that would have been wasteful and make linux < windows.
>>
>> Why not use 77 if cabac is on + not set constrained bits, windows seems to set main.
>>
>> Currently with vce trying to set main manually from ffmeg/gst in order to get something "correct" still sets flags = something that's not seen as main (but works).
> 
> Yes, but still the problem is cabac is not allowed for baseline profile and we only support baseline for now. I'm not quite sure about windows test you mentioned, I'm just guessing that we might have some main profile features support in some closed test environment on windows side, but definitely not all main profile features, no b frame support.
> On linux side, we only support baseline profile for this vcn enc.
> (fixed a typo)

Yea, it's tricky, FWIW windows does not use b-frames for the test I've 
done (re-live turned up high = 50mbit 60fps).
It is flagged as main and uses cabac.
I guess main is the "correct" way to describe constrained baseline + 
cabac even if there are no other main features like b-frames.
On 14/11/17 10:53, Andy Furniss wrote:
> Zhang, Boyuan wrote:
>>
>>
>> -----Original Message-----
>> From: Zhang, Boyuan
>> Sent: November-13-17 11:41 AM
>> To: Andy Furniss; Koenig, Christian; Mark Thompson; mesa-dev@lists.freedesktop.org
>> Subject: RE: [Mesa-dev] [PATCH 10/18] radeon/vcn: add encode header implementations
>>
>> Zhang, Boyuan wrote:
>>
>>>>>>> diff --git a/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c
>>>>>>> b/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c
>>>>>>> index 5170c67..c6dc420 100644
>>>>>>> --- a/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c
>>>>>>> +++ b/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c
>>>>>>> @@ -362,6 +362,233 @@ static void radeon_enc_quality_params(struct radeon_encoder *enc)
>>>>>>>         RADEON_ENC_END();
>>>>>>>     }
>>>>>>>     +static void radeon_enc_nalu_sps(struct radeon_encoder *enc) {
>>>>>>> +    RADEON_ENC_BEGIN(RENCODE_IB_PARAM_DIRECT_OUTPUT_NALU);
>>>>>>> +    RADEON_ENC_CS(RENCODE_DIRECT_OUTPUT_NALU_TYPE_SPS);
>>>>>>> +    uint32_t *size_in_bytes = &enc->cs->current.buf[enc->cs->current.cdw++];
>>>>>>> +    radeon_enc_reset(enc);
>>>>>>> +    radeon_enc_set_emulation_prevention(enc, false);
>>>>>>> +    radeon_enc_code_fixed_bits(enc, 0x00000001, 32);
>>>>>>> +    radeon_enc_code_fixed_bits(enc, 0x67, 8);
>>>>>>> +    radeon_enc_byte_align(enc);
>>>>>>> +    radeon_enc_set_emulation_prevention(enc, true);
>>>>>>> +    radeon_enc_code_fixed_bits(enc, enc->enc_pic.spec_misc.profile_idc, 8);
>>>>>>> +    radeon_enc_code_fixed_bits(enc, 0x04, 8);
>>>>>> Please always set constraint_set1_flag when profile_idc is 66.  There are enough actually-constrained-baseline-but-not-marked-as-such streams in the world already to catch out decoders without full baseline support (that is, all of them).
>>>>>>
>>>>>> Also, "constraint_set5_flag shall be equal to 0 when profile_idc is not equal to 77, 88, 100, or 118".
>>>>
>>>> [Boyuan] Thanks for pointing out. I modified the value to be 0x44 in the new patch (set1=1, and set5=1) since we only support constrained baseline for now.
>>>
>>> It's not really with cabac though. I know there was a patch to turn it off - but that would have been wasteful and make linux < windows.
>>>
>>> Why not use 77 if cabac is on + not set constrained bits, windows seems to set main.
>>>
>>> Currently with vce trying to set main manually from ffmeg/gst in order to get something "correct" still sets flags = something that's not seen as main (but works).
>>
>> Yes, but still the problem is cabac is not allowed for baseline profile and we only support baseline for now. I'm not quite sure about windows test you mentioned, I'm just guessing that we might have some main profile features support in some closed test environment on windows side, but definitely not all main profile features, no b frame support.
>> On linux side, we only support baseline profile for this vcn enc.
>> (fixed a typo)
> 
> Yea, it's tricky, FWIW windows does not use b-frames for the test I've done (re-live turned up high = 50mbit 60fps).

I can get B-frames from AMF with a GCN 2 card on Windows.

> It is flagged as main and uses cabac.
> I guess main is the "correct" way to describe constrained baseline + cabac even if there are no other main features like b-frames.

Yes, it must be flagged as main in this case.

- Mark
On 14/11/17 10:53, Andy Furniss wrote:
>> Zhang, Boyuan wrote:

>>>

>>>

>>> -----Original Message-----

>>> From: Zhang, Boyuan

>>> Sent: November-13-17 11:41 AM

>>> To: Andy Furniss; Koenig, Christian; Mark Thompson; mesa-dev@lists.freedesktop.org

>>> Subject: RE: [Mesa-dev] [PATCH 10/18] radeon/vcn: add encode header implementations

>>>

>>> Zhang, Boyuan wrote:

>>>

>>>>>>>> diff --git a/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c

>>>>>>>> b/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c

>>>>>>>> index 5170c67..c6dc420 100644

>>>>>>>> --- a/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c

>>>>>>>> +++ b/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c

>>>>>>>> @@ -362,6 +362,233 @@ static void radeon_enc_quality_params(struct radeon_encoder *enc)

>>>>>>>>         RADEON_ENC_END();

>>>>>>>>     }

>>>>>>>>     +static void radeon_enc_nalu_sps(struct radeon_encoder *enc) {

>>>>>>>> +    RADEON_ENC_BEGIN(RENCODE_IB_PARAM_DIRECT_OUTPUT_NALU);

>>>>>>>> +    RADEON_ENC_CS(RENCODE_DIRECT_OUTPUT_NALU_TYPE_SPS);

>>>>>>>> +    uint32_t *size_in_bytes = &enc->cs->current.buf[enc->cs->current.cdw++];

>>>>>>>> +    radeon_enc_reset(enc);

>>>>>>>> +    radeon_enc_set_emulation_prevention(enc, false);

>>>>>>>> +    radeon_enc_code_fixed_bits(enc, 0x00000001, 32);

>>>>>>>> +    radeon_enc_code_fixed_bits(enc, 0x67, 8);

>>>>>>>> +    radeon_enc_byte_align(enc);

>>>>>>>> +    radeon_enc_set_emulation_prevention(enc, true);

>>>>>>>> +    radeon_enc_code_fixed_bits(enc, enc->enc_pic.spec_misc.profile_idc, 8);

>>>>>>>> +    radeon_enc_code_fixed_bits(enc, 0x04, 8);

>>>>>>> Please always set constraint_set1_flag when profile_idc is 66.  There are enough actually-constrained-baseline-but-not-marked-as-such streams in the world already to catch out decoders without full baseline support (that is, all of them).

>>>>>>>

>>>>>>> Also, "constraint_set5_flag shall be equal to 0 when profile_idc is not equal to 77, 88, 100, or 118".

>>>>>

>>>>> [Boyuan] Thanks for pointing out. I modified the value to be 0x44 in the new patch (set1=1, and set5=1) since we only support constrained baseline for now.

>>>>

>>>> It's not really with cabac though. I know there was a patch to turn it off - but that would have been wasteful and make linux < windows.

>>>>

>>>> Why not use 77 if cabac is on + not set constrained bits, windows seems to set main.

>>>>

>>>> Currently with vce trying to set main manually from ffmeg/gst in order to get something "correct" still sets flags = something that's not seen as main (but works).

>>>

>>> Yes, but still the problem is cabac is not allowed for baseline profile and we only support baseline for now. I'm not quite sure about windows test you mentioned, I'm just guessing that we might have some main profile features support in some closed test environment on windows side, but definitely not all main profile features, no b frame support.

>>> On linux side, we only support baseline profile for this vcn enc.

>>> (fixed a typo)

>> 

>>  Yea, it's tricky, FWIW windows does not use b-frames for the test I've done (re-live turned up high = 50mbit 60fps).

>

> I can get B-frames from AMF with a GCN 2 card on Windows.


Yes, we used to support b-frame for GCN2 (e.g. like Bonaire, Kabini, etc...) cards. But for later Asics, we dropped the b-frame support. Including this raven vcn encode, b-frame is still not supported according to our firmware team.
- Boyuan

>

>> It is flagged as main and uses cabac.

>> I guess main is the "correct" way to describe constrained baseline + cabac even if there are no other main features like b-frames.


Yes, I agree, that's the correct way in that case. However, we can't advertise main profile since we don't have b-frame support. It would cause serious problem if player tries to use b-frame when seeing that main profile is supported. So as explained before, we only support constrained baseline profile for this raven vcn encode now. And as a result, cabac is forced to be off in this bring-up patch.
- Boyuan

>

> Yes, it must be flagged as main in this case.

>

> - Mark
Mark Thompson wrote:
> On 14/11/17 10:53, Andy Furniss wrote:
>> Zhang, Boyuan wrote:
>>>
>>>
>>> -----Original Message-----
>>> From: Zhang, Boyuan
>>> Sent: November-13-17 11:41 AM
>>> To: Andy Furniss; Koenig, Christian; Mark Thompson; mesa-dev@lists.freedesktop.org
>>> Subject: RE: [Mesa-dev] [PATCH 10/18] radeon/vcn: add encode header implementations
>>>
>>> Zhang, Boyuan wrote:
>>>
>>>>>>>> diff --git a/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c
>>>>>>>> b/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c
>>>>>>>> index 5170c67..c6dc420 100644
>>>>>>>> --- a/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c
>>>>>>>> +++ b/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c
>>>>>>>> @@ -362,6 +362,233 @@ static void radeon_enc_quality_params(struct radeon_encoder *enc)
>>>>>>>>          RADEON_ENC_END();
>>>>>>>>      }
>>>>>>>>      +static void radeon_enc_nalu_sps(struct radeon_encoder *enc) {
>>>>>>>> +    RADEON_ENC_BEGIN(RENCODE_IB_PARAM_DIRECT_OUTPUT_NALU);
>>>>>>>> +    RADEON_ENC_CS(RENCODE_DIRECT_OUTPUT_NALU_TYPE_SPS);
>>>>>>>> +    uint32_t *size_in_bytes = &enc->cs->current.buf[enc->cs->current.cdw++];
>>>>>>>> +    radeon_enc_reset(enc);
>>>>>>>> +    radeon_enc_set_emulation_prevention(enc, false);
>>>>>>>> +    radeon_enc_code_fixed_bits(enc, 0x00000001, 32);
>>>>>>>> +    radeon_enc_code_fixed_bits(enc, 0x67, 8);
>>>>>>>> +    radeon_enc_byte_align(enc);
>>>>>>>> +    radeon_enc_set_emulation_prevention(enc, true);
>>>>>>>> +    radeon_enc_code_fixed_bits(enc, enc->enc_pic.spec_misc.profile_idc, 8);
>>>>>>>> +    radeon_enc_code_fixed_bits(enc, 0x04, 8);
>>>>>>> Please always set constraint_set1_flag when profile_idc is 66.  There are enough actually-constrained-baseline-but-not-marked-as-such streams in the world already to catch out decoders without full baseline support (that is, all of them).
>>>>>>>
>>>>>>> Also, "constraint_set5_flag shall be equal to 0 when profile_idc is not equal to 77, 88, 100, or 118".
>>>>>
>>>>> [Boyuan] Thanks for pointing out. I modified the value to be 0x44 in the new patch (set1=1, and set5=1) since we only support constrained baseline for now.
>>>>
>>>> It's not really with cabac though. I know there was a patch to turn it off - but that would have been wasteful and make linux < windows.
>>>>
>>>> Why not use 77 if cabac is on + not set constrained bits, windows seems to set main.
>>>>
>>>> Currently with vce trying to set main manually from ffmeg/gst in order to get something "correct" still sets flags = something that's not seen as main (but works).
>>>
>>> Yes, but still the problem is cabac is not allowed for baseline profile and we only support baseline for now. I'm not quite sure about windows test you mentioned, I'm just guessing that we might have some main profile features support in some closed test environment on windows side, but definitely not all main profile features, no b frame support.
>>> On linux side, we only support baseline profile for this vcn enc.
>>> (fixed a typo)
>>
>> Yea, it's tricky, FWIW windows does not use b-frames for the test I've done (re-live turned up high = 50mbit 60fps).
> 
> I can get B-frames from AMF with a GCN 2 card on Windows.

Oh good, there is hope for newer cards at least, IIRC in the ffmpeg AMF 
thread, linux support for the future was mentioned.

I was just looking at what relive game recording did, I never tried/know 
how to use AMF.

> 
>> It is flagged as main and uses cabac.
>> I guess main is the "correct" way to describe constrained baseline + cabac even if there are no other main features like b-frames.
> 
> Yes, it must be flagged as main in this case.

OK, thanks for the conformation.

FWIW I recorded xonotic big-key-bench demo at 60fps -qp 28, with and 
without cabac.
The cavlc version was 15% bigger than cabac, so it would be a shame to 
loose it.
Zhang, Boyuan wrote:
> 
> On 14/11/17 10:53, Andy Furniss wrote:
>>> Zhang, Boyuan wrote:
>>>>
>>>>
>>>> -----Original Message-----
>>>> From: Zhang, Boyuan
>>>> Sent: November-13-17 11:41 AM
>>>> To: Andy Furniss; Koenig, Christian; Mark Thompson; mesa-dev@lists.freedesktop.org
>>>> Subject: RE: [Mesa-dev] [PATCH 10/18] radeon/vcn: add encode header implementations
>>>>
>>>> Zhang, Boyuan wrote:
>>>>
>>>>>>>>> diff --git a/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c
>>>>>>>>> b/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c
>>>>>>>>> index 5170c67..c6dc420 100644
>>>>>>>>> --- a/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c
>>>>>>>>> +++ b/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c
>>>>>>>>> @@ -362,6 +362,233 @@ static void radeon_enc_quality_params(struct radeon_encoder *enc)
>>>>>>>>>          RADEON_ENC_END();
>>>>>>>>>      }
>>>>>>>>>      +static void radeon_enc_nalu_sps(struct radeon_encoder *enc) {
>>>>>>>>> +    RADEON_ENC_BEGIN(RENCODE_IB_PARAM_DIRECT_OUTPUT_NALU);
>>>>>>>>> +    RADEON_ENC_CS(RENCODE_DIRECT_OUTPUT_NALU_TYPE_SPS);
>>>>>>>>> +    uint32_t *size_in_bytes = &enc->cs->current.buf[enc->cs->current.cdw++];
>>>>>>>>> +    radeon_enc_reset(enc);
>>>>>>>>> +    radeon_enc_set_emulation_prevention(enc, false);
>>>>>>>>> +    radeon_enc_code_fixed_bits(enc, 0x00000001, 32);
>>>>>>>>> +    radeon_enc_code_fixed_bits(enc, 0x67, 8);
>>>>>>>>> +    radeon_enc_byte_align(enc);
>>>>>>>>> +    radeon_enc_set_emulation_prevention(enc, true);
>>>>>>>>> +    radeon_enc_code_fixed_bits(enc, enc->enc_pic.spec_misc.profile_idc, 8);
>>>>>>>>> +    radeon_enc_code_fixed_bits(enc, 0x04, 8);
>>>>>>>> Please always set constraint_set1_flag when profile_idc is 66.  There are enough actually-constrained-baseline-but-not-marked-as-such streams in the world already to catch out decoders without full baseline support (that is, all of them).
>>>>>>>>
>>>>>>>> Also, "constraint_set5_flag shall be equal to 0 when profile_idc is not equal to 77, 88, 100, or 118".
>>>>>>
>>>>>> [Boyuan] Thanks for pointing out. I modified the value to be 0x44 in the new patch (set1=1, and set5=1) since we only support constrained baseline for now.
>>>>>
>>>>> It's not really with cabac though. I know there was a patch to turn it off - but that would have been wasteful and make linux < windows.
>>>>>
>>>>> Why not use 77 if cabac is on + not set constrained bits, windows seems to set main.
>>>>>
>>>>> Currently with vce trying to set main manually from ffmeg/gst in order to get something "correct" still sets flags = something that's not seen as main (but works).
>>>>
>>>> Yes, but still the problem is cabac is not allowed for baseline profile and we only support baseline for now. I'm not quite sure about windows test you mentioned, I'm just guessing that we might have some main profile features support in some closed test environment on windows side, but definitely not all main profile features, no b frame support.
>>>> On linux side, we only support baseline profile for this vcn enc.
>>>> (fixed a typo)
>>>
>>>   Yea, it's tricky, FWIW windows does not use b-frames for the test I've done (re-live turned up high = 50mbit 60fps).
>>
>> I can get B-frames from AMF with a GCN 2 card on Windows.
> 
> Yes, we used to support b-frame for GCN2 (e.g. like Bonaire, Kabini, etc...) cards. But for later Asics, we dropped the b-frame support. Including this raven vcn encode, b-frame is still not supported according to our firmware team.
> - Boyuan
> 
>>
>>> It is flagged as main and uses cabac.
>>> I guess main is the "correct" way to describe constrained baseline + cabac even if there are no other main features like b-frames.
> 
> Yes, I agree, that's the correct way in that case. However, we can't advertise main profile since we don't have b-frame support. It would cause serious problem if player tries to use b-frame when seeing that main profile is supported.

I guess you mean encoder rather than player - I don't think a player 
should care if there are no b-frames in main.

FWIW vainfo does currently advertise main/high support, IIRC it was 
mentioned in the past but lost in a long thread with many other issues 
in it.

  So as explained before, we only support constrained baseline profile 
for this raven vcn encode now. And as a result, cabac is forced to be 
off in this bring-up patch.

Maybe advertise constrained baseline as encode + expose cabac switch 
that's in the vaapi spec and let the user/app decide to force that.
If cabac is on then flag the output as main.
Zhang, Boyuan wrote:
>> 

>> On 14/11/17 10:53, Andy Furniss wrote:

>>>> Zhang, Boyuan wrote:

>>>>>

>>>>>

>>>>> -----Original Message-----

>>>>> From: Zhang, Boyuan

>>>>> Sent: November-13-17 11:41 AM

>>>>> To: Andy Furniss; Koenig, Christian; Mark Thompson; 

>>>>> mesa-dev@lists.freedesktop.org

>>>>> Subject: RE: [Mesa-dev] [PATCH 10/18] radeon/vcn: add encode header 

>>>>> implementations

>>>>>

>>>>> Zhang, Boyuan wrote:

>>>>>

>>>>>>>>>> diff --git a/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c

>>>>>>>>>> b/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c

>>>>>>>>>> index 5170c67..c6dc420 100644

>>>>>>>>>> --- a/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c

>>>>>>>>>> +++ b/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c

>>>>>>>>>> @@ -362,6 +362,233 @@ static void radeon_enc_quality_params(struct radeon_encoder *enc)

>>>>>>>>>>          RADEON_ENC_END();

>>>>>>>>>>      }

>>>>>>>>>>      +static void radeon_enc_nalu_sps(struct radeon_encoder 

>>>>>>>>>> *enc) {

>>>>>>>>>> +    RADEON_ENC_BEGIN(RENCODE_IB_PARAM_DIRECT_OUTPUT_NALU);

>>>>>>>>>> +    RADEON_ENC_CS(RENCODE_DIRECT_OUTPUT_NALU_TYPE_SPS);

>>>>>>>>>> +    uint32_t *size_in_bytes = &enc->cs->current.buf[enc->cs->current.cdw++];

>>>>>>>>>> +    radeon_enc_reset(enc);

>>>>>>>>>> +    radeon_enc_set_emulation_prevention(enc, false);

>>>>>>>>>> +    radeon_enc_code_fixed_bits(enc, 0x00000001, 32);

>>>>>>>>>> +    radeon_enc_code_fixed_bits(enc, 0x67, 8);

>>>>>>>>>> +    radeon_enc_byte_align(enc);

>>>>>>>>>> +    radeon_enc_set_emulation_prevention(enc, true);

>>>>>>>>>> +    radeon_enc_code_fixed_bits(enc, enc->enc_pic.spec_misc.profile_idc, 8);

>>>>>>>>>> +    radeon_enc_code_fixed_bits(enc, 0x04, 8);

>>>>>>>>> Please always set constraint_set1_flag when profile_idc is 66.  There are enough actually-constrained-baseline-but-not-marked-as-such streams in the world already to catch out decoders without full baseline support (that is, all of them).

>>>>>>>>>

>>>>>>>>> Also, "constraint_set5_flag shall be equal to 0 when profile_idc is not equal to 77, 88, 100, or 118".

>>>>>>>>

>>>>>>> [Boyuan] Thanks for pointing out. I modified the value to be 0x44 in the new patch (set1=1, and set5=1) since we only support constrained baseline for now.

>>>>>>

>>>>>> It's not really with cabac though. I know there was a patch to turn it off - but that would have been wasteful and make linux < windows.

>>>>>>

>>>>>> Why not use 77 if cabac is on + not set constrained bits, windows seems to set main.

>>>>>>

>>>>>> Currently with vce trying to set main manually from ffmeg/gst in order to get something "correct" still sets flags = something that's not seen as main (but works).

>>>>>

>>>>> Yes, but still the problem is cabac is not allowed for baseline profile and we only support baseline for now. I'm not quite sure about windows test you mentioned, I'm just guessing that we might have some main profile features support in some closed test environment on windows side, but definitely not all main profile features, no b frame support.

>>>>> On linux side, we only support baseline profile for this vcn enc.

>>>>> (fixed a typo)

>>>>

>>>>   Yea, it's tricky, FWIW windows does not use b-frames for the test I've done (re-live turned up high = 50mbit 60fps).

>>>

>>> I can get B-frames from AMF with a GCN 2 card on Windows.

>> 

>> Yes, we used to support b-frame for GCN2 (e.g. like Bonaire, Kabini, etc...) cards. But for later Asics, we dropped the b-frame support. Including this raven vcn encode, b-frame is still not supported according to our firmware team.

>> - Boyuan

>> 

>>>

>>>> It is flagged as main and uses cabac.

>>>> I guess main is the "correct" way to describe constrained baseline + cabac even if there are no other main features like b-frames.

>> 

>> Yes, I agree, that's the correct way in that case. However, we can't advertise main profile since we don't have b-frame support. It would cause serious problem if player tries to use b-frame when seeing that main profile is supported.

>

> I guess you mean encoder rather than player - I don't think a player should care if there are no b-frames in main.

>

> FWIW vainfo does currently advertise main/high support, IIRC it was mentioned in the past but lost in a long thread with many other issues in it.


I remember there was a discussion before about this, I will double check it and will probably make a patch to disable them if not yet disabled.

>

>  So as explained before, we only support constrained baseline profile for this raven vcn encode now. And as a result, cabac is forced to be off in this bring-up patch.

>

> Maybe advertise constrained baseline as encode + expose cabac switch that's in the vaapi spec and let the user/app decide to force that.

If cabac is on then flag the output as main.

Thanks for this idea, I could raise this in our meeting to discuss the possibility of doing this. Currently, "advertising constrained baseline only"  + "enabling cabac" at the same time doesn't sound like a "correct"/"perfect" way to me, I need more discussions with my team about this. Therefore, I will push the current patches, e.g. keep cabac disabled for now, to bring up this vcn encode if you are OK with it.
- Boyuan
Zhang, Boyuan wrote:
> Zhang, Boyuan wrote:
>>>
>>> On 14/11/17 10:53, Andy Furniss wrote:
>>>>> Zhang, Boyuan wrote:
>>>>>>
>>>>>>
>>>>>> -----Original Message-----
>>>>>> From: Zhang, Boyuan
>>>>>> Sent: November-13-17 11:41 AM
>>>>>> To: Andy Furniss; Koenig, Christian; Mark Thompson;
>>>>>> mesa-dev@lists.freedesktop.org
>>>>>> Subject: RE: [Mesa-dev] [PATCH 10/18] radeon/vcn: add encode header
>>>>>> implementations
>>>>>>
>>>>>> Zhang, Boyuan wrote:
>>>>>>
>>>>>>>>>>> diff --git a/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c
>>>>>>>>>>> b/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c
>>>>>>>>>>> index 5170c67..c6dc420 100644
>>>>>>>>>>> --- a/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c
>>>>>>>>>>> +++ b/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c
>>>>>>>>>>> @@ -362,6 +362,233 @@ static void radeon_enc_quality_params(struct radeon_encoder *enc)
>>>>>>>>>>>           RADEON_ENC_END();
>>>>>>>>>>>       }
>>>>>>>>>>>       +static void radeon_enc_nalu_sps(struct radeon_encoder
>>>>>>>>>>> *enc) {
>>>>>>>>>>> +    RADEON_ENC_BEGIN(RENCODE_IB_PARAM_DIRECT_OUTPUT_NALU);
>>>>>>>>>>> +    RADEON_ENC_CS(RENCODE_DIRECT_OUTPUT_NALU_TYPE_SPS);
>>>>>>>>>>> +    uint32_t *size_in_bytes = &enc->cs->current.buf[enc->cs->current.cdw++];
>>>>>>>>>>> +    radeon_enc_reset(enc);
>>>>>>>>>>> +    radeon_enc_set_emulation_prevention(enc, false);
>>>>>>>>>>> +    radeon_enc_code_fixed_bits(enc, 0x00000001, 32);
>>>>>>>>>>> +    radeon_enc_code_fixed_bits(enc, 0x67, 8);
>>>>>>>>>>> +    radeon_enc_byte_align(enc);
>>>>>>>>>>> +    radeon_enc_set_emulation_prevention(enc, true);
>>>>>>>>>>> +    radeon_enc_code_fixed_bits(enc, enc->enc_pic.spec_misc.profile_idc, 8);
>>>>>>>>>>> +    radeon_enc_code_fixed_bits(enc, 0x04, 8);
>>>>>>>>>> Please always set constraint_set1_flag when profile_idc is 66.  There are enough actually-constrained-baseline-but-not-marked-as-such streams in the world already to catch out decoders without full baseline support (that is, all of them).
>>>>>>>>>>
>>>>>>>>>> Also, "constraint_set5_flag shall be equal to 0 when profile_idc is not equal to 77, 88, 100, or 118".
>>>>>>>>>
>>>>>>>> [Boyuan] Thanks for pointing out. I modified the value to be 0x44 in the new patch (set1=1, and set5=1) since we only support constrained baseline for now.
>>>>>>>
>>>>>>> It's not really with cabac though. I know there was a patch to turn it off - but that would have been wasteful and make linux < windows.
>>>>>>>
>>>>>>> Why not use 77 if cabac is on + not set constrained bits, windows seems to set main.
>>>>>>>
>>>>>>> Currently with vce trying to set main manually from ffmeg/gst in order to get something "correct" still sets flags = something that's not seen as main (but works).
>>>>>>
>>>>>> Yes, but still the problem is cabac is not allowed for baseline profile and we only support baseline for now. I'm not quite sure about windows test you mentioned, I'm just guessing that we might have some main profile features support in some closed test environment on windows side, but definitely not all main profile features, no b frame support.
>>>>>> On linux side, we only support baseline profile for this vcn enc.
>>>>>> (fixed a typo)
>>>>>
>>>>>    Yea, it's tricky, FWIW windows does not use b-frames for the test I've done (re-live turned up high = 50mbit 60fps).
>>>>
>>>> I can get B-frames from AMF with a GCN 2 card on Windows.
>>>
>>> Yes, we used to support b-frame for GCN2 (e.g. like Bonaire, Kabini, etc...) cards. But for later Asics, we dropped the b-frame support. Including this raven vcn encode, b-frame is still not supported according to our firmware team.
>>> - Boyuan
>>>
>>>>
>>>>> It is flagged as main and uses cabac.
>>>>> I guess main is the "correct" way to describe constrained baseline + cabac even if there are no other main features like b-frames.
>>>
>>> Yes, I agree, that's the correct way in that case. However, we can't advertise main profile since we don't have b-frame support. It would cause serious problem if player tries to use b-frame when seeing that main profile is supported.
>>
>> I guess you mean encoder rather than player - I don't think a player should care if there are no b-frames in main.
>>
>> FWIW vainfo does currently advertise main/high support, IIRC it was mentioned in the past but lost in a long thread with many other issues in it.
> 
> I remember there was a discussion before about this, I will double check it and will probably make a patch to disable them if not yet disabled.
> 
>>
>>   So as explained before, we only support constrained baseline profile for this raven vcn encode now. And as a result, cabac is forced to be off in this bring-up patch.
>>
>> Maybe advertise constrained baseline as encode + expose cabac switch that's in the vaapi spec and let the user/app decide to force that.
> If cabac is on then flag the output as main.
> 
> Thanks for this idea, I could raise this in our meeting to discuss the possibility of doing this. Currently, "advertising constrained baseline only"  + "enabling cabac" at the same time doesn't sound like a "correct"/"perfect" way to me, I need more discussions with my team about this. Therefore, I will push the current patches, e.g. keep cabac disabled for now, to bring up this vcn encode if you are OK with it.
> - Boyuan

Yea, fine, I am just discussing rather than trying to block - I don't 
even have a vcn card.