1/*
2 * Copyright 2017 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22*/
23
24#include <stdio.h>
25#include <string.h>
26#include <inttypes.h>
27#include <unistd.h>
28
29#include "CUnit/Basic.h"
30
31#include <unistd.h>
32#include "util_math.h"
33
34#include "amdgpu_test.h"
35#include "amdgpu_drm.h"
36#include "amdgpu_internal.h"
37#include "decode_messages.h"
38#include "frame.h"
39
/* Size in bytes of the indirect buffer allocated in suite_vcn_tests_init(). */
#define IB_SIZE		4096
/* Capacity of the resources[] array handed to the BO list in submit(). */
#define MAX_RESOURCES	16

/*
 * Command identifiers accepted by vcn_dec_cmd(). On the register path the
 * value is written shifted left by one; on the software-ring path it selects
 * which address pair of rvcn_decode_buffer_t is filled in.
 */
#define DECODE_CMD_MSG_BUFFER                              0x00000000
#define DECODE_CMD_DPB_BUFFER                              0x00000001
#define DECODE_CMD_DECODING_TARGET_BUFFER                  0x00000002
#define DECODE_CMD_FEEDBACK_BUFFER                         0x00000003
#define DECODE_CMD_PROB_TBL_BUFFER                         0x00000004
#define DECODE_CMD_SESSION_CONTEXT_BUFFER                  0x00000005
#define DECODE_CMD_BITSTREAM_BUFFER                        0x00000100
#define DECODE_CMD_IT_SCALING_TABLE_BUFFER                 0x00000204
#define DECODE_CMD_CONTEXT_BUFFER                          0x00000206

/* package_type stored in rvcn_decode_ib_package_t by vcn_dec_cmd(). */
#define DECODE_IB_PARAM_DECODE_BUFFER                      (0x00000001)

/* Bits OR-ed into rvcn_decode_buffer_t.valid_buf_flag by vcn_dec_cmd(). */
#define DECODE_CMDBUF_FLAGS_MSG_BUFFER                     (0x00000001)
#define DECODE_CMDBUF_FLAGS_DPB_BUFFER                     (0x00000002)
#define DECODE_CMDBUF_FLAGS_BITSTREAM_BUFFER               (0x00000004)
#define DECODE_CMDBUF_FLAGS_DECODING_TARGET_BUFFER         (0x00000008)
#define DECODE_CMDBUF_FLAGS_FEEDBACK_BUFFER                (0x00000010)
#define DECODE_CMDBUF_FLAGS_IT_SCALING_BUFFER              (0x00000200)
#define DECODE_CMDBUF_FLAGS_CONTEXT_BUFFER                 (0x00000800)
#define DECODE_CMDBUF_FLAGS_PROB_TBL_BUFFER                (0x00001000)
#define DECODE_CMDBUF_FLAGS_SESSION_CONTEXT_BUFFER         (0x00100000)
64
/*
 * Ring-mode flags, both set to true by suite_vcn_tests_enable() when the VCN
 * IP major version is >= 4.
 *  - vcn_dec_sw_ring: vcn_dec_cmd() fills a rvcn_decode_buffer_t instead of
 *    emitting register writes.
 *  - vcn_unified_ring: IBs get the amdgpu_cs_sq_head()/amdgpu_cs_sq_ib_tail()
 *    framing and decode work is submitted with AMDGPU_HW_IP_VCN_ENC.
 */
static bool vcn_dec_sw_ring = false;
static bool vcn_unified_ring = false;

/* H.264 NAL unit type codes used by the bitstream parser in this file. */
#define H264_NAL_TYPE_NON_IDR_SLICE 1
#define H264_NAL_TYPE_DP_A_SLICE 2
#define H264_NAL_TYPE_DP_B_SLICE 3
#define H264_NAL_TYPE_DP_C_SLICE 0x4
#define H264_NAL_TYPE_IDR_SLICE 0x5
#define H264_NAL_TYPE_SEI 0x6
#define H264_NAL_TYPE_SEQ_PARAM 0x7
#define H264_NAL_TYPE_PIC_PARAM 0x8
#define H264_NAL_TYPE_ACCESS_UNIT 0x9
#define H264_NAL_TYPE_END_OF_SEQ 0xa
#define H264_NAL_TYPE_END_OF_STREAM 0xb
#define H264_NAL_TYPE_FILLER_DATA 0xc
#define H264_NAL_TYPE_SEQ_EXTENSION 0xd

/* Three-byte start code value (00 00 01). */
#define H264_START_CODE 0x000001
83
/*
 * Bookkeeping for one buffer object created by alloc_resource() and released
 * by free_resource().
 */
struct amdgpu_vcn_bo {
	amdgpu_bo_handle handle;	/* BO handle returned by amdgpu_bo_alloc() */
	amdgpu_va_handle va_handle;	/* VA range handle from amdgpu_va_range_alloc() */
	uint64_t addr;			/* GPU virtual address the BO is mapped at */
	uint64_t size;			/* allocation size (request rounded up to 4096) */
	uint8_t *ptr;			/* pointer from the most recent amdgpu_bo_cpu_map() */
};
91
/*
 * Decode buffer descriptor placed in the IB on the software-ring path (see
 * vcn_dec_cmd()). valid_buf_flag is a bitmask of DECODE_CMDBUF_FLAGS_* values;
 * every buffer address is stored as a high/low pair of 32-bit words.
 * vcn_dec_cmd() only ever fills the msg, dpb, target, session context,
 * bitstream, context, feedback, prob_tbl and it_sclr_table pairs; the
 * remaining fields stay zero from the memset().
 */
typedef struct rvcn_decode_buffer_s {
	unsigned int valid_buf_flag;
	unsigned int msg_buffer_address_hi;
	unsigned int msg_buffer_address_lo;
	unsigned int dpb_buffer_address_hi;
	unsigned int dpb_buffer_address_lo;
	unsigned int target_buffer_address_hi;
	unsigned int target_buffer_address_lo;
	unsigned int session_contex_buffer_address_hi;
	unsigned int session_contex_buffer_address_lo;
	unsigned int bitstream_buffer_address_hi;
	unsigned int bitstream_buffer_address_lo;
	unsigned int context_buffer_address_hi;
	unsigned int context_buffer_address_lo;
	unsigned int feedback_buffer_address_hi;
	unsigned int feedback_buffer_address_lo;
	unsigned int luma_hist_buffer_address_hi;
	unsigned int luma_hist_buffer_address_lo;
	unsigned int prob_tbl_buffer_address_hi;
	unsigned int prob_tbl_buffer_address_lo;
	unsigned int sclr_coeff_buffer_address_hi;
	unsigned int sclr_coeff_buffer_address_lo;
	unsigned int it_sclr_table_buffer_address_hi;
	unsigned int it_sclr_table_buffer_address_lo;
	unsigned int sclr_target_buffer_address_hi;
	unsigned int sclr_target_buffer_address_lo;
	unsigned int cenc_size_info_buffer_address_hi;
	unsigned int cenc_size_info_buffer_address_lo;
	unsigned int mpeg2_pic_param_buffer_address_hi;
	unsigned int mpeg2_pic_param_buffer_address_lo;
	unsigned int mpeg2_mb_control_buffer_address_hi;
	unsigned int mpeg2_mb_control_buffer_address_lo;
	unsigned int mpeg2_idct_coeff_buffer_address_hi;
	unsigned int mpeg2_idct_coeff_buffer_address_lo;
} rvcn_decode_buffer_t;
127
/*
 * Two-dword header that vcn_dec_cmd() writes immediately before the
 * rvcn_decode_buffer_t on the software-ring path.
 */
typedef struct rvcn_decode_ib_package_s {
	unsigned int package_size;	/* bytes: this header plus the decode buffer */
	unsigned int package_type;	/* set to DECODE_IB_PARAM_DECODE_BUFFER */
} rvcn_decode_ib_package_t;
132
133
/*
 * Register identifiers emitted into the IB on the register-programming path.
 * One instance per VCN generation lives in reg[], selected by vcn_reg_index.
 */
struct amdgpu_vcn_reg {
	uint32_t data0;	/* followed by the low 32 bits of a buffer address */
	uint32_t data1;	/* followed by the high 32 bits of a buffer address */
	uint32_t cmd;	/* followed by the command value */
	uint32_t nop;	/* used in nop/0 pairs to pad the IB to 16 dwords */
	uint32_t cntl;	/* followed by 0x1 at the end of the decode IB */
};
141
/* Bit-reader state used by the bs_read_*() helpers. */
typedef struct BufferInfo_t {
	uint32_t numOfBitsInBuffer;	/* bits of decData not yet consumed; reset to 8 per byte */
	const uint8_t *decBuffer;	/* current byte position in the stream */
	uint8_t decData;		/* byte currently being consumed bit by bit */
	uint32_t decBufferSize;		/* presumably the total stream size - not used in the visible code */
	const uint8_t *end;		/* bs_eof() reports EOF once decBuffer >= end */
} bufferInfo;
149
/* Values extracted from the H.264 stream by the parsing helpers in this file. */
typedef struct h264_decode_t {
	uint8_t profile;	/* first 8 bits read by h264_parse_sequence_parameter_set() */
	uint8_t level_idc;
	uint8_t nal_ref_idc;
	uint8_t nal_unit_type;	/* presumably one of H264_NAL_TYPE_* - confirm in h264_parse_nal() */
	uint32_t pic_width, pic_height;
	uint32_t slice_type;
} h264_decode;
158
159static amdgpu_device_handle device_handle;
160static uint32_t major_version;
161static uint32_t minor_version;
162static uint32_t family_id;
163static uint32_t chip_rev;
164static uint32_t chip_id;
165static uint32_t asic_id;
166static uint32_t chip_rev;
167static struct amdgpu_vcn_bo enc_buf;
168static struct amdgpu_vcn_bo cpb_buf;
169static uint32_t enc_task_id;
170
171static amdgpu_context_handle context_handle;
172static amdgpu_bo_handle ib_handle;
173static amdgpu_va_handle ib_va_handle;
174static uint64_t ib_mc_address;
175static uint32_t *ib_cpu;
176static uint32_t *ib_checksum;
177static uint32_t *ib_size_in_dw;
178
179static rvcn_decode_buffer_t *decode_buffer;
180struct amdgpu_vcn_bo session_ctx_buf;
181
182static amdgpu_bo_handle resources[MAX_RESOURCES];
183static unsigned num_resources;
184
185static uint8_t vcn_reg_index;
186static struct amdgpu_vcn_reg reg[] = {
187	{0x81c4, 0x81c5, 0x81c3, 0x81ff, 0x81c6},
188	{0x504, 0x505, 0x503, 0x53f, 0x506},
189	{0x10, 0x11, 0xf, 0x29, 0x26d},
190};
191
192uint32_t gWidth, gHeight, gSliceType;
193static uint32_t vcn_ip_version_major;
194static uint32_t vcn_ip_version_minor;
/* Decode test cases (registered in vcn_tests[]). */
static void amdgpu_cs_vcn_dec_create(void);
static void amdgpu_cs_vcn_dec_decode(void);
static void amdgpu_cs_vcn_dec_destroy(void);

/* Encode test cases (registered in vcn_tests[]). */
static void amdgpu_cs_vcn_enc_create(void);
static void amdgpu_cs_vcn_enc_encode(void);
static void amdgpu_cs_vcn_enc_destroy(void);

/* Unified-ring IB framing helpers. */
static void amdgpu_cs_sq_head(uint32_t *base, int *offset, bool enc);
static void amdgpu_cs_sq_ib_tail(uint32_t *end);
/* H.264 bitstream reading and parsing helpers. */
static void h264_check_0s (bufferInfo * bufInfo, int count);
static int32_t h264_se (bufferInfo * bufInfo);
static inline uint32_t bs_read_u1(bufferInfo *bufinfo);
static inline int bs_eof(bufferInfo *bufinfo);
static inline uint32_t bs_read_u(bufferInfo* bufinfo, int n);
static inline uint32_t bs_read_ue(bufferInfo* bufinfo);
static uint32_t remove_03 (uint8_t *bptr, uint32_t len);
static void scaling_list (uint32_t ix, uint32_t sizeOfScalingList, bufferInfo *bufInfo);
static void h264_parse_sequence_parameter_set (h264_decode * dec, bufferInfo *bufInfo);
static void h264_slice_header (h264_decode *dec, bufferInfo *bufInfo);
static uint8_t h264_parse_nal (h264_decode *dec, bufferInfo *bufInfo);
static uint32_t h264_find_next_start_code (uint8_t *pBuf, uint32_t bufLen);
static int verify_checksum(uint8_t *buffer, uint32_t buffer_size);
218
/*
 * CUnit test table for the VCN suite. The test names are also used by
 * suite_vcn_tests_enable() to deactivate individual tests via
 * amdgpu_set_test_active(), so the strings must stay in sync with that code.
 */
CU_TestInfo vcn_tests[] = {

	{ "VCN DEC create",  amdgpu_cs_vcn_dec_create },
	{ "VCN DEC decode",  amdgpu_cs_vcn_dec_decode },
	{ "VCN DEC destroy",  amdgpu_cs_vcn_dec_destroy },

	{ "VCN ENC create",  amdgpu_cs_vcn_enc_create },
	{ "VCN ENC encode",  amdgpu_cs_vcn_enc_encode },
	{ "VCN ENC destroy",  amdgpu_cs_vcn_enc_destroy },
	CU_TEST_INFO_NULL,
};
230
231CU_BOOL suite_vcn_tests_enable(void)
232{
233	struct drm_amdgpu_info_hw_ip info;
234	bool enc_ring, dec_ring;
235	int r;
236
237	if (amdgpu_device_initialize(drm_amdgpu[0], &major_version,
238				   &minor_version, &device_handle))
239		return CU_FALSE;
240
241	family_id = device_handle->info.family_id;
242	asic_id = device_handle->info.asic_id;
243	chip_rev = device_handle->info.chip_rev;
244	chip_id = device_handle->info.chip_external_rev;
245
246	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_VCN_ENC, 0, &info);
247	if (!r) {
248		vcn_ip_version_major = info.hw_ip_version_major;
249		vcn_ip_version_minor = info.hw_ip_version_minor;
250		enc_ring = !!info.available_rings;
251		/* in vcn 4.0 it re-uses encoding queue as unified queue */
252		if (vcn_ip_version_major >= 4) {
253			vcn_unified_ring = true;
254			vcn_dec_sw_ring = true;
255			dec_ring = enc_ring;
256		} else {
257			r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_VCN_DEC, 0, &info);
258			dec_ring = !!info.available_rings;
259		}
260	}
261
262	if (amdgpu_device_deinitialize(device_handle))
263		return CU_FALSE;
264
265	if (r) {
266		printf("\n\nASIC query hw info failed\n");
267		return CU_FALSE;
268	}
269
270	if (!(dec_ring || enc_ring) ||
271	    (family_id < AMDGPU_FAMILY_RV &&
272	     (family_id == AMDGPU_FAMILY_AI &&
273	      (chip_id - chip_rev) < 0x32))) {  /* Arcturus */
274		printf("\n\nThe ASIC NOT support VCN, suite disabled\n");
275		return CU_FALSE;
276	}
277
278	if (!dec_ring) {
279		amdgpu_set_test_active("VCN Tests", "VCN DEC create", CU_FALSE);
280		amdgpu_set_test_active("VCN Tests", "VCN DEC decode", CU_FALSE);
281		amdgpu_set_test_active("VCN Tests", "VCN DEC destroy", CU_FALSE);
282	}
283
284	if (family_id == AMDGPU_FAMILY_AI || !enc_ring) {
285		amdgpu_set_test_active("VCN Tests", "VCN ENC create", CU_FALSE);
286		amdgpu_set_test_active("VCN Tests", "VCN ENC encode", CU_FALSE);
287		amdgpu_set_test_active("VCN Tests", "VCN ENC destroy", CU_FALSE);
288	}
289
290	if (vcn_ip_version_major == 1)
291		vcn_reg_index = 0;
292	else if (vcn_ip_version_major == 2 && vcn_ip_version_minor == 0)
293		vcn_reg_index = 1;
294	else if ((vcn_ip_version_major == 2 && vcn_ip_version_minor >= 5) ||
295				vcn_ip_version_major == 3)
296		vcn_reg_index = 2;
297
298	return CU_TRUE;
299}
300
301int suite_vcn_tests_init(void)
302{
303	int r;
304
305	r = amdgpu_device_initialize(drm_amdgpu[0], &major_version,
306				     &minor_version, &device_handle);
307	if (r)
308		return CUE_SINIT_FAILED;
309
310	family_id = device_handle->info.family_id;
311
312	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
313	if (r)
314		return CUE_SINIT_FAILED;
315
316	r = amdgpu_bo_alloc_and_map(device_handle, IB_SIZE, 4096,
317				    AMDGPU_GEM_DOMAIN_GTT, 0,
318				    &ib_handle, (void**)&ib_cpu,
319				    &ib_mc_address, &ib_va_handle);
320	if (r)
321		return CUE_SINIT_FAILED;
322
323	return CUE_SUCCESS;
324}
325
326int suite_vcn_tests_clean(void)
327{
328	int r;
329
330	r = amdgpu_bo_unmap_and_free(ib_handle, ib_va_handle,
331			     ib_mc_address, IB_SIZE);
332	if (r)
333		return CUE_SCLEAN_FAILED;
334
335	r = amdgpu_cs_ctx_free(context_handle);
336	if (r)
337		return CUE_SCLEAN_FAILED;
338
339	r = amdgpu_device_deinitialize(device_handle);
340	if (r)
341		return CUE_SCLEAN_FAILED;
342
343	return CUE_SUCCESS;
344}
345
346static void amdgpu_cs_sq_head(uint32_t *base, int *offset, bool enc)
347{
348	/* signature */
349	*(base + (*offset)++) = 0x00000010;
350	*(base + (*offset)++) = 0x30000002;
351	ib_checksum = base + (*offset)++;
352	ib_size_in_dw = base + (*offset)++;
353
354	/* engine info */
355	*(base + (*offset)++) = 0x00000010;
356	*(base + (*offset)++) = 0x30000001;
357	*(base + (*offset)++) = enc ? 2 : 3;
358	*(base + (*offset)++) = 0x00000000;
359}
360
361static void amdgpu_cs_sq_ib_tail(uint32_t *end)
362{
363	uint32_t size_in_dw;
364	uint32_t checksum = 0;
365
366	/* if the pointers are invalid, no need to process */
367	if (ib_checksum == NULL || ib_size_in_dw == NULL)
368		return;
369
370	size_in_dw = end - ib_size_in_dw - 1;
371	*ib_size_in_dw = size_in_dw;
372	*(ib_size_in_dw + 4) = size_in_dw * sizeof(uint32_t);
373
374	for (int i = 0; i < size_in_dw; i++)
375		checksum += *(ib_checksum + 2 + i);
376
377	*ib_checksum = checksum;
378
379	ib_checksum = NULL;
380	ib_size_in_dw = NULL;
381}
382
383static int submit(unsigned ndw, unsigned ip)
384{
385	struct amdgpu_cs_request ibs_request = {0};
386	struct amdgpu_cs_ib_info ib_info = {0};
387	struct amdgpu_cs_fence fence_status = {0};
388	uint32_t expired;
389	int r;
390
391	ib_info.ib_mc_address = ib_mc_address;
392	ib_info.size = ndw;
393
394	ibs_request.ip_type = ip;
395
396	r = amdgpu_bo_list_create(device_handle, num_resources, resources,
397				  NULL, &ibs_request.resources);
398	if (r)
399		return r;
400
401	ibs_request.number_of_ibs = 1;
402	ibs_request.ibs = &ib_info;
403	ibs_request.fence_info.handle = NULL;
404
405	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
406	if (r)
407		return r;
408
409	r = amdgpu_bo_list_destroy(ibs_request.resources);
410	if (r)
411		return r;
412
413	fence_status.context = context_handle;
414	fence_status.ip_type = ip;
415	fence_status.fence = ibs_request.seq_no;
416
417	r = amdgpu_cs_query_fence_status(&fence_status,
418					 AMDGPU_TIMEOUT_INFINITE,
419					 0, &expired);
420	if (r)
421		return r;
422
423	return 0;
424}
425
426static void alloc_resource(struct amdgpu_vcn_bo *vcn_bo,
427			unsigned size, unsigned domain)
428{
429	struct amdgpu_bo_alloc_request req = {0};
430	amdgpu_bo_handle buf_handle;
431	amdgpu_va_handle va_handle;
432	uint64_t va = 0;
433	int r;
434
435	req.alloc_size = ALIGN(size, 4096);
436	req.preferred_heap = domain;
437	r = amdgpu_bo_alloc(device_handle, &req, &buf_handle);
438	CU_ASSERT_EQUAL(r, 0);
439	r = amdgpu_va_range_alloc(device_handle,
440				  amdgpu_gpu_va_range_general,
441				  req.alloc_size, 1, 0, &va,
442				  &va_handle, 0);
443	CU_ASSERT_EQUAL(r, 0);
444	r = amdgpu_bo_va_op(buf_handle, 0, req.alloc_size, va, 0,
445			    AMDGPU_VA_OP_MAP);
446	CU_ASSERT_EQUAL(r, 0);
447	vcn_bo->addr = va;
448	vcn_bo->handle = buf_handle;
449	vcn_bo->size = req.alloc_size;
450	vcn_bo->va_handle = va_handle;
451	r = amdgpu_bo_cpu_map(vcn_bo->handle, (void **)&vcn_bo->ptr);
452	CU_ASSERT_EQUAL(r, 0);
453	memset(vcn_bo->ptr, 0, size);
454	r = amdgpu_bo_cpu_unmap(vcn_bo->handle);
455	CU_ASSERT_EQUAL(r, 0);
456}
457
458static void free_resource(struct amdgpu_vcn_bo *vcn_bo)
459{
460	int r;
461
462	r = amdgpu_bo_va_op(vcn_bo->handle, 0, vcn_bo->size,
463			    vcn_bo->addr, 0, AMDGPU_VA_OP_UNMAP);
464	CU_ASSERT_EQUAL(r, 0);
465
466	r = amdgpu_va_range_free(vcn_bo->va_handle);
467	CU_ASSERT_EQUAL(r, 0);
468
469	r = amdgpu_bo_free(vcn_bo->handle);
470	CU_ASSERT_EQUAL(r, 0);
471	memset(vcn_bo, 0, sizeof(*vcn_bo));
472}
473
474static void vcn_dec_cmd(uint64_t addr, unsigned cmd, int *idx)
475{
476	if (vcn_dec_sw_ring == false) {
477		ib_cpu[(*idx)++] = reg[vcn_reg_index].data0;
478		ib_cpu[(*idx)++] = addr;
479		ib_cpu[(*idx)++] = reg[vcn_reg_index].data1;
480		ib_cpu[(*idx)++] = addr >> 32;
481		ib_cpu[(*idx)++] = reg[vcn_reg_index].cmd;
482		ib_cpu[(*idx)++] = cmd << 1;
483		return;
484	}
485
486	/* Support decode software ring message */
487	if (!(*idx)) {
488		rvcn_decode_ib_package_t *ib_header;
489
490		if (vcn_unified_ring)
491			amdgpu_cs_sq_head(ib_cpu, idx, false);
492
493		ib_header = (rvcn_decode_ib_package_t *)&ib_cpu[*idx];
494		ib_header->package_size = sizeof(struct rvcn_decode_buffer_s) +
495			sizeof(struct rvcn_decode_ib_package_s);
496
497		(*idx)++;
498		ib_header->package_type = (DECODE_IB_PARAM_DECODE_BUFFER);
499		(*idx)++;
500
501		decode_buffer = (rvcn_decode_buffer_t *)&(ib_cpu[*idx]);
502		*idx += sizeof(struct rvcn_decode_buffer_s) / 4;
503		memset(decode_buffer, 0, sizeof(struct rvcn_decode_buffer_s));
504	}
505
506	switch(cmd) {
507		case DECODE_CMD_MSG_BUFFER:
508			decode_buffer->valid_buf_flag |= DECODE_CMDBUF_FLAGS_MSG_BUFFER;
509			decode_buffer->msg_buffer_address_hi = (addr >> 32);
510			decode_buffer->msg_buffer_address_lo = (addr);
511		break;
512		case DECODE_CMD_DPB_BUFFER:
513			decode_buffer->valid_buf_flag |= (DECODE_CMDBUF_FLAGS_DPB_BUFFER);
514			decode_buffer->dpb_buffer_address_hi = (addr >> 32);
515			decode_buffer->dpb_buffer_address_lo = (addr);
516		break;
517		case DECODE_CMD_DECODING_TARGET_BUFFER:
518			decode_buffer->valid_buf_flag |= (DECODE_CMDBUF_FLAGS_DECODING_TARGET_BUFFER);
519			decode_buffer->target_buffer_address_hi = (addr >> 32);
520			decode_buffer->target_buffer_address_lo = (addr);
521		break;
522		case DECODE_CMD_FEEDBACK_BUFFER:
523			decode_buffer->valid_buf_flag |= (DECODE_CMDBUF_FLAGS_FEEDBACK_BUFFER);
524			decode_buffer->feedback_buffer_address_hi = (addr >> 32);
525			decode_buffer->feedback_buffer_address_lo = (addr);
526		break;
527		case DECODE_CMD_PROB_TBL_BUFFER:
528			decode_buffer->valid_buf_flag |= (DECODE_CMDBUF_FLAGS_PROB_TBL_BUFFER);
529			decode_buffer->prob_tbl_buffer_address_hi = (addr >> 32);
530			decode_buffer->prob_tbl_buffer_address_lo = (addr);
531		break;
532		case DECODE_CMD_SESSION_CONTEXT_BUFFER:
533			decode_buffer->valid_buf_flag |= (DECODE_CMDBUF_FLAGS_SESSION_CONTEXT_BUFFER);
534			decode_buffer->session_contex_buffer_address_hi = (addr >> 32);
535			decode_buffer->session_contex_buffer_address_lo = (addr);
536		break;
537		case DECODE_CMD_BITSTREAM_BUFFER:
538			decode_buffer->valid_buf_flag |= (DECODE_CMDBUF_FLAGS_BITSTREAM_BUFFER);
539			decode_buffer->bitstream_buffer_address_hi = (addr >> 32);
540			decode_buffer->bitstream_buffer_address_lo = (addr);
541		break;
542		case DECODE_CMD_IT_SCALING_TABLE_BUFFER:
543			decode_buffer->valid_buf_flag |= (DECODE_CMDBUF_FLAGS_IT_SCALING_BUFFER);
544			decode_buffer->it_sclr_table_buffer_address_hi = (addr >> 32);
545			decode_buffer->it_sclr_table_buffer_address_lo = (addr);
546		break;
547		case DECODE_CMD_CONTEXT_BUFFER:
548			decode_buffer->valid_buf_flag |= (DECODE_CMDBUF_FLAGS_CONTEXT_BUFFER);
549			decode_buffer->context_buffer_address_hi = (addr >> 32);
550			decode_buffer->context_buffer_address_lo = (addr);
551		break;
552		default:
553			printf("Not Support!\n");
554	}
555}
556
557static void amdgpu_cs_vcn_dec_create(void)
558{
559	struct amdgpu_vcn_bo msg_buf;
560	unsigned ip;
561	int len, r;
562
563	num_resources  = 0;
564	alloc_resource(&msg_buf, 4096, AMDGPU_GEM_DOMAIN_GTT);
565	alloc_resource(&session_ctx_buf, 32 * 4096, AMDGPU_GEM_DOMAIN_VRAM);
566	resources[num_resources++] = msg_buf.handle;
567	resources[num_resources++] = session_ctx_buf.handle;
568	resources[num_resources++] = ib_handle;
569
570	r = amdgpu_bo_cpu_map(msg_buf.handle, (void **)&msg_buf.ptr);
571	CU_ASSERT_EQUAL(r, 0);
572
573	memset(msg_buf.ptr, 0, 4096);
574	memcpy(msg_buf.ptr, vcn_dec_create_msg, sizeof(vcn_dec_create_msg));
575
576	len = 0;
577
578	vcn_dec_cmd(session_ctx_buf.addr, 5, &len);
579	if (vcn_dec_sw_ring == true) {
580		vcn_dec_cmd(msg_buf.addr, 0, &len);
581	} else {
582		ib_cpu[len++] = reg[vcn_reg_index].data0;
583		ib_cpu[len++] = msg_buf.addr;
584		ib_cpu[len++] = reg[vcn_reg_index].data1;
585		ib_cpu[len++] = msg_buf.addr >> 32;
586		ib_cpu[len++] = reg[vcn_reg_index].cmd;
587		ib_cpu[len++] = 0;
588		for (; len % 16; ) {
589			ib_cpu[len++] = reg[vcn_reg_index].nop;
590			ib_cpu[len++] = 0;
591		}
592	}
593
594	if (vcn_unified_ring) {
595		amdgpu_cs_sq_ib_tail(ib_cpu + len);
596		ip = AMDGPU_HW_IP_VCN_ENC;
597	} else
598		ip = AMDGPU_HW_IP_VCN_DEC;
599
600	r = submit(len, ip);
601
602	CU_ASSERT_EQUAL(r, 0);
603
604	free_resource(&msg_buf);
605}
606
607static void amdgpu_cs_vcn_dec_decode(void)
608{
609	const unsigned dpb_size = 15923584, dt_size = 737280;
610	uint64_t msg_addr, fb_addr, bs_addr, dpb_addr, ctx_addr, dt_addr, it_addr, sum;
611	struct amdgpu_vcn_bo dec_buf;
612	int size, len, i, r;
613	unsigned ip;
614	uint8_t *dec;
615
616	size = 4*1024; /* msg */
617	size += 4*1024; /* fb */
618	size += 4096; /*it_scaling_table*/
619	size += ALIGN(sizeof(uvd_bitstream), 4*1024);
620	size += ALIGN(dpb_size, 4*1024);
621	size += ALIGN(dt_size, 4*1024);
622
623	num_resources = 0;
624	alloc_resource(&dec_buf, size, AMDGPU_GEM_DOMAIN_GTT);
625	resources[num_resources++] = dec_buf.handle;
626	resources[num_resources++] = ib_handle;
627
628	r = amdgpu_bo_cpu_map(dec_buf.handle, (void **)&dec_buf.ptr);
629	dec = dec_buf.ptr;
630
631	CU_ASSERT_EQUAL(r, 0);
632	memset(dec_buf.ptr, 0, size);
633	memcpy(dec_buf.ptr, vcn_dec_decode_msg, sizeof(vcn_dec_decode_msg));
634	memcpy(dec_buf.ptr + sizeof(vcn_dec_decode_msg),
635			avc_decode_msg, sizeof(avc_decode_msg));
636
637	dec += 4*1024;
638	memcpy(dec, feedback_msg, sizeof(feedback_msg));
639	dec += 4*1024;
640	memcpy(dec, uvd_it_scaling_table, sizeof(uvd_it_scaling_table));
641
642	dec += 4*1024;
643	memcpy(dec, uvd_bitstream, sizeof(uvd_bitstream));
644
645	dec += ALIGN(sizeof(uvd_bitstream), 4*1024);
646
647	dec += ALIGN(dpb_size, 4*1024);
648
649	msg_addr = dec_buf.addr;
650	fb_addr = msg_addr + 4*1024;
651	it_addr = fb_addr + 4*1024;
652	bs_addr = it_addr + 4*1024;
653	dpb_addr = ALIGN(bs_addr + sizeof(uvd_bitstream), 4*1024);
654	ctx_addr = ALIGN(dpb_addr + 0x006B9400, 4*1024);
655	dt_addr = ALIGN(dpb_addr + dpb_size, 4*1024);
656
657	len = 0;
658	vcn_dec_cmd(session_ctx_buf.addr, 0x5, &len);
659	vcn_dec_cmd(msg_addr, 0x0, &len);
660	vcn_dec_cmd(dpb_addr, 0x1, &len);
661	vcn_dec_cmd(dt_addr, 0x2, &len);
662	vcn_dec_cmd(fb_addr, 0x3, &len);
663	vcn_dec_cmd(bs_addr, 0x100, &len);
664	vcn_dec_cmd(it_addr, 0x204, &len);
665	vcn_dec_cmd(ctx_addr, 0x206, &len);
666
667	if (vcn_dec_sw_ring == false) {
668		ib_cpu[len++] = reg[vcn_reg_index].cntl;
669		ib_cpu[len++] = 0x1;
670		for (; len % 16; ) {
671			ib_cpu[len++] = reg[vcn_reg_index].nop;
672			ib_cpu[len++] = 0;
673		}
674	}
675
676	if (vcn_unified_ring) {
677		amdgpu_cs_sq_ib_tail(ib_cpu + len);
678		ip = AMDGPU_HW_IP_VCN_ENC;
679	} else
680		ip = AMDGPU_HW_IP_VCN_DEC;
681
682	r = submit(len, ip);
683	CU_ASSERT_EQUAL(r, 0);
684
685	for (i = 0, sum = 0; i < dt_size; ++i)
686		sum += dec[i];
687
688	CU_ASSERT_EQUAL(sum, SUM_DECODE);
689
690	free_resource(&dec_buf);
691}
692
693static void amdgpu_cs_vcn_dec_destroy(void)
694{
695	struct amdgpu_vcn_bo msg_buf;
696	unsigned ip;
697	int len, r;
698
699	num_resources = 0;
700	alloc_resource(&msg_buf, 1024, AMDGPU_GEM_DOMAIN_GTT);
701	resources[num_resources++] = msg_buf.handle;
702	resources[num_resources++] = ib_handle;
703
704	r = amdgpu_bo_cpu_map(msg_buf.handle, (void **)&msg_buf.ptr);
705	CU_ASSERT_EQUAL(r, 0);
706
707	memset(msg_buf.ptr, 0, 1024);
708	memcpy(msg_buf.ptr, vcn_dec_destroy_msg, sizeof(vcn_dec_destroy_msg));
709
710	len = 0;
711	vcn_dec_cmd(session_ctx_buf.addr, 5, &len);
712	if (vcn_dec_sw_ring == true) {
713		vcn_dec_cmd(msg_buf.addr, 0, &len);
714	} else {
715		ib_cpu[len++] = reg[vcn_reg_index].data0;
716		ib_cpu[len++] = msg_buf.addr;
717		ib_cpu[len++] = reg[vcn_reg_index].data1;
718		ib_cpu[len++] = msg_buf.addr >> 32;
719		ib_cpu[len++] = reg[vcn_reg_index].cmd;
720		ib_cpu[len++] = 0;
721		for (; len % 16; ) {
722			ib_cpu[len++] = reg[vcn_reg_index].nop;
723			ib_cpu[len++] = 0;
724		}
725	}
726
727	if (vcn_unified_ring) {
728		amdgpu_cs_sq_ib_tail(ib_cpu + len);
729		ip = AMDGPU_HW_IP_VCN_ENC;
730	} else
731		ip = AMDGPU_HW_IP_VCN_DEC;
732
733	r = submit(len, ip);
734	CU_ASSERT_EQUAL(r, 0);
735
736	free_resource(&msg_buf);
737	free_resource(&session_ctx_buf);
738}
739
740static void amdgpu_cs_vcn_enc_create(void)
741{
742	int len, r;
743	uint32_t *p_task_size = NULL;
744	uint32_t task_offset = 0, st_offset;
745	uint32_t *st_size = NULL;
746	unsigned width = 160, height = 128, buf_size;
747	uint32_t fw_maj = 1, fw_min = 9;
748
749	if (vcn_ip_version_major == 2) {
750		fw_maj = 1;
751		fw_min = 1;
752	} else if (vcn_ip_version_major == 3) {
753		fw_maj = 1;
754		fw_min = 0;
755	}
756
757	gWidth = width;
758	gHeight = height;
759	buf_size = ALIGN(width, 256) * ALIGN(height, 32) * 3 / 2;
760	enc_task_id = 1;
761
762	num_resources = 0;
763	alloc_resource(&enc_buf, 128 * 1024, AMDGPU_GEM_DOMAIN_GTT);
764	alloc_resource(&cpb_buf, buf_size * 2, AMDGPU_GEM_DOMAIN_GTT);
765	resources[num_resources++] = enc_buf.handle;
766	resources[num_resources++] = cpb_buf.handle;
767	resources[num_resources++] = ib_handle;
768
769	r = amdgpu_bo_cpu_map(enc_buf.handle, (void**)&enc_buf.ptr);
770	memset(enc_buf.ptr, 0, 128 * 1024);
771	r = amdgpu_bo_cpu_unmap(enc_buf.handle);
772
773	r = amdgpu_bo_cpu_map(cpb_buf.handle, (void**)&enc_buf.ptr);
774	memset(enc_buf.ptr, 0, buf_size * 2);
775	r = amdgpu_bo_cpu_unmap(cpb_buf.handle);
776
777	len = 0;
778
779	if (vcn_unified_ring)
780		amdgpu_cs_sq_head(ib_cpu, &len, true);
781
782	/* session info */
783	st_offset = len;
784	st_size = &ib_cpu[len++];	/* size */
785	ib_cpu[len++] = 0x00000001;	/* RENCODE_IB_PARAM_SESSION_INFO */
786	ib_cpu[len++] = ((fw_maj << 16) | (fw_min << 0));
787	ib_cpu[len++] = enc_buf.addr >> 32;
788	ib_cpu[len++] = enc_buf.addr;
789	ib_cpu[len++] = 1;	/* RENCODE_ENGINE_TYPE_ENCODE; */
790	*st_size = (len - st_offset) * 4;
791
792	/* task info */
793	task_offset = len;
794	st_offset = len;
795	st_size = &ib_cpu[len++];	/* size */
796	ib_cpu[len++] = 0x00000002;	/* RENCODE_IB_PARAM_TASK_INFO */
797	p_task_size = &ib_cpu[len++];
798	ib_cpu[len++] = enc_task_id++;	/* task_id */
799	ib_cpu[len++] = 0;	/* feedback */
800	*st_size = (len - st_offset) * 4;
801
802	/* op init */
803	st_offset = len;
804	st_size = &ib_cpu[len++];	/* size */
805	ib_cpu[len++] = 0x01000001;	/* RENCODE_IB_OP_INITIALIZE */
806	*st_size = (len - st_offset) * 4;
807
808	/* session_init */
809	st_offset = len;
810	st_size = &ib_cpu[len++];	/* size */
811	ib_cpu[len++] = 0x00000003;	/* RENCODE_IB_PARAM_SESSION_INIT */
812	ib_cpu[len++] = 1;	/* RENCODE_ENCODE_STANDARD_H264 */
813	ib_cpu[len++] = width;
814	ib_cpu[len++] = height;
815	ib_cpu[len++] = 0;
816	ib_cpu[len++] = 0;
817	ib_cpu[len++] = 0;	/* pre encode mode */
818	ib_cpu[len++] = 0;	/* chroma enabled : false */
819	ib_cpu[len++] = 0;
820	ib_cpu[len++] = 0;
821	*st_size = (len - st_offset) * 4;
822
823	/* slice control */
824	st_offset = len;
825	st_size = &ib_cpu[len++];	/* size */
826	ib_cpu[len++] = 0x00200001;	/* RENCODE_H264_IB_PARAM_SLICE_CONTROL */
827	ib_cpu[len++] = 0;	/* RENCODE_H264_SLICE_CONTROL_MODE_FIXED_MBS */
828	ib_cpu[len++] = ALIGN(width, 16) / 16 * ALIGN(height, 16) / 16;
829	*st_size = (len - st_offset) * 4;
830
831	/* enc spec misc */
832	st_offset = len;
833	st_size = &ib_cpu[len++];	/* size */
834	ib_cpu[len++] = 0x00200002;	/* RENCODE_H264_IB_PARAM_SPEC_MISC */
835	ib_cpu[len++] = 0;	/* constrained intra pred flag */
836	ib_cpu[len++] = 0;	/* cabac enable */
837	ib_cpu[len++] = 0;	/* cabac init idc */
838	ib_cpu[len++] = 1;	/* half pel enabled */
839	ib_cpu[len++] = 1;	/* quarter pel enabled */
840	ib_cpu[len++] = 100;	/* BASELINE profile */
841	ib_cpu[len++] = 11;	/* level */
842	if (vcn_ip_version_major >= 3) {
843		ib_cpu[len++] = 0;	/* b_picture_enabled */
844		ib_cpu[len++] = 0;	/* weighted_bipred_idc */
845	}
846	*st_size = (len - st_offset) * 4;
847
848	/* deblocking filter */
849	st_offset = len;
850	st_size = &ib_cpu[len++];	/* size */
851	ib_cpu[len++] = 0x00200004;	/* RENCODE_H264_IB_PARAM_DEBLOCKING_FILTER */
852	ib_cpu[len++] = 0;	/* disable deblocking filter idc */
853	ib_cpu[len++] = 0;	/* alpha c0 offset */
854	ib_cpu[len++] = 0;	/* tc offset */
855	ib_cpu[len++] = 0;	/* cb offset */
856	ib_cpu[len++] = 0;	/* cr offset */
857	*st_size = (len - st_offset) * 4;
858
859	/* layer control */
860	st_offset = len;
861	st_size = &ib_cpu[len++];	/* size */
862	ib_cpu[len++] = 0x00000004;	/* RENCODE_IB_PARAM_LAYER_CONTROL */
863	ib_cpu[len++] = 1;	/* max temporal layer */
864	ib_cpu[len++] = 1;	/* no of temporal layer */
865	*st_size = (len - st_offset) * 4;
866
867	/* rc_session init */
868	st_offset = len;
869	st_size = &ib_cpu[len++];	/* size */
870	ib_cpu[len++] = 0x00000006;	/* RENCODE_IB_PARAM_RATE_CONTROL_SESSION_INIT */
871	ib_cpu[len++] = 0;	/* rate control */
872	ib_cpu[len++] = 48;	/* vbv buffer level */
873	*st_size = (len - st_offset) * 4;
874
875	/* quality params */
876	st_offset = len;
877	st_size = &ib_cpu[len++];	/* size */
878	ib_cpu[len++] = 0x00000009;	/* RENCODE_IB_PARAM_QUALITY_PARAMS */
879	ib_cpu[len++] = 0;	/* vbaq mode */
880	ib_cpu[len++] = 0;	/* scene change sensitivity */
881	ib_cpu[len++] = 0;	/* scene change min idr interval */
882	ib_cpu[len++] = 0;
883	if (vcn_ip_version_major >= 3)
884		ib_cpu[len++] = 0;
885	*st_size = (len - st_offset) * 4;
886
887	/* layer select */
888	st_offset = len;
889	st_size = &ib_cpu[len++];	/* size */
890	ib_cpu[len++] = 0x00000005;	/* RENCODE_IB_PARAM_LAYER_SELECT */
891	ib_cpu[len++] = 0;	/* temporal layer */
892	*st_size = (len - st_offset) * 4;
893
894	/* rc layer init */
895	st_offset = len;
896	st_size = &ib_cpu[len++];	/* size */
897	ib_cpu[len++] = 0x00000007;	/* RENCODE_IB_PARAM_RATE_CONTROL_LAYER_INIT */
898	ib_cpu[len++] = 0;
899	ib_cpu[len++] = 0;
900	ib_cpu[len++] = 25;
901	ib_cpu[len++] = 1;
902	ib_cpu[len++] = 0x01312d00;
903	ib_cpu[len++] = 0;
904	ib_cpu[len++] = 0;
905	ib_cpu[len++] = 0;
906	*st_size = (len - st_offset) * 4;
907
908	/* layer select */
909	st_offset = len;
910	st_size = &ib_cpu[len++];	/* size */
911	ib_cpu[len++] = 0x00000005;	/* RENCODE_IB_PARAM_LAYER_SELECT */
912	ib_cpu[len++] = 0;	/* temporal layer */
913	*st_size = (len - st_offset) * 4;
914
915	/* rc per pic */
916	st_offset = len;
917	st_size = &ib_cpu[len++];	/* size */
918	ib_cpu[len++] = 0x00000008;	/* RENCODE_IB_PARAM_RATE_CONTROL_PER_PICTURE */
919	ib_cpu[len++] = 20;
920	ib_cpu[len++] = 0;
921	ib_cpu[len++] = 51;
922	ib_cpu[len++] = 0;
923	ib_cpu[len++] = 1;
924	ib_cpu[len++] = 0;
925	ib_cpu[len++] = 1;
926	ib_cpu[len++] = 0;
927	*st_size = (len - st_offset) * 4;
928
929	/* op init rc */
930	st_offset = len;
931	st_size = &ib_cpu[len++];	/* size */
932	ib_cpu[len++] = 0x01000004;	/* RENCODE_IB_OP_INIT_RC */
933	*st_size = (len - st_offset) * 4;
934
935	/* op init rc vbv */
936	st_offset = len;
937	st_size = &ib_cpu[len++];	/* size */
938	ib_cpu[len++] = 0x01000005;	/* RENCODE_IB_OP_INIT_RC_VBV_BUFFER_LEVEL */
939	*st_size = (len - st_offset) * 4;
940
941	*p_task_size = (len - task_offset) * 4;
942
943	if (vcn_unified_ring)
944		amdgpu_cs_sq_ib_tail(ib_cpu + len);
945
946	r = submit(len, AMDGPU_HW_IP_VCN_ENC);
947	CU_ASSERT_EQUAL(r, 0);
948}
949
950static int32_t h264_se (bufferInfo * bufInfo)
951{
952	uint32_t ret;
953
954	ret = bs_read_ue (bufInfo);
955	if ((ret & 0x1) == 0) {
956		ret >>= 1;
957		int32_t temp = 0 - ret;
958		return temp;
959	}
960
961	return (ret + 1) >> 1;
962}
963
964static void h264_check_0s (bufferInfo * bufInfo, int count)
965{
966	uint32_t val;
967
968	val = bs_read_u (bufInfo, count);
969	if (val != 0) {
970		printf ("field error - %d bits should be 0 is %x\n", count, val);
971	}
972}
973
974static inline int bs_eof(bufferInfo * bufinfo)
975{
976	if (bufinfo->decBuffer >= bufinfo->end)
977		return 1;
978	else
979		return 0;
980}
981
982static inline uint32_t bs_read_u1(bufferInfo *bufinfo)
983{
984	uint32_t r = 0;
985	uint32_t temp = 0;
986
987	bufinfo->numOfBitsInBuffer--;
988	if (! bs_eof(bufinfo)) {
989		temp = (((bufinfo->decData)) >> bufinfo->numOfBitsInBuffer);
990		r = temp & 0x01;
991	}
992
993	if (bufinfo->numOfBitsInBuffer == 0) {
994		bufinfo->decBuffer++;
995		bufinfo->decData = *bufinfo->decBuffer;
996		bufinfo->numOfBitsInBuffer = 8;
997	}
998
999	return r;
1000}
1001
1002static inline uint32_t bs_read_u(bufferInfo* bufinfo, int n)
1003{
1004	uint32_t r = 0;
1005	int i;
1006
1007	for (i = 0; i < n; i++) {
1008		r |= ( bs_read_u1(bufinfo) << ( n - i - 1 ) );
1009	}
1010
1011	return r;
1012}
1013
1014static inline uint32_t bs_read_ue(bufferInfo* bufinfo)
1015{
1016	int32_t r = 0;
1017	int i = 0;
1018
1019	while( (bs_read_u1(bufinfo) == 0) && (i < 32) && (!bs_eof(bufinfo))) {
1020		i++;
1021	}
1022	r = bs_read_u(bufinfo, i);
1023	r += (1 << i) - 1;
1024	return r;
1025}
1026
/*
 * Strip the 0x03 byte out of every 00 00 03 sequence in bptr[0..len), in
 * place, shifting the following bytes down by one each time.
 *
 * Returns the new length of the data. Bytes between the new length and the
 * original length are left as they were after the last shift.
 */
static uint32_t remove_03 (uint8_t * bptr, uint32_t len)
{
	uint32_t pos = 0;

	while (pos + 2 < len) {
		const uint8_t *cur = bptr + pos;

		if (cur[0] != 0 || cur[1] != 0 || cur[2] != 3) {
			pos++;
			continue;
		}

		/* keep the two zero bytes, drop the 0x03 that follows them */
		pos += 2;
		len--;
		memmove (bptr + pos, bptr + pos + 1, len - pos);
	}

	return len;
}
1043
1044static void scaling_list (uint32_t ix, uint32_t sizeOfScalingList, bufferInfo * bufInfo)
1045{
1046	uint32_t lastScale = 8, nextScale = 8;
1047	uint32_t jx;
1048	int deltaScale;
1049
1050	for (jx = 0; jx < sizeOfScalingList; jx++) {
1051		if (nextScale != 0) {
1052			deltaScale = h264_se (bufInfo);
1053			nextScale = (lastScale + deltaScale + 256) % 256;
1054		}
1055		if (nextScale == 0) {
1056			lastScale = lastScale;
1057		} else {
1058			lastScale = nextScale;
1059		}
1060	}
1061}
1062
1063static void h264_parse_sequence_parameter_set (h264_decode * dec, bufferInfo * bufInfo)
1064{
1065	uint32_t temp;
1066
1067	dec->profile = bs_read_u (bufInfo, 8);
1068	bs_read_u (bufInfo, 1);		/* constaint_set0_flag */
1069	bs_read_u (bufInfo, 1);		/* constaint_set1_flag */
1070	bs_read_u (bufInfo, 1);		/* constaint_set2_flag */
1071	bs_read_u (bufInfo, 1);		/* constaint_set3_flag */
1072	bs_read_u (bufInfo, 1);		/* constaint_set4_flag */
1073	bs_read_u (bufInfo, 1);		/* constaint_set5_flag */
1074
1075
1076	h264_check_0s (bufInfo, 2);
1077	dec->level_idc = bs_read_u (bufInfo, 8);
1078	bs_read_ue (bufInfo);	/* SPS id*/
1079
1080	if (dec->profile == 100 || dec->profile == 110 ||
1081		dec->profile == 122 || dec->profile == 144) {
1082		uint32_t chroma_format_idc = bs_read_ue (bufInfo);
1083		if (chroma_format_idc == 3) {
1084			bs_read_u (bufInfo, 1);	/* residual_colour_transform_flag */
1085		}
1086		bs_read_ue (bufInfo);	/* bit_depth_luma_minus8 */
1087		bs_read_ue (bufInfo);	/* bit_depth_chroma_minus8 */
1088		bs_read_u (bufInfo, 1);	/* qpprime_y_zero_transform_bypass_flag */
1089		uint32_t seq_scaling_matrix_present_flag = bs_read_u (bufInfo, 1);
1090
1091		if (seq_scaling_matrix_present_flag) {
1092			for (uint32_t ix = 0; ix < 8; ix++) {
1093				temp = bs_read_u (bufInfo, 1);
1094				if (temp) {
1095					scaling_list (ix, ix < 6 ? 16 : 64, bufInfo);
1096				}
1097			}
1098		}
1099	}
1100
1101	bs_read_ue (bufInfo);	/* log2_max_frame_num_minus4 */
1102	uint32_t pic_order_cnt_type = bs_read_ue (bufInfo);
1103
1104	if (pic_order_cnt_type == 0) {
1105		bs_read_ue (bufInfo);	/* log2_max_pic_order_cnt_lsb_minus4 */
1106	} else if (pic_order_cnt_type == 1) {
1107		bs_read_u (bufInfo, 1);	/* delta_pic_order_always_zero_flag */
1108		h264_se (bufInfo);	/* offset_for_non_ref_pic */
1109		h264_se (bufInfo);	/* offset_for_top_to_bottom_field */
1110		temp = bs_read_ue (bufInfo);
1111		for (uint32_t ix = 0; ix < temp; ix++) {
1112			 h264_se (bufInfo);	/* offset_for_ref_frame[index] */
1113		}
1114	}
1115	bs_read_ue (bufInfo);	/* num_ref_frames */
1116	bs_read_u (bufInfo, 1);	/* gaps_in_frame_num_flag */
1117	uint32_t PicWidthInMbs = bs_read_ue (bufInfo) + 1;
1118
1119	dec->pic_width = PicWidthInMbs * 16;
1120	uint32_t PicHeightInMapUnits = bs_read_ue (bufInfo) + 1;
1121
1122	dec->pic_height = PicHeightInMapUnits * 16;
1123	uint32_t frame_mbs_only_flag = bs_read_u (bufInfo, 1);
1124	if (!frame_mbs_only_flag) {
1125		bs_read_u (bufInfo, 1);	/* mb_adaptive_frame_field_flag */
1126	}
1127	bs_read_u (bufInfo, 1);	/* direct_8x8_inference_flag */
1128	temp = bs_read_u (bufInfo, 1);
1129	if (temp) {
1130		bs_read_ue (bufInfo);	/* frame_crop_left_offset */
1131		bs_read_ue (bufInfo);	/* frame_crop_right_offset */
1132		bs_read_ue (bufInfo);	/* frame_crop_top_offset */
1133		bs_read_ue (bufInfo);	/* frame_crop_bottom_offset */
1134	}
1135	temp = bs_read_u (bufInfo, 1);	/* VUI Parameters  */
1136}
1137
1138static void h264_slice_header (h264_decode * dec, bufferInfo * bufInfo)
1139{
1140	uint32_t temp;
1141
1142	bs_read_ue (bufInfo);	/* first_mb_in_slice */
1143	temp = bs_read_ue (bufInfo);
1144	dec->slice_type = ((temp > 5) ? (temp - 5) : temp);
1145}
1146
1147static uint8_t h264_parse_nal (h264_decode * dec, bufferInfo * bufInfo)
1148{
1149	uint8_t type = 0;
1150
1151	h264_check_0s (bufInfo, 1);
1152	dec->nal_ref_idc = bs_read_u (bufInfo, 2);
1153	dec->nal_unit_type = type = bs_read_u (bufInfo, 5);
1154	switch (type)
1155	{
1156	case H264_NAL_TYPE_NON_IDR_SLICE:
1157	case H264_NAL_TYPE_IDR_SLICE:
1158		h264_slice_header (dec, bufInfo);
1159		break;
1160	case H264_NAL_TYPE_SEQ_PARAM:
1161		h264_parse_sequence_parameter_set (dec, bufInfo);
1162		break;
1163	case H264_NAL_TYPE_PIC_PARAM:
1164	case H264_NAL_TYPE_SEI:
1165	case H264_NAL_TYPE_ACCESS_UNIT:
1166	case H264_NAL_TYPE_SEQ_EXTENSION:
1167		/* NOP */
1168		break;
1169	default:
1170		printf ("Nal type unknown %d \n ", type);
1171		break;
1172	}
1173	return type;
1174}
1175
1176static uint32_t h264_find_next_start_code (uint8_t * pBuf, uint32_t bufLen)
1177{
1178	uint32_t val;
1179	uint32_t offset, startBytes;
1180
1181	offset = startBytes = 0;
1182	if (pBuf[0] == 0 && pBuf[1] == 0 && pBuf[2] == 0 && pBuf[3] == 1) {
1183		pBuf += 4;
1184		offset = 4;
1185		startBytes = 1;
1186	} else if (pBuf[0] == 0 && pBuf[1] == 0 && pBuf[2] == 1) {
1187		pBuf += 3;
1188		offset = 3;
1189		startBytes = 1;
1190	}
1191	val = 0xffffffff;
1192	while (offset < bufLen - 3) {
1193		val <<= 8;
1194		val |= *pBuf++;
1195		offset++;
1196		if (val == H264_START_CODE)
1197			return offset - 4;
1198
1199		if ((val & 0x00ffffff) == H264_START_CODE)
1200			return offset - 3;
1201	}
1202	if (bufLen - offset <= 3 && startBytes == 0) {
1203		startBytes = 0;
1204		return 0;
1205	}
1206
1207	return offset;
1208}
1209
1210static int verify_checksum(uint8_t *buffer, uint32_t buffer_size)
1211{
1212	uint32_t buffer_pos = 0;
1213	int done = 0;
1214	h264_decode dec;
1215
1216	memset(&dec, 0, sizeof(h264_decode));
1217	do {
1218		uint32_t ret;
1219
1220		ret = h264_find_next_start_code (buffer + buffer_pos,
1221				 buffer_size - buffer_pos);
1222		if (ret == 0) {
1223			done = 1;
1224			if (buffer_pos == 0) {
1225				fprintf (stderr,
1226				 "couldn't find start code in buffer from 0\n");
1227			}
1228		} else {
1229		/* have a complete NAL from buffer_pos to end */
1230			if (ret > 3) {
1231				uint32_t nal_len;
1232				bufferInfo bufinfo;
1233
1234				nal_len = remove_03 (buffer + buffer_pos, ret);
1235				bufinfo.decBuffer = buffer + buffer_pos + (buffer[buffer_pos + 2] == 1 ? 3 : 4);
1236				bufinfo.decBufferSize = (nal_len - (buffer[buffer_pos + 2] == 1 ? 3 : 4)) * 8;
1237				bufinfo.end = buffer + buffer_pos + nal_len;
1238				bufinfo.numOfBitsInBuffer = 8;
1239				bufinfo.decData = *bufinfo.decBuffer;
1240				h264_parse_nal (&dec, &bufinfo);
1241			}
1242			buffer_pos += ret;	/*  buffer_pos points to next code */
1243		}
1244	} while (done == 0);
1245
1246	if ((dec.pic_width == gWidth) &&
1247		(dec.pic_height == gHeight) &&
1248		(dec.slice_type == gSliceType))
1249	    return 0;
1250	else
1251		return -1;
1252}
1253
1254static void check_result(struct amdgpu_vcn_bo fb_buf, struct amdgpu_vcn_bo bs_buf, int frame_type)
1255{
1256	uint32_t *fb_ptr;
1257	uint8_t *bs_ptr;
1258	uint32_t size;
1259	int r;
1260/* 	uint64_t s[3] = {0, 1121279001727, 1059312481445}; */
1261
1262	r = amdgpu_bo_cpu_map(fb_buf.handle, (void **)&fb_buf.ptr);
1263	CU_ASSERT_EQUAL(r, 0);
1264	fb_ptr = (uint32_t*)fb_buf.ptr;
1265	size = fb_ptr[6];
1266	r = amdgpu_bo_cpu_unmap(fb_buf.handle);
1267	CU_ASSERT_EQUAL(r, 0);
1268	r = amdgpu_bo_cpu_map(bs_buf.handle, (void **)&bs_buf.ptr);
1269	CU_ASSERT_EQUAL(r, 0);
1270
1271	bs_ptr = (uint8_t*)bs_buf.ptr;
1272	r = verify_checksum(bs_ptr, size);
1273	CU_ASSERT_EQUAL(r, 0);
1274	r = amdgpu_bo_cpu_unmap(bs_buf.handle);
1275
1276	CU_ASSERT_EQUAL(r, 0);
1277}
1278
1279static void amdgpu_cs_vcn_ib_zero_count(int *len, int num)
1280{
1281	for (int i = 0; i < num; i++)
1282		ib_cpu[(*len)++] = 0;
1283}
1284
1285static void amdgpu_cs_vcn_enc_encode_frame(int frame_type)
1286{
1287	struct amdgpu_vcn_bo bs_buf, fb_buf, input_buf;
1288	int len, r;
1289	unsigned width = 160, height = 128, buf_size;
1290	uint32_t *p_task_size = NULL;
1291	uint32_t task_offset = 0, st_offset;
1292	uint32_t *st_size = NULL;
1293	uint32_t fw_maj = 1, fw_min = 9;
1294
1295	if (vcn_ip_version_major == 2) {
1296		fw_maj = 1;
1297		fw_min = 1;
1298	} else if (vcn_ip_version_major == 3) {
1299		fw_maj = 1;
1300		fw_min = 0;
1301	}
1302	gSliceType = frame_type;
1303	buf_size = ALIGN(width, 256) * ALIGN(height, 32) * 3 / 2;
1304
1305	num_resources = 0;
1306	alloc_resource(&bs_buf, 4096, AMDGPU_GEM_DOMAIN_GTT);
1307	alloc_resource(&fb_buf, 4096, AMDGPU_GEM_DOMAIN_GTT);
1308	alloc_resource(&input_buf, buf_size, AMDGPU_GEM_DOMAIN_GTT);
1309	resources[num_resources++] = enc_buf.handle;
1310	resources[num_resources++] = cpb_buf.handle;
1311	resources[num_resources++] = bs_buf.handle;
1312	resources[num_resources++] = fb_buf.handle;
1313	resources[num_resources++] = input_buf.handle;
1314	resources[num_resources++] = ib_handle;
1315
1316
1317	r = amdgpu_bo_cpu_map(bs_buf.handle, (void**)&bs_buf.ptr);
1318	memset(bs_buf.ptr, 0, 4096);
1319	r = amdgpu_bo_cpu_unmap(bs_buf.handle);
1320
1321	r = amdgpu_bo_cpu_map(fb_buf.handle, (void**)&fb_buf.ptr);
1322	memset(fb_buf.ptr, 0, 4096);
1323	r = amdgpu_bo_cpu_unmap(fb_buf.handle);
1324
1325	r = amdgpu_bo_cpu_map(input_buf.handle, (void **)&input_buf.ptr);
1326	CU_ASSERT_EQUAL(r, 0);
1327
1328	for (int i = 0; i < ALIGN(height, 32) * 3 / 2; i++)
1329		memcpy(input_buf.ptr + i * ALIGN(width, 256), frame + i * width, width);
1330
1331	r = amdgpu_bo_cpu_unmap(input_buf.handle);
1332	CU_ASSERT_EQUAL(r, 0);
1333
1334	len = 0;
1335
1336	if (vcn_unified_ring)
1337		amdgpu_cs_sq_head(ib_cpu, &len, true);
1338
1339	/* session info */
1340	st_offset = len;
1341	st_size = &ib_cpu[len++];	/* size */
1342	ib_cpu[len++] = 0x00000001;	/* RENCODE_IB_PARAM_SESSION_INFO */
1343	ib_cpu[len++] = ((fw_maj << 16) | (fw_min << 0));
1344	ib_cpu[len++] = enc_buf.addr >> 32;
1345	ib_cpu[len++] = enc_buf.addr;
1346	ib_cpu[len++] = 1;	/* RENCODE_ENGINE_TYPE_ENCODE */;
1347	*st_size = (len - st_offset) * 4;
1348
1349	/* task info */
1350	task_offset = len;
1351	st_offset = len;
1352	st_size = &ib_cpu[len++];	/* size */
1353	ib_cpu[len++] = 0x00000002;	/* RENCODE_IB_PARAM_TASK_INFO */
1354	p_task_size = &ib_cpu[len++];
1355	ib_cpu[len++] = enc_task_id++;	/* task_id */
1356	ib_cpu[len++] = 1;	/* feedback */
1357	*st_size = (len - st_offset) * 4;
1358
1359	if (frame_type == 2) {
1360		/* sps */
1361		st_offset = len;
1362		st_size = &ib_cpu[len++];	/* size */
1363		if(vcn_ip_version_major == 1)
1364			ib_cpu[len++] = 0x00000020;	/* RENCODE_IB_PARAM_DIRECT_OUTPUT_NALU vcn 1 */
1365		else
1366			ib_cpu[len++] = 0x0000000a;	/* RENCODE_IB_PARAM_DIRECT_OUTPUT_NALU other vcn */
1367		ib_cpu[len++] = 0x00000002;	/* RENCODE_DIRECT_OUTPUT_NALU_TYPE_SPS */
1368		ib_cpu[len++] = 0x00000011;	/* sps len */
1369		ib_cpu[len++] = 0x00000001;	/* start code */
1370		ib_cpu[len++] = 0x6764440b;
1371		ib_cpu[len++] = 0xac54c284;
1372		ib_cpu[len++] = 0x68078442;
1373		ib_cpu[len++] = 0x37000000;
1374		*st_size = (len - st_offset) * 4;
1375
1376		/* pps */
1377		st_offset = len;
1378		st_size = &ib_cpu[len++];	/* size */
1379		if(vcn_ip_version_major == 1)
1380			ib_cpu[len++] = 0x00000020;	/* RENCODE_IB_PARAM_DIRECT_OUTPUT_NALU vcn 1*/
1381		else
1382			ib_cpu[len++] = 0x0000000a;	/* RENCODE_IB_PARAM_DIRECT_OUTPUT_NALU other vcn*/
1383		ib_cpu[len++] = 0x00000003;	/* RENCODE_DIRECT_OUTPUT_NALU_TYPE_PPS */
1384		ib_cpu[len++] = 0x00000008;	/* pps len */
1385		ib_cpu[len++] = 0x00000001;	/* start code */
1386		ib_cpu[len++] = 0x68ce3c80;
1387		*st_size = (len - st_offset) * 4;
1388	}
1389
1390	/* slice header */
1391	st_offset = len;
1392	st_size = &ib_cpu[len++];	/* size */
1393	if(vcn_ip_version_major == 1)
1394		ib_cpu[len++] = 0x0000000a; /* RENCODE_IB_PARAM_SLICE_HEADER vcn 1 */
1395	else
1396		ib_cpu[len++] = 0x0000000b; /* RENCODE_IB_PARAM_SLICE_HEADER other vcn */
1397	if (frame_type == 2) {
1398		ib_cpu[len++] = 0x65000000;
1399		ib_cpu[len++] = 0x11040000;
1400	} else {
1401		ib_cpu[len++] = 0x41000000;
1402		ib_cpu[len++] = 0x34210000;
1403	}
1404	ib_cpu[len++] = 0xe0000000;
1405	amdgpu_cs_vcn_ib_zero_count(&len, 13);
1406
1407	ib_cpu[len++] = 0x00000001;
1408	ib_cpu[len++] = 0x00000008;
1409	ib_cpu[len++] = 0x00020000;
1410	ib_cpu[len++] = 0x00000000;
1411	ib_cpu[len++] = 0x00000001;
1412	ib_cpu[len++] = 0x00000015;
1413	ib_cpu[len++] = 0x00020001;
1414	ib_cpu[len++] = 0x00000000;
1415	ib_cpu[len++] = 0x00000001;
1416	ib_cpu[len++] = 0x00000003;
1417	amdgpu_cs_vcn_ib_zero_count(&len, 22);
1418	*st_size = (len - st_offset) * 4;
1419
1420	/* encode params */
1421	st_offset = len;
1422	st_size = &ib_cpu[len++];	/* size */
1423	if(vcn_ip_version_major == 1)
1424		ib_cpu[len++] = 0x0000000b;	/* RENCODE_IB_PARAM_ENCODE_PARAMS vcn 1 */
1425	else
1426		ib_cpu[len++] = 0x0000000f;	/* RENCODE_IB_PARAM_ENCODE_PARAMS other vcn */
1427	ib_cpu[len++] = frame_type;
1428	ib_cpu[len++] = 0x0001f000;
1429	ib_cpu[len++] = input_buf.addr >> 32;
1430	ib_cpu[len++] = input_buf.addr;
1431	ib_cpu[len++] = (input_buf.addr + ALIGN(width, 256) * ALIGN(height, 32)) >> 32;
1432	ib_cpu[len++] = input_buf.addr + ALIGN(width, 256) * ALIGN(height, 32);
1433	ib_cpu[len++] = 0x00000100;
1434	ib_cpu[len++] = 0x00000080;
1435	ib_cpu[len++] = 0x00000000;
1436	ib_cpu[len++] = 0xffffffff;
1437	ib_cpu[len++] = 0x00000000;
1438	*st_size = (len - st_offset) * 4;
1439
1440	/* encode params h264 */
1441	st_offset = len;
1442	st_size = &ib_cpu[len++];	/* size */
1443	ib_cpu[len++] = 0x00200003;	/* RENCODE_H264_IB_PARAM_ENCODE_PARAMS */
1444	if (vcn_ip_version_major <= 2) {
1445		ib_cpu[len++] = 0x00000000;
1446		ib_cpu[len++] = 0x00000000;
1447		ib_cpu[len++] = 0x00000000;
1448		ib_cpu[len++] = 0xffffffff;
1449	} else {
1450		ib_cpu[len++] = 0x00000000;
1451		ib_cpu[len++] = 0x00000000;
1452		ib_cpu[len++] = 0x00000000;
1453		ib_cpu[len++] = 0x00000000;
1454		ib_cpu[len++] = 0x00000000;
1455		ib_cpu[len++] = 0x00000000;
1456		ib_cpu[len++] = 0x00000000;
1457		ib_cpu[len++] = 0xffffffff;
1458		ib_cpu[len++] = 0x00000000;
1459		ib_cpu[len++] = 0x00000000;
1460		ib_cpu[len++] = 0x00000000;
1461		ib_cpu[len++] = 0x00000000;
1462		ib_cpu[len++] = 0xffffffff;
1463		ib_cpu[len++] = 0x00000000;
1464		ib_cpu[len++] = 0x00000000;
1465		ib_cpu[len++] = 0x00000000;
1466		ib_cpu[len++] = 0x00000000;
1467		ib_cpu[len++] = 0x00000001;
1468	}
1469	*st_size = (len - st_offset) * 4;
1470
1471	/* encode context */
1472	st_offset = len;
1473	st_size = &ib_cpu[len++];	/* size */
1474	if(vcn_ip_version_major == 1)
1475		ib_cpu[len++] = 0x0000000d;	/* ENCODE_CONTEXT_BUFFER  vcn 1 */
1476	else
1477		ib_cpu[len++] = 0x00000011;	/* ENCODE_CONTEXT_BUFFER  other vcn */
1478	ib_cpu[len++] = cpb_buf.addr >> 32;
1479	ib_cpu[len++] = cpb_buf.addr;
1480	ib_cpu[len++] = 0x00000000;	/* swizzle mode */
1481	ib_cpu[len++] = 0x00000100;	/* luma pitch */
1482	ib_cpu[len++] = 0x00000100;	/* chroma pitch */
1483	ib_cpu[len++] = 0x00000002; /* no reconstructed picture */
1484	ib_cpu[len++] = 0x00000000;	/* reconstructed pic 1 luma offset */
1485	ib_cpu[len++] = ALIGN(width, 256) * ALIGN(height, 32);	/* pic1 chroma offset */
1486	if(vcn_ip_version_major == 4)
1487		amdgpu_cs_vcn_ib_zero_count(&len, 2);
1488	ib_cpu[len++] = ALIGN(width, 256) * ALIGN(height, 32) * 3 / 2;	/* pic2 luma offset */
1489	ib_cpu[len++] = ALIGN(width, 256) * ALIGN(height, 32) * 5 / 2;	/* pic2 chroma offset */
1490
1491	amdgpu_cs_vcn_ib_zero_count(&len, 280);
1492	*st_size = (len - st_offset) * 4;
1493
1494	/* bitstream buffer */
1495	st_offset = len;
1496	st_size = &ib_cpu[len++];	/* size */
1497	if(vcn_ip_version_major == 1)
1498		ib_cpu[len++] = 0x0000000e;	/* VIDEO_BITSTREAM_BUFFER vcn 1 */
1499	else
1500		ib_cpu[len++] = 0x00000012;	/* VIDEO_BITSTREAM_BUFFER other vcn */
1501
1502	ib_cpu[len++] = 0x00000000;	/* mode */
1503	ib_cpu[len++] = bs_buf.addr >> 32;
1504	ib_cpu[len++] = bs_buf.addr;
1505	ib_cpu[len++] = 0x0001f000;
1506	ib_cpu[len++] = 0x00000000;
1507	*st_size = (len - st_offset) * 4;
1508
1509	/* feedback */
1510	st_offset = len;
1511	st_size = &ib_cpu[len++];	/* size */
1512	if(vcn_ip_version_major == 1)
1513		ib_cpu[len++] = 0x00000010;	/* FEEDBACK_BUFFER vcn 1 */
1514	else
1515		ib_cpu[len++] = 0x00000015;	/* FEEDBACK_BUFFER vcn 2,3 */
1516	ib_cpu[len++] = 0x00000000;
1517	ib_cpu[len++] = fb_buf.addr >> 32;
1518	ib_cpu[len++] = fb_buf.addr;
1519	ib_cpu[len++] = 0x00000010;
1520	ib_cpu[len++] = 0x00000028;
1521	*st_size = (len - st_offset) * 4;
1522
1523	/* intra refresh */
1524	st_offset = len;
1525	st_size = &ib_cpu[len++];
1526	if(vcn_ip_version_major == 1)
1527		ib_cpu[len++] = 0x0000000c;	/* INTRA_REFRESH vcn 1 */
1528	else
1529		ib_cpu[len++] = 0x00000010;	/* INTRA_REFRESH vcn 2,3 */
1530	ib_cpu[len++] = 0x00000000;
1531	ib_cpu[len++] = 0x00000000;
1532	ib_cpu[len++] = 0x00000000;
1533	*st_size = (len - st_offset) * 4;
1534
1535	if(vcn_ip_version_major != 1) {
1536		/* Input Format */
1537		st_offset = len;
1538		st_size = &ib_cpu[len++];
1539		ib_cpu[len++] = 0x0000000c;
1540		ib_cpu[len++] = 0x00000000;	/* RENCODE_COLOR_VOLUME_G22_BT709 */
1541		ib_cpu[len++] = 0x00000000;
1542		ib_cpu[len++] = 0x00000000;
1543		ib_cpu[len++] = 0x00000000;
1544		ib_cpu[len++] = 0x00000000;
1545		ib_cpu[len++] = 0x00000000;	/* RENCODE_COLOR_BIT_DEPTH_8_BIT */
1546		ib_cpu[len++] = 0x00000000;	/* RENCODE_COLOR_PACKING_FORMAT_NV12 */
1547		*st_size = (len - st_offset) * 4;
1548
1549		/* Output Format */
1550		st_offset = len;
1551		st_size = &ib_cpu[len++];
1552		ib_cpu[len++] = 0x0000000d;
1553		ib_cpu[len++] = 0x00000000;	/* RENCODE_COLOR_VOLUME_G22_BT709 */
1554		ib_cpu[len++] = 0x00000000;
1555		ib_cpu[len++] = 0x00000000;
1556		ib_cpu[len++] = 0x00000000;	/* RENCODE_COLOR_BIT_DEPTH_8_BIT */
1557		*st_size = (len - st_offset) * 4;
1558	}
1559	/* op_speed */
1560	st_offset = len;
1561	st_size = &ib_cpu[len++];
1562	ib_cpu[len++] = 0x01000006;	/* SPEED_ENCODING_MODE */
1563	*st_size = (len - st_offset) * 4;
1564
1565	/* op_enc */
1566	st_offset = len;
1567	st_size = &ib_cpu[len++];
1568	ib_cpu[len++] = 0x01000003;
1569	*st_size = (len - st_offset) * 4;
1570
1571	*p_task_size = (len - task_offset) * 4;
1572
1573	if (vcn_unified_ring)
1574		amdgpu_cs_sq_ib_tail(ib_cpu + len);
1575
1576	r = submit(len, AMDGPU_HW_IP_VCN_ENC);
1577	CU_ASSERT_EQUAL(r, 0);
1578
1579	/* check result */
1580	check_result(fb_buf, bs_buf, frame_type);
1581
1582	free_resource(&fb_buf);
1583	free_resource(&bs_buf);
1584	free_resource(&input_buf);
1585}
1586
/* Test case: encode a single frame using frame type 2 (IDR frame). */
static void amdgpu_cs_vcn_enc_encode(void)
{
	const int idr_frame_type = 2;

	amdgpu_cs_vcn_enc_encode_frame(idr_frame_type);
}
1591
1592static void amdgpu_cs_vcn_enc_destroy(void)
1593{
1594	int len = 0, r;
1595	uint32_t *p_task_size = NULL;
1596	uint32_t task_offset = 0, st_offset;
1597	uint32_t *st_size = NULL;
1598	uint32_t fw_maj = 1, fw_min = 9;
1599
1600	if (vcn_ip_version_major == 2) {
1601		fw_maj = 1;
1602		fw_min = 1;
1603	} else if (vcn_ip_version_major == 3) {
1604		fw_maj = 1;
1605		fw_min = 0;
1606	}
1607
1608	num_resources = 0;
1609/* 	alloc_resource(&enc_buf, 128 * 1024, AMDGPU_GEM_DOMAIN_GTT); */
1610	resources[num_resources++] = enc_buf.handle;
1611	resources[num_resources++] = ib_handle;
1612
1613	if (vcn_unified_ring)
1614		amdgpu_cs_sq_head(ib_cpu, &len, true);
1615
1616	/* session info */
1617	st_offset = len;
1618	st_size = &ib_cpu[len++];	/* size */
1619	ib_cpu[len++] = 0x00000001;	/* RENCODE_IB_PARAM_SESSION_INFO */
1620	ib_cpu[len++] = ((fw_maj << 16) | (fw_min << 0));
1621	ib_cpu[len++] = enc_buf.addr >> 32;
1622	ib_cpu[len++] = enc_buf.addr;
1623	ib_cpu[len++] = 1;	/* RENCODE_ENGINE_TYPE_ENCODE; */
1624	*st_size = (len - st_offset) * 4;
1625
1626	/* task info */
1627	task_offset = len;
1628	st_offset = len;
1629	st_size = &ib_cpu[len++];	/* size */
1630	ib_cpu[len++] = 0x00000002;	/* RENCODE_IB_PARAM_TASK_INFO */
1631	p_task_size = &ib_cpu[len++];
1632	ib_cpu[len++] = enc_task_id++;	/* task_id */
1633	ib_cpu[len++] = 0;	/* feedback */
1634	*st_size = (len - st_offset) * 4;
1635
1636	/*  op close */
1637	st_offset = len;
1638	st_size = &ib_cpu[len++];
1639	ib_cpu[len++] = 0x01000002;	/* RENCODE_IB_OP_CLOSE_SESSION */
1640	*st_size = (len - st_offset) * 4;
1641
1642	*p_task_size = (len - task_offset) * 4;
1643
1644	if (vcn_unified_ring)
1645		amdgpu_cs_sq_ib_tail(ib_cpu + len);
1646
1647	r = submit(len, AMDGPU_HW_IP_VCN_ENC);
1648	CU_ASSERT_EQUAL(r, 0);
1649
1650	free_resource(&cpb_buf);
1651	free_resource(&enc_buf);
1652}
1653