1d8807b2fSmrg/*
2d8807b2fSmrg * Copyright 2017 Advanced Micro Devices, Inc.
3d8807b2fSmrg *
4d8807b2fSmrg * Permission is hereby granted, free of charge, to any person obtaining a
5d8807b2fSmrg * copy of this software and associated documentation files (the "Software"),
6d8807b2fSmrg * to deal in the Software without restriction, including without limitation
7d8807b2fSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8d8807b2fSmrg * and/or sell copies of the Software, and to permit persons to whom the
9d8807b2fSmrg * Software is furnished to do so, subject to the following conditions:
10d8807b2fSmrg *
11d8807b2fSmrg * The above copyright notice and this permission notice shall be included in
12d8807b2fSmrg * all copies or substantial portions of the Software.
13d8807b2fSmrg *
14d8807b2fSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15d8807b2fSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16d8807b2fSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17d8807b2fSmrg * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18d8807b2fSmrg * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19d8807b2fSmrg * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20d8807b2fSmrg * OTHER DEALINGS IN THE SOFTWARE.
21d8807b2fSmrg *
22d8807b2fSmrg*/
23d8807b2fSmrg
24d8807b2fSmrg#include <stdio.h>
250ed5401bSmrg#include <string.h>
26d8807b2fSmrg#include <inttypes.h>
270ed5401bSmrg#include <unistd.h>
28d8807b2fSmrg
29d8807b2fSmrg#include "CUnit/Basic.h"
30d8807b2fSmrg
31b0ab5608Smrg#include <unistd.h>
32d8807b2fSmrg#include "util_math.h"
33d8807b2fSmrg
34d8807b2fSmrg#include "amdgpu_test.h"
35d8807b2fSmrg#include "amdgpu_drm.h"
36d8807b2fSmrg#include "amdgpu_internal.h"
37d8807b2fSmrg#include "decode_messages.h"
380ed5401bSmrg#include "frame.h"
39d8807b2fSmrg
/* Size in bytes of the indirect buffer allocated once for the whole suite. */
#define IB_SIZE		4096
/* Number of slots in the resources[] array handed to each submission. */
#define MAX_RESOURCES	16

/* Buffer selectors accepted as the cmd argument of vcn_dec_cmd(). */
#define DECODE_CMD_MSG_BUFFER                              0x00000000
#define DECODE_CMD_DPB_BUFFER                              0x00000001
#define DECODE_CMD_DECODING_TARGET_BUFFER                  0x00000002
#define DECODE_CMD_FEEDBACK_BUFFER                         0x00000003
#define DECODE_CMD_PROB_TBL_BUFFER                         0x00000004
#define DECODE_CMD_SESSION_CONTEXT_BUFFER                  0x00000005
#define DECODE_CMD_BITSTREAM_BUFFER                        0x00000100
#define DECODE_CMD_IT_SCALING_TABLE_BUFFER                 0x00000204
#define DECODE_CMD_CONTEXT_BUFFER                          0x00000206

/* package_type stored in the decode IB package header by vcn_dec_cmd(). */
#define DECODE_IB_PARAM_DECODE_BUFFER                      (0x00000001)

/* Bits OR-ed into rvcn_decode_buffer_t.valid_buf_flag, one per buffer kind. */
#define DECODE_CMDBUF_FLAGS_MSG_BUFFER                     (0x00000001)
#define DECODE_CMDBUF_FLAGS_DPB_BUFFER                     (0x00000002)
#define DECODE_CMDBUF_FLAGS_BITSTREAM_BUFFER               (0x00000004)
#define DECODE_CMDBUF_FLAGS_DECODING_TARGET_BUFFER         (0x00000008)
#define DECODE_CMDBUF_FLAGS_FEEDBACK_BUFFER                (0x00000010)
#define DECODE_CMDBUF_FLAGS_IT_SCALING_BUFFER              (0x00000200)
#define DECODE_CMDBUF_FLAGS_CONTEXT_BUFFER                 (0x00000800)
#define DECODE_CMDBUF_FLAGS_PROB_TBL_BUFFER                (0x00001000)
#define DECODE_CMDBUF_FLAGS_SESSION_CONTEXT_BUFFER         (0x00100000)

/*
 * Ring-mode switches.  Both are turned on by suite_vcn_tests_enable() when
 * the reported VCN major version is 4 or newer.
 */
static bool vcn_dec_sw_ring = false;
static bool vcn_unified_ring = false;

/* H.264 nal_unit_type values used by the bitstream parsing helpers. */
#define H264_NAL_TYPE_NON_IDR_SLICE 0x1
#define H264_NAL_TYPE_DP_A_SLICE 0x2
#define H264_NAL_TYPE_DP_B_SLICE 0x3
#define H264_NAL_TYPE_DP_C_SLICE 0x4
#define H264_NAL_TYPE_IDR_SLICE 0x5
#define H264_NAL_TYPE_SEI 0x6
#define H264_NAL_TYPE_SEQ_PARAM 0x7
#define H264_NAL_TYPE_PIC_PARAM 0x8
#define H264_NAL_TYPE_ACCESS_UNIT 0x9
#define H264_NAL_TYPE_END_OF_SEQ 0xa
#define H264_NAL_TYPE_END_OF_STREAM 0xb
#define H264_NAL_TYPE_FILLER_DATA 0xc
#define H264_NAL_TYPE_SEQ_EXTENSION 0xd

/* Three-byte start code value (00 00 01). */
#define H264_START_CODE 0x000001
830ed5401bSmrg
84d8807b2fSmrgstruct amdgpu_vcn_bo {
85d8807b2fSmrg	amdgpu_bo_handle handle;
86d8807b2fSmrg	amdgpu_va_handle va_handle;
87d8807b2fSmrg	uint64_t addr;
88d8807b2fSmrg	uint64_t size;
89d8807b2fSmrg	uint8_t *ptr;
90d8807b2fSmrg};
91d8807b2fSmrg
/*
 * Decode buffer descriptor placed directly into the indirect buffer on the
 * software-ring path (see vcn_dec_cmd()).  The field order is the in-memory
 * layout and must not be rearranged: one flag word followed by hi/lo address
 * pairs.
 */
typedef struct rvcn_decode_buffer_s {
	/* OR of DECODE_CMDBUF_FLAGS_* for every address pair that was filled in */
	unsigned int valid_buf_flag;

	/* pairs written by vcn_dec_cmd(): upper 32 bits in _hi, lower in _lo */
	unsigned int msg_buffer_address_hi;
	unsigned int msg_buffer_address_lo;
	unsigned int dpb_buffer_address_hi;
	unsigned int dpb_buffer_address_lo;
	unsigned int target_buffer_address_hi;
	unsigned int target_buffer_address_lo;
	unsigned int session_contex_buffer_address_hi;
	unsigned int session_contex_buffer_address_lo;
	unsigned int bitstream_buffer_address_hi;
	unsigned int bitstream_buffer_address_lo;
	unsigned int context_buffer_address_hi;
	unsigned int context_buffer_address_lo;
	unsigned int feedback_buffer_address_hi;
	unsigned int feedback_buffer_address_lo;

	/* not written by vcn_dec_cmd(); left zero by its memset */
	unsigned int luma_hist_buffer_address_hi;
	unsigned int luma_hist_buffer_address_lo;

	unsigned int prob_tbl_buffer_address_hi;
	unsigned int prob_tbl_buffer_address_lo;

	/* not written by vcn_dec_cmd(); left zero by its memset */
	unsigned int sclr_coeff_buffer_address_hi;
	unsigned int sclr_coeff_buffer_address_lo;

	unsigned int it_sclr_table_buffer_address_hi;
	unsigned int it_sclr_table_buffer_address_lo;

	/* the remaining pairs are not written by vcn_dec_cmd() either */
	unsigned int sclr_target_buffer_address_hi;
	unsigned int sclr_target_buffer_address_lo;
	unsigned int cenc_size_info_buffer_address_hi;
	unsigned int cenc_size_info_buffer_address_lo;
	unsigned int mpeg2_pic_param_buffer_address_hi;
	unsigned int mpeg2_pic_param_buffer_address_lo;
	unsigned int mpeg2_mb_control_buffer_address_hi;
	unsigned int mpeg2_mb_control_buffer_address_lo;
	unsigned int mpeg2_idct_coeff_buffer_address_hi;
	unsigned int mpeg2_idct_coeff_buffer_address_lo;
} rvcn_decode_buffer_t;
127b0ab5608Smrg
/*
 * Two-dword header that vcn_dec_cmd() writes in front of the
 * rvcn_decode_buffer_t payload on the software-ring path.
 */
typedef struct rvcn_decode_ib_package_s {
	/* header size plus payload size, in bytes */
	unsigned int package_size;
	/* payload kind, e.g. DECODE_IB_PARAM_DECODE_BUFFER */
	unsigned int package_type;
} rvcn_decode_ib_package_t;
132b0ab5608Smrg
133b0ab5608Smrg
/*
 * One set of per-generation values emitted into the indirect buffer on the
 * non-software-ring decode path; each is written immediately before the
 * data dword it applies to.
 */
struct amdgpu_vcn_reg {
	/* precedes the low 32 bits of a buffer address */
	uint32_t data0;
	/* precedes the high 32 bits of a buffer address */
	uint32_t data1;
	/* precedes the command word (cmd << 1) */
	uint32_t cmd;
	/* used, followed by 0, to pad the IB to a multiple of 16 dwords */
	uint32_t nop;
	/* written, followed by 1, at the end of the decode IB */
	uint32_t cntl;
};
1415324fb0dSmrg
/*
 * Reader state handed to the bs_* and h264_* bitstream helpers.
 * NOTE(review): the helpers are not visible from here; the per-field notes
 * are inferred from the names and should be confirmed against them.
 */
typedef struct BufferInfo_t {
	/* presumably the number of unread bits left in decData */
	uint32_t numOfBitsInBuffer;
	/* presumably the current read position in the input */
	const uint8_t *decBuffer;
	/* presumably the byte currently being consumed */
	uint8_t decData;
	/* presumably the input length in bytes */
	uint32_t decBufferSize;
	/* presumably one past the last input byte */
	const uint8_t *end;
} bufferInfo;
1490ed5401bSmrg
/*
 * Values collected by the H.264 parsing helpers (h264_parse_nal() and
 * friends) from a bitstream.
 */
typedef struct h264_decode_t {
	uint8_t profile;
	uint8_t level_idc;
	uint8_t nal_ref_idc;
	/* one of the H264_NAL_TYPE_* values */
	uint8_t nal_unit_type;
	uint32_t pic_width;
	uint32_t pic_height;
	uint32_t slice_type;
} h264_decode;
1580ed5401bSmrg
159d8807b2fSmrgstatic amdgpu_device_handle device_handle;
160d8807b2fSmrgstatic uint32_t major_version;
161d8807b2fSmrgstatic uint32_t minor_version;
162d8807b2fSmrgstatic uint32_t family_id;
16341687f09Smrgstatic uint32_t chip_rev;
16441687f09Smrgstatic uint32_t chip_id;
1659bd392adSmrgstatic uint32_t asic_id;
16641687f09Smrgstatic uint32_t chip_rev;
1670ed5401bSmrgstatic struct amdgpu_vcn_bo enc_buf;
1680ed5401bSmrgstatic struct amdgpu_vcn_bo cpb_buf;
1690ed5401bSmrgstatic uint32_t enc_task_id;
170d8807b2fSmrg
171d8807b2fSmrgstatic amdgpu_context_handle context_handle;
172d8807b2fSmrgstatic amdgpu_bo_handle ib_handle;
173d8807b2fSmrgstatic amdgpu_va_handle ib_va_handle;
174d8807b2fSmrgstatic uint64_t ib_mc_address;
175d8807b2fSmrgstatic uint32_t *ib_cpu;
176b0ab5608Smrgstatic uint32_t *ib_checksum;
177b0ab5608Smrgstatic uint32_t *ib_size_in_dw;
178b0ab5608Smrg
179b0ab5608Smrgstatic rvcn_decode_buffer_t *decode_buffer;
180bbff01ceSmrgstruct amdgpu_vcn_bo session_ctx_buf;
181d8807b2fSmrg
182d8807b2fSmrgstatic amdgpu_bo_handle resources[MAX_RESOURCES];
183d8807b2fSmrgstatic unsigned num_resources;
1840ed5401bSmrg
1850ed5401bSmrgstatic uint8_t vcn_reg_index;
1860ed5401bSmrgstatic struct amdgpu_vcn_reg reg[] = {
1870ed5401bSmrg	{0x81c4, 0x81c5, 0x81c3, 0x81ff, 0x81c6},
1880ed5401bSmrg	{0x504, 0x505, 0x503, 0x53f, 0x506},
1890ed5401bSmrg	{0x10, 0x11, 0xf, 0x29, 0x26d},
1900ed5401bSmrg};
1910ed5401bSmrg
1920ed5401bSmrguint32_t gWidth, gHeight, gSliceType;
193b0ab5608Smrgstatic uint32_t vcn_ip_version_major;
194b0ab5608Smrgstatic uint32_t vcn_ip_version_minor;
195d8807b2fSmrgstatic void amdgpu_cs_vcn_dec_create(void);
196d8807b2fSmrgstatic void amdgpu_cs_vcn_dec_decode(void);
197d8807b2fSmrgstatic void amdgpu_cs_vcn_dec_destroy(void);
198d8807b2fSmrg
199d8807b2fSmrgstatic void amdgpu_cs_vcn_enc_create(void);
200d8807b2fSmrgstatic void amdgpu_cs_vcn_enc_encode(void);
201d8807b2fSmrgstatic void amdgpu_cs_vcn_enc_destroy(void);
202d8807b2fSmrg
203b0ab5608Smrgstatic void amdgpu_cs_sq_head(uint32_t *base, int *offset, bool enc);
204b0ab5608Smrgstatic void amdgpu_cs_sq_ib_tail(uint32_t *end);
2050ed5401bSmrgstatic void h264_check_0s (bufferInfo * bufInfo, int count);
2060ed5401bSmrgstatic int32_t h264_se (bufferInfo * bufInfo);
2070ed5401bSmrgstatic inline uint32_t bs_read_u1(bufferInfo *bufinfo);
2080ed5401bSmrgstatic inline int bs_eof(bufferInfo *bufinfo);
2090ed5401bSmrgstatic inline uint32_t bs_read_u(bufferInfo* bufinfo, int n);
2100ed5401bSmrgstatic inline uint32_t bs_read_ue(bufferInfo* bufinfo);
2110ed5401bSmrgstatic uint32_t remove_03 (uint8_t *bptr, uint32_t len);
2120ed5401bSmrgstatic void scaling_list (uint32_t ix, uint32_t sizeOfScalingList, bufferInfo *bufInfo);
2130ed5401bSmrgstatic void h264_parse_sequence_parameter_set (h264_decode * dec, bufferInfo *bufInfo);
2140ed5401bSmrgstatic void h264_slice_header (h264_decode *dec, bufferInfo *bufInfo);
2150ed5401bSmrgstatic uint8_t h264_parse_nal (h264_decode *dec, bufferInfo *bufInfo);
2160ed5401bSmrgstatic uint32_t h264_find_next_start_code (uint8_t *pBuf, uint32_t bufLen);
2170ed5401bSmrgstatic int verify_checksum(uint8_t *buffer, uint32_t buffer_size);
2180ed5401bSmrg
219d8807b2fSmrgCU_TestInfo vcn_tests[] = {
220d8807b2fSmrg
221d8807b2fSmrg	{ "VCN DEC create",  amdgpu_cs_vcn_dec_create },
222d8807b2fSmrg	{ "VCN DEC decode",  amdgpu_cs_vcn_dec_decode },
223d8807b2fSmrg	{ "VCN DEC destroy",  amdgpu_cs_vcn_dec_destroy },
224d8807b2fSmrg
225d8807b2fSmrg	{ "VCN ENC create",  amdgpu_cs_vcn_enc_create },
2260ed5401bSmrg	{ "VCN ENC encode",  amdgpu_cs_vcn_enc_encode },
227d8807b2fSmrg	{ "VCN ENC destroy",  amdgpu_cs_vcn_enc_destroy },
228d8807b2fSmrg	CU_TEST_INFO_NULL,
229d8807b2fSmrg};
230d8807b2fSmrg
23100a23bdaSmrgCU_BOOL suite_vcn_tests_enable(void)
23200a23bdaSmrg{
23341687f09Smrg	struct drm_amdgpu_info_hw_ip info;
234b0ab5608Smrg	bool enc_ring, dec_ring;
235b0ab5608Smrg	int r;
23600a23bdaSmrg
23700a23bdaSmrg	if (amdgpu_device_initialize(drm_amdgpu[0], &major_version,
23800a23bdaSmrg				   &minor_version, &device_handle))
23900a23bdaSmrg		return CU_FALSE;
24000a23bdaSmrg
24100a23bdaSmrg	family_id = device_handle->info.family_id;
2429bd392adSmrg	asic_id = device_handle->info.asic_id;
24341687f09Smrg	chip_rev = device_handle->info.chip_rev;
24441687f09Smrg	chip_id = device_handle->info.chip_external_rev;
24541687f09Smrg
246b0ab5608Smrg	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_VCN_ENC, 0, &info);
247b0ab5608Smrg	if (!r) {
248b0ab5608Smrg		vcn_ip_version_major = info.hw_ip_version_major;
249b0ab5608Smrg		vcn_ip_version_minor = info.hw_ip_version_minor;
250b0ab5608Smrg		enc_ring = !!info.available_rings;
251b0ab5608Smrg		/* in vcn 4.0 it re-uses encoding queue as unified queue */
252b0ab5608Smrg		if (vcn_ip_version_major >= 4) {
253b0ab5608Smrg			vcn_unified_ring = true;
254b0ab5608Smrg			vcn_dec_sw_ring = true;
255b0ab5608Smrg			dec_ring = enc_ring;
256b0ab5608Smrg		} else {
257b0ab5608Smrg			r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_VCN_DEC, 0, &info);
258b0ab5608Smrg			dec_ring = !!info.available_rings;
259b0ab5608Smrg		}
260b0ab5608Smrg	}
26100a23bdaSmrg
26200a23bdaSmrg	if (amdgpu_device_deinitialize(device_handle))
2630ed5401bSmrg		return CU_FALSE;
26400a23bdaSmrg
265b0ab5608Smrg	if (r) {
266b0ab5608Smrg		printf("\n\nASIC query hw info failed\n");
267b0ab5608Smrg		return CU_FALSE;
268b0ab5608Smrg	}
269b0ab5608Smrg
270b0ab5608Smrg	if (!(dec_ring || enc_ring) ||
27141687f09Smrg	    (family_id < AMDGPU_FAMILY_RV &&
27241687f09Smrg	     (family_id == AMDGPU_FAMILY_AI &&
2734babd585Smrg	      (chip_id - chip_rev) < 0x32))) {  /* Arcturus */
27400a23bdaSmrg		printf("\n\nThe ASIC NOT support VCN, suite disabled\n");
27500a23bdaSmrg		return CU_FALSE;
27600a23bdaSmrg	}
27700a23bdaSmrg
278b0ab5608Smrg	if (!dec_ring) {
279b0ab5608Smrg		amdgpu_set_test_active("VCN Tests", "VCN DEC create", CU_FALSE);
280b0ab5608Smrg		amdgpu_set_test_active("VCN Tests", "VCN DEC decode", CU_FALSE);
281b0ab5608Smrg		amdgpu_set_test_active("VCN Tests", "VCN DEC destroy", CU_FALSE);
282b0ab5608Smrg	}
283b0ab5608Smrg
284b0ab5608Smrg	if (family_id == AMDGPU_FAMILY_AI || !enc_ring) {
28541687f09Smrg		amdgpu_set_test_active("VCN Tests", "VCN ENC create", CU_FALSE);
2860ed5401bSmrg		amdgpu_set_test_active("VCN Tests", "VCN ENC encode", CU_FALSE);
28741687f09Smrg		amdgpu_set_test_active("VCN Tests", "VCN ENC destroy", CU_FALSE);
28841687f09Smrg	}
28941687f09Smrg
290b0ab5608Smrg	if (vcn_ip_version_major == 1)
2910ed5401bSmrg		vcn_reg_index = 0;
292b0ab5608Smrg	else if (vcn_ip_version_major == 2 && vcn_ip_version_minor == 0)
2930ed5401bSmrg		vcn_reg_index = 1;
294b0ab5608Smrg	else if ((vcn_ip_version_major == 2 && vcn_ip_version_minor >= 5) ||
295b0ab5608Smrg				vcn_ip_version_major == 3)
2960ed5401bSmrg		vcn_reg_index = 2;
2975324fb0dSmrg
29800a23bdaSmrg	return CU_TRUE;
29900a23bdaSmrg}
30000a23bdaSmrg
301d8807b2fSmrgint suite_vcn_tests_init(void)
302d8807b2fSmrg{
303d8807b2fSmrg	int r;
304d8807b2fSmrg
305d8807b2fSmrg	r = amdgpu_device_initialize(drm_amdgpu[0], &major_version,
306d8807b2fSmrg				     &minor_version, &device_handle);
307d8807b2fSmrg	if (r)
308d8807b2fSmrg		return CUE_SINIT_FAILED;
309d8807b2fSmrg
310d8807b2fSmrg	family_id = device_handle->info.family_id;
311d8807b2fSmrg
312d8807b2fSmrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
313d8807b2fSmrg	if (r)
314d8807b2fSmrg		return CUE_SINIT_FAILED;
315d8807b2fSmrg
316d8807b2fSmrg	r = amdgpu_bo_alloc_and_map(device_handle, IB_SIZE, 4096,
317d8807b2fSmrg				    AMDGPU_GEM_DOMAIN_GTT, 0,
318d8807b2fSmrg				    &ib_handle, (void**)&ib_cpu,
319d8807b2fSmrg				    &ib_mc_address, &ib_va_handle);
320d8807b2fSmrg	if (r)
321d8807b2fSmrg		return CUE_SINIT_FAILED;
322d8807b2fSmrg
323d8807b2fSmrg	return CUE_SUCCESS;
324d8807b2fSmrg}
325d8807b2fSmrg
326d8807b2fSmrgint suite_vcn_tests_clean(void)
327d8807b2fSmrg{
328d8807b2fSmrg	int r;
329d8807b2fSmrg
33000a23bdaSmrg	r = amdgpu_bo_unmap_and_free(ib_handle, ib_va_handle,
33100a23bdaSmrg			     ib_mc_address, IB_SIZE);
33200a23bdaSmrg	if (r)
33300a23bdaSmrg		return CUE_SCLEAN_FAILED;
33400a23bdaSmrg
33500a23bdaSmrg	r = amdgpu_cs_ctx_free(context_handle);
33600a23bdaSmrg	if (r)
33700a23bdaSmrg		return CUE_SCLEAN_FAILED;
33800a23bdaSmrg
33900a23bdaSmrg	r = amdgpu_device_deinitialize(device_handle);
34000a23bdaSmrg	if (r)
34100a23bdaSmrg		return CUE_SCLEAN_FAILED;
342d8807b2fSmrg
343d8807b2fSmrg	return CUE_SUCCESS;
344d8807b2fSmrg}
345d8807b2fSmrg
346b0ab5608Smrgstatic void amdgpu_cs_sq_head(uint32_t *base, int *offset, bool enc)
347b0ab5608Smrg{
348b0ab5608Smrg	/* signature */
349b0ab5608Smrg	*(base + (*offset)++) = 0x00000010;
350b0ab5608Smrg	*(base + (*offset)++) = 0x30000002;
351b0ab5608Smrg	ib_checksum = base + (*offset)++;
352b0ab5608Smrg	ib_size_in_dw = base + (*offset)++;
353b0ab5608Smrg
354b0ab5608Smrg	/* engine info */
355b0ab5608Smrg	*(base + (*offset)++) = 0x00000010;
356b0ab5608Smrg	*(base + (*offset)++) = 0x30000001;
357b0ab5608Smrg	*(base + (*offset)++) = enc ? 2 : 3;
358b0ab5608Smrg	*(base + (*offset)++) = 0x00000000;
359b0ab5608Smrg}
360b0ab5608Smrg
361b0ab5608Smrgstatic void amdgpu_cs_sq_ib_tail(uint32_t *end)
362b0ab5608Smrg{
363b0ab5608Smrg	uint32_t size_in_dw;
364b0ab5608Smrg	uint32_t checksum = 0;
365b0ab5608Smrg
366b0ab5608Smrg	/* if the pointers are invalid, no need to process */
367b0ab5608Smrg	if (ib_checksum == NULL || ib_size_in_dw == NULL)
368b0ab5608Smrg		return;
369b0ab5608Smrg
370b0ab5608Smrg	size_in_dw = end - ib_size_in_dw - 1;
371b0ab5608Smrg	*ib_size_in_dw = size_in_dw;
372b0ab5608Smrg	*(ib_size_in_dw + 4) = size_in_dw * sizeof(uint32_t);
373b0ab5608Smrg
374b0ab5608Smrg	for (int i = 0; i < size_in_dw; i++)
375b0ab5608Smrg		checksum += *(ib_checksum + 2 + i);
376b0ab5608Smrg
377b0ab5608Smrg	*ib_checksum = checksum;
378b0ab5608Smrg
379b0ab5608Smrg	ib_checksum = NULL;
380b0ab5608Smrg	ib_size_in_dw = NULL;
381b0ab5608Smrg}
382b0ab5608Smrg
383d8807b2fSmrgstatic int submit(unsigned ndw, unsigned ip)
384d8807b2fSmrg{
385d8807b2fSmrg	struct amdgpu_cs_request ibs_request = {0};
386d8807b2fSmrg	struct amdgpu_cs_ib_info ib_info = {0};
387d8807b2fSmrg	struct amdgpu_cs_fence fence_status = {0};
388d8807b2fSmrg	uint32_t expired;
389d8807b2fSmrg	int r;
390d8807b2fSmrg
391d8807b2fSmrg	ib_info.ib_mc_address = ib_mc_address;
392d8807b2fSmrg	ib_info.size = ndw;
393d8807b2fSmrg
394d8807b2fSmrg	ibs_request.ip_type = ip;
395d8807b2fSmrg
396d8807b2fSmrg	r = amdgpu_bo_list_create(device_handle, num_resources, resources,
397d8807b2fSmrg				  NULL, &ibs_request.resources);
398d8807b2fSmrg	if (r)
399d8807b2fSmrg		return r;
400d8807b2fSmrg
401d8807b2fSmrg	ibs_request.number_of_ibs = 1;
402d8807b2fSmrg	ibs_request.ibs = &ib_info;
403d8807b2fSmrg	ibs_request.fence_info.handle = NULL;
404d8807b2fSmrg
405d8807b2fSmrg	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
406d8807b2fSmrg	if (r)
407d8807b2fSmrg		return r;
408d8807b2fSmrg
409d8807b2fSmrg	r = amdgpu_bo_list_destroy(ibs_request.resources);
410d8807b2fSmrg	if (r)
411d8807b2fSmrg		return r;
412d8807b2fSmrg
413d8807b2fSmrg	fence_status.context = context_handle;
414d8807b2fSmrg	fence_status.ip_type = ip;
415d8807b2fSmrg	fence_status.fence = ibs_request.seq_no;
416d8807b2fSmrg
417d8807b2fSmrg	r = amdgpu_cs_query_fence_status(&fence_status,
418d8807b2fSmrg					 AMDGPU_TIMEOUT_INFINITE,
419d8807b2fSmrg					 0, &expired);
420d8807b2fSmrg	if (r)
421d8807b2fSmrg		return r;
422d8807b2fSmrg
423d8807b2fSmrg	return 0;
424d8807b2fSmrg}
425d8807b2fSmrg
426d8807b2fSmrgstatic void alloc_resource(struct amdgpu_vcn_bo *vcn_bo,
427d8807b2fSmrg			unsigned size, unsigned domain)
428d8807b2fSmrg{
429d8807b2fSmrg	struct amdgpu_bo_alloc_request req = {0};
430d8807b2fSmrg	amdgpu_bo_handle buf_handle;
431d8807b2fSmrg	amdgpu_va_handle va_handle;
432d8807b2fSmrg	uint64_t va = 0;
433d8807b2fSmrg	int r;
434d8807b2fSmrg
435d8807b2fSmrg	req.alloc_size = ALIGN(size, 4096);
436d8807b2fSmrg	req.preferred_heap = domain;
437d8807b2fSmrg	r = amdgpu_bo_alloc(device_handle, &req, &buf_handle);
438d8807b2fSmrg	CU_ASSERT_EQUAL(r, 0);
439d8807b2fSmrg	r = amdgpu_va_range_alloc(device_handle,
440d8807b2fSmrg				  amdgpu_gpu_va_range_general,
441d8807b2fSmrg				  req.alloc_size, 1, 0, &va,
442d8807b2fSmrg				  &va_handle, 0);
443d8807b2fSmrg	CU_ASSERT_EQUAL(r, 0);
444d8807b2fSmrg	r = amdgpu_bo_va_op(buf_handle, 0, req.alloc_size, va, 0,
445d8807b2fSmrg			    AMDGPU_VA_OP_MAP);
446d8807b2fSmrg	CU_ASSERT_EQUAL(r, 0);
447d8807b2fSmrg	vcn_bo->addr = va;
448d8807b2fSmrg	vcn_bo->handle = buf_handle;
449d8807b2fSmrg	vcn_bo->size = req.alloc_size;
450d8807b2fSmrg	vcn_bo->va_handle = va_handle;
451d8807b2fSmrg	r = amdgpu_bo_cpu_map(vcn_bo->handle, (void **)&vcn_bo->ptr);
452d8807b2fSmrg	CU_ASSERT_EQUAL(r, 0);
453d8807b2fSmrg	memset(vcn_bo->ptr, 0, size);
454d8807b2fSmrg	r = amdgpu_bo_cpu_unmap(vcn_bo->handle);
455d8807b2fSmrg	CU_ASSERT_EQUAL(r, 0);
456d8807b2fSmrg}
457d8807b2fSmrg
458d8807b2fSmrgstatic void free_resource(struct amdgpu_vcn_bo *vcn_bo)
459d8807b2fSmrg{
460d8807b2fSmrg	int r;
461d8807b2fSmrg
462d8807b2fSmrg	r = amdgpu_bo_va_op(vcn_bo->handle, 0, vcn_bo->size,
463d8807b2fSmrg			    vcn_bo->addr, 0, AMDGPU_VA_OP_UNMAP);
464d8807b2fSmrg	CU_ASSERT_EQUAL(r, 0);
465d8807b2fSmrg
466d8807b2fSmrg	r = amdgpu_va_range_free(vcn_bo->va_handle);
467d8807b2fSmrg	CU_ASSERT_EQUAL(r, 0);
468d8807b2fSmrg
469d8807b2fSmrg	r = amdgpu_bo_free(vcn_bo->handle);
470d8807b2fSmrg	CU_ASSERT_EQUAL(r, 0);
471d8807b2fSmrg	memset(vcn_bo, 0, sizeof(*vcn_bo));
472d8807b2fSmrg}
473d8807b2fSmrg
474d8807b2fSmrgstatic void vcn_dec_cmd(uint64_t addr, unsigned cmd, int *idx)
475d8807b2fSmrg{
476b0ab5608Smrg	if (vcn_dec_sw_ring == false) {
477b0ab5608Smrg		ib_cpu[(*idx)++] = reg[vcn_reg_index].data0;
478b0ab5608Smrg		ib_cpu[(*idx)++] = addr;
479b0ab5608Smrg		ib_cpu[(*idx)++] = reg[vcn_reg_index].data1;
480b0ab5608Smrg		ib_cpu[(*idx)++] = addr >> 32;
481b0ab5608Smrg		ib_cpu[(*idx)++] = reg[vcn_reg_index].cmd;
482b0ab5608Smrg		ib_cpu[(*idx)++] = cmd << 1;
483b0ab5608Smrg		return;
484b0ab5608Smrg	}
485b0ab5608Smrg
486b0ab5608Smrg	/* Support decode software ring message */
487b0ab5608Smrg	if (!(*idx)) {
488b0ab5608Smrg		rvcn_decode_ib_package_t *ib_header;
489b0ab5608Smrg
490b0ab5608Smrg		if (vcn_unified_ring)
491b0ab5608Smrg			amdgpu_cs_sq_head(ib_cpu, idx, false);
492b0ab5608Smrg
493b0ab5608Smrg		ib_header = (rvcn_decode_ib_package_t *)&ib_cpu[*idx];
494b0ab5608Smrg		ib_header->package_size = sizeof(struct rvcn_decode_buffer_s) +
495b0ab5608Smrg			sizeof(struct rvcn_decode_ib_package_s);
496b0ab5608Smrg
497b0ab5608Smrg		(*idx)++;
498b0ab5608Smrg		ib_header->package_type = (DECODE_IB_PARAM_DECODE_BUFFER);
499b0ab5608Smrg		(*idx)++;
500b0ab5608Smrg
501b0ab5608Smrg		decode_buffer = (rvcn_decode_buffer_t *)&(ib_cpu[*idx]);
502b0ab5608Smrg		*idx += sizeof(struct rvcn_decode_buffer_s) / 4;
503b0ab5608Smrg		memset(decode_buffer, 0, sizeof(struct rvcn_decode_buffer_s));
504b0ab5608Smrg	}
505b0ab5608Smrg
506b0ab5608Smrg	switch(cmd) {
507b0ab5608Smrg		case DECODE_CMD_MSG_BUFFER:
508b0ab5608Smrg			decode_buffer->valid_buf_flag |= DECODE_CMDBUF_FLAGS_MSG_BUFFER;
509b0ab5608Smrg			decode_buffer->msg_buffer_address_hi = (addr >> 32);
510b0ab5608Smrg			decode_buffer->msg_buffer_address_lo = (addr);
511b0ab5608Smrg		break;
512b0ab5608Smrg		case DECODE_CMD_DPB_BUFFER:
513b0ab5608Smrg			decode_buffer->valid_buf_flag |= (DECODE_CMDBUF_FLAGS_DPB_BUFFER);
514b0ab5608Smrg			decode_buffer->dpb_buffer_address_hi = (addr >> 32);
515b0ab5608Smrg			decode_buffer->dpb_buffer_address_lo = (addr);
516b0ab5608Smrg		break;
517b0ab5608Smrg		case DECODE_CMD_DECODING_TARGET_BUFFER:
518b0ab5608Smrg			decode_buffer->valid_buf_flag |= (DECODE_CMDBUF_FLAGS_DECODING_TARGET_BUFFER);
519b0ab5608Smrg			decode_buffer->target_buffer_address_hi = (addr >> 32);
520b0ab5608Smrg			decode_buffer->target_buffer_address_lo = (addr);
521b0ab5608Smrg		break;
522b0ab5608Smrg		case DECODE_CMD_FEEDBACK_BUFFER:
523b0ab5608Smrg			decode_buffer->valid_buf_flag |= (DECODE_CMDBUF_FLAGS_FEEDBACK_BUFFER);
524b0ab5608Smrg			decode_buffer->feedback_buffer_address_hi = (addr >> 32);
525b0ab5608Smrg			decode_buffer->feedback_buffer_address_lo = (addr);
526b0ab5608Smrg		break;
527b0ab5608Smrg		case DECODE_CMD_PROB_TBL_BUFFER:
528b0ab5608Smrg			decode_buffer->valid_buf_flag |= (DECODE_CMDBUF_FLAGS_PROB_TBL_BUFFER);
529b0ab5608Smrg			decode_buffer->prob_tbl_buffer_address_hi = (addr >> 32);
530b0ab5608Smrg			decode_buffer->prob_tbl_buffer_address_lo = (addr);
531b0ab5608Smrg		break;
532b0ab5608Smrg		case DECODE_CMD_SESSION_CONTEXT_BUFFER:
533b0ab5608Smrg			decode_buffer->valid_buf_flag |= (DECODE_CMDBUF_FLAGS_SESSION_CONTEXT_BUFFER);
534b0ab5608Smrg			decode_buffer->session_contex_buffer_address_hi = (addr >> 32);
535b0ab5608Smrg			decode_buffer->session_contex_buffer_address_lo = (addr);
536b0ab5608Smrg		break;
537b0ab5608Smrg		case DECODE_CMD_BITSTREAM_BUFFER:
538b0ab5608Smrg			decode_buffer->valid_buf_flag |= (DECODE_CMDBUF_FLAGS_BITSTREAM_BUFFER);
539b0ab5608Smrg			decode_buffer->bitstream_buffer_address_hi = (addr >> 32);
540b0ab5608Smrg			decode_buffer->bitstream_buffer_address_lo = (addr);
541b0ab5608Smrg		break;
542b0ab5608Smrg		case DECODE_CMD_IT_SCALING_TABLE_BUFFER:
543b0ab5608Smrg			decode_buffer->valid_buf_flag |= (DECODE_CMDBUF_FLAGS_IT_SCALING_BUFFER);
544b0ab5608Smrg			decode_buffer->it_sclr_table_buffer_address_hi = (addr >> 32);
545b0ab5608Smrg			decode_buffer->it_sclr_table_buffer_address_lo = (addr);
546b0ab5608Smrg		break;
547b0ab5608Smrg		case DECODE_CMD_CONTEXT_BUFFER:
548b0ab5608Smrg			decode_buffer->valid_buf_flag |= (DECODE_CMDBUF_FLAGS_CONTEXT_BUFFER);
549b0ab5608Smrg			decode_buffer->context_buffer_address_hi = (addr >> 32);
550b0ab5608Smrg			decode_buffer->context_buffer_address_lo = (addr);
551b0ab5608Smrg		break;
552b0ab5608Smrg		default:
553b0ab5608Smrg			printf("Not Support!\n");
554b0ab5608Smrg	}
555d8807b2fSmrg}
556d8807b2fSmrg
557d8807b2fSmrgstatic void amdgpu_cs_vcn_dec_create(void)
558d8807b2fSmrg{
559d8807b2fSmrg	struct amdgpu_vcn_bo msg_buf;
560b0ab5608Smrg	unsigned ip;
561d8807b2fSmrg	int len, r;
562d8807b2fSmrg
563d8807b2fSmrg	num_resources  = 0;
564d8807b2fSmrg	alloc_resource(&msg_buf, 4096, AMDGPU_GEM_DOMAIN_GTT);
565bbff01ceSmrg	alloc_resource(&session_ctx_buf, 32 * 4096, AMDGPU_GEM_DOMAIN_VRAM);
566d8807b2fSmrg	resources[num_resources++] = msg_buf.handle;
567bbff01ceSmrg	resources[num_resources++] = session_ctx_buf.handle;
568d8807b2fSmrg	resources[num_resources++] = ib_handle;
569d8807b2fSmrg
570d8807b2fSmrg	r = amdgpu_bo_cpu_map(msg_buf.handle, (void **)&msg_buf.ptr);
571d8807b2fSmrg	CU_ASSERT_EQUAL(r, 0);
572d8807b2fSmrg
573d8807b2fSmrg	memset(msg_buf.ptr, 0, 4096);
574d8807b2fSmrg	memcpy(msg_buf.ptr, vcn_dec_create_msg, sizeof(vcn_dec_create_msg));
575d8807b2fSmrg
576d8807b2fSmrg	len = 0;
577bbff01ceSmrg
578bbff01ceSmrg	vcn_dec_cmd(session_ctx_buf.addr, 5, &len);
579bbff01ceSmrg	if (vcn_dec_sw_ring == true) {
580b0ab5608Smrg		vcn_dec_cmd(msg_buf.addr, 0, &len);
581bbff01ceSmrg	} else {
582b0ab5608Smrg		ib_cpu[len++] = reg[vcn_reg_index].data0;
583b0ab5608Smrg		ib_cpu[len++] = msg_buf.addr;
584b0ab5608Smrg		ib_cpu[len++] = reg[vcn_reg_index].data1;
585b0ab5608Smrg		ib_cpu[len++] = msg_buf.addr >> 32;
586b0ab5608Smrg		ib_cpu[len++] = reg[vcn_reg_index].cmd;
5876532f28eSmrg		ib_cpu[len++] = 0;
588b0ab5608Smrg		for (; len % 16; ) {
589b0ab5608Smrg			ib_cpu[len++] = reg[vcn_reg_index].nop;
590b0ab5608Smrg			ib_cpu[len++] = 0;
591b0ab5608Smrg		}
5926532f28eSmrg	}
593d8807b2fSmrg
594b0ab5608Smrg	if (vcn_unified_ring) {
595b0ab5608Smrg		amdgpu_cs_sq_ib_tail(ib_cpu + len);
596b0ab5608Smrg		ip = AMDGPU_HW_IP_VCN_ENC;
597b0ab5608Smrg	} else
598b0ab5608Smrg		ip = AMDGPU_HW_IP_VCN_DEC;
599b0ab5608Smrg
600b0ab5608Smrg	r = submit(len, ip);
601b0ab5608Smrg
602d8807b2fSmrg	CU_ASSERT_EQUAL(r, 0);
603d8807b2fSmrg
604d8807b2fSmrg	free_resource(&msg_buf);
605d8807b2fSmrg}
606d8807b2fSmrg
607d8807b2fSmrgstatic void amdgpu_cs_vcn_dec_decode(void)
608d8807b2fSmrg{
60900a23bdaSmrg	const unsigned dpb_size = 15923584, dt_size = 737280;
610d8807b2fSmrg	uint64_t msg_addr, fb_addr, bs_addr, dpb_addr, ctx_addr, dt_addr, it_addr, sum;
611d8807b2fSmrg	struct amdgpu_vcn_bo dec_buf;
612d8807b2fSmrg	int size, len, i, r;
613b0ab5608Smrg	unsigned ip;
614d8807b2fSmrg	uint8_t *dec;
615d8807b2fSmrg
616d8807b2fSmrg	size = 4*1024; /* msg */
617d8807b2fSmrg	size += 4*1024; /* fb */
618d8807b2fSmrg	size += 4096; /*it_scaling_table*/
619d8807b2fSmrg	size += ALIGN(sizeof(uvd_bitstream), 4*1024);
620d8807b2fSmrg	size += ALIGN(dpb_size, 4*1024);
621d8807b2fSmrg	size += ALIGN(dt_size, 4*1024);
622d8807b2fSmrg
6230ed5401bSmrg	num_resources = 0;
624d8807b2fSmrg	alloc_resource(&dec_buf, size, AMDGPU_GEM_DOMAIN_GTT);
625d8807b2fSmrg	resources[num_resources++] = dec_buf.handle;
626d8807b2fSmrg	resources[num_resources++] = ib_handle;
627d8807b2fSmrg
628d8807b2fSmrg	r = amdgpu_bo_cpu_map(dec_buf.handle, (void **)&dec_buf.ptr);
629d8807b2fSmrg	dec = dec_buf.ptr;
630d8807b2fSmrg
631d8807b2fSmrg	CU_ASSERT_EQUAL(r, 0);
632d8807b2fSmrg	memset(dec_buf.ptr, 0, size);
633d8807b2fSmrg	memcpy(dec_buf.ptr, vcn_dec_decode_msg, sizeof(vcn_dec_decode_msg));
634d8807b2fSmrg	memcpy(dec_buf.ptr + sizeof(vcn_dec_decode_msg),
635d8807b2fSmrg			avc_decode_msg, sizeof(avc_decode_msg));
636d8807b2fSmrg
637d8807b2fSmrg	dec += 4*1024;
6389bd392adSmrg	memcpy(dec, feedback_msg, sizeof(feedback_msg));
639d8807b2fSmrg	dec += 4*1024;
640d8807b2fSmrg	memcpy(dec, uvd_it_scaling_table, sizeof(uvd_it_scaling_table));
641d8807b2fSmrg
642d8807b2fSmrg	dec += 4*1024;
643d8807b2fSmrg	memcpy(dec, uvd_bitstream, sizeof(uvd_bitstream));
644d8807b2fSmrg
645d8807b2fSmrg	dec += ALIGN(sizeof(uvd_bitstream), 4*1024);
646d8807b2fSmrg
647d8807b2fSmrg	dec += ALIGN(dpb_size, 4*1024);
648d8807b2fSmrg
649d8807b2fSmrg	msg_addr = dec_buf.addr;
650d8807b2fSmrg	fb_addr = msg_addr + 4*1024;
651d8807b2fSmrg	it_addr = fb_addr + 4*1024;
652d8807b2fSmrg	bs_addr = it_addr + 4*1024;
653d8807b2fSmrg	dpb_addr = ALIGN(bs_addr + sizeof(uvd_bitstream), 4*1024);
654d8807b2fSmrg	ctx_addr = ALIGN(dpb_addr + 0x006B9400, 4*1024);
655d8807b2fSmrg	dt_addr = ALIGN(dpb_addr + dpb_size, 4*1024);
656d8807b2fSmrg
657d8807b2fSmrg	len = 0;
658bbff01ceSmrg	vcn_dec_cmd(session_ctx_buf.addr, 0x5, &len);
659d8807b2fSmrg	vcn_dec_cmd(msg_addr, 0x0, &len);
660d8807b2fSmrg	vcn_dec_cmd(dpb_addr, 0x1, &len);
661d8807b2fSmrg	vcn_dec_cmd(dt_addr, 0x2, &len);
662d8807b2fSmrg	vcn_dec_cmd(fb_addr, 0x3, &len);
663d8807b2fSmrg	vcn_dec_cmd(bs_addr, 0x100, &len);
664d8807b2fSmrg	vcn_dec_cmd(it_addr, 0x204, &len);
665d8807b2fSmrg	vcn_dec_cmd(ctx_addr, 0x206, &len);
666d8807b2fSmrg
667b0ab5608Smrg	if (vcn_dec_sw_ring == false) {
668b0ab5608Smrg		ib_cpu[len++] = reg[vcn_reg_index].cntl;
669b0ab5608Smrg		ib_cpu[len++] = 0x1;
670b0ab5608Smrg		for (; len % 16; ) {
671b0ab5608Smrg			ib_cpu[len++] = reg[vcn_reg_index].nop;
672b0ab5608Smrg			ib_cpu[len++] = 0;
673b0ab5608Smrg		}
6746532f28eSmrg	}
675d8807b2fSmrg
676b0ab5608Smrg	if (vcn_unified_ring) {
677b0ab5608Smrg		amdgpu_cs_sq_ib_tail(ib_cpu + len);
678b0ab5608Smrg		ip = AMDGPU_HW_IP_VCN_ENC;
679b0ab5608Smrg	} else
680b0ab5608Smrg		ip = AMDGPU_HW_IP_VCN_DEC;
681b0ab5608Smrg
682b0ab5608Smrg	r = submit(len, ip);
683d8807b2fSmrg	CU_ASSERT_EQUAL(r, 0);
684d8807b2fSmrg
685d8807b2fSmrg	for (i = 0, sum = 0; i < dt_size; ++i)
686d8807b2fSmrg		sum += dec[i];
687d8807b2fSmrg
688d8807b2fSmrg	CU_ASSERT_EQUAL(sum, SUM_DECODE);
689d8807b2fSmrg
690d8807b2fSmrg	free_resource(&dec_buf);
691d8807b2fSmrg}
692d8807b2fSmrg
693d8807b2fSmrgstatic void amdgpu_cs_vcn_dec_destroy(void)
694d8807b2fSmrg{
695d8807b2fSmrg	struct amdgpu_vcn_bo msg_buf;
696b0ab5608Smrg	unsigned ip;
697d8807b2fSmrg	int len, r;
698d8807b2fSmrg
6990ed5401bSmrg	num_resources = 0;
700d8807b2fSmrg	alloc_resource(&msg_buf, 1024, AMDGPU_GEM_DOMAIN_GTT);
701d8807b2fSmrg	resources[num_resources++] = msg_buf.handle;
702d8807b2fSmrg	resources[num_resources++] = ib_handle;
703d8807b2fSmrg
704d8807b2fSmrg	r = amdgpu_bo_cpu_map(msg_buf.handle, (void **)&msg_buf.ptr);
705d8807b2fSmrg	CU_ASSERT_EQUAL(r, 0);
706d8807b2fSmrg
707d8807b2fSmrg	memset(msg_buf.ptr, 0, 1024);
708d8807b2fSmrg	memcpy(msg_buf.ptr, vcn_dec_destroy_msg, sizeof(vcn_dec_destroy_msg));
709d8807b2fSmrg
710d8807b2fSmrg	len = 0;
711bbff01ceSmrg	vcn_dec_cmd(session_ctx_buf.addr, 5, &len);
712bbff01ceSmrg	if (vcn_dec_sw_ring == true) {
713b0ab5608Smrg		vcn_dec_cmd(msg_buf.addr, 0, &len);
714bbff01ceSmrg	} else {
715b0ab5608Smrg		ib_cpu[len++] = reg[vcn_reg_index].data0;
716b0ab5608Smrg		ib_cpu[len++] = msg_buf.addr;
717b0ab5608Smrg		ib_cpu[len++] = reg[vcn_reg_index].data1;
718b0ab5608Smrg		ib_cpu[len++] = msg_buf.addr >> 32;
719b0ab5608Smrg		ib_cpu[len++] = reg[vcn_reg_index].cmd;
7206532f28eSmrg		ib_cpu[len++] = 0;
721b0ab5608Smrg		for (; len % 16; ) {
722b0ab5608Smrg			ib_cpu[len++] = reg[vcn_reg_index].nop;
723b0ab5608Smrg			ib_cpu[len++] = 0;
724b0ab5608Smrg		}
7256532f28eSmrg	}
726d8807b2fSmrg
727b0ab5608Smrg	if (vcn_unified_ring) {
728b0ab5608Smrg		amdgpu_cs_sq_ib_tail(ib_cpu + len);
729b0ab5608Smrg		ip = AMDGPU_HW_IP_VCN_ENC;
730b0ab5608Smrg	} else
731b0ab5608Smrg		ip = AMDGPU_HW_IP_VCN_DEC;
732b0ab5608Smrg
733b0ab5608Smrg	r = submit(len, ip);
734d8807b2fSmrg	CU_ASSERT_EQUAL(r, 0);
735d8807b2fSmrg
736d8807b2fSmrg	free_resource(&msg_buf);
737bbff01ceSmrg	free_resource(&session_ctx_buf);
738d8807b2fSmrg}
739d8807b2fSmrg
740d8807b2fSmrgstatic void amdgpu_cs_vcn_enc_create(void)
741d8807b2fSmrg{
7420ed5401bSmrg	int len, r;
7430ed5401bSmrg	uint32_t *p_task_size = NULL;
7440ed5401bSmrg	uint32_t task_offset = 0, st_offset;
7450ed5401bSmrg	uint32_t *st_size = NULL;
7460ed5401bSmrg	unsigned width = 160, height = 128, buf_size;
7470ed5401bSmrg	uint32_t fw_maj = 1, fw_min = 9;
7480ed5401bSmrg
749b0ab5608Smrg	if (vcn_ip_version_major == 2) {
7500ed5401bSmrg		fw_maj = 1;
7510ed5401bSmrg		fw_min = 1;
752b0ab5608Smrg	} else if (vcn_ip_version_major == 3) {
7530ed5401bSmrg		fw_maj = 1;
7540ed5401bSmrg		fw_min = 0;
7550ed5401bSmrg	}
7560ed5401bSmrg
7570ed5401bSmrg	gWidth = width;
7580ed5401bSmrg	gHeight = height;
7590ed5401bSmrg	buf_size = ALIGN(width, 256) * ALIGN(height, 32) * 3 / 2;
7600ed5401bSmrg	enc_task_id = 1;
7610ed5401bSmrg
7620ed5401bSmrg	num_resources = 0;
7630ed5401bSmrg	alloc_resource(&enc_buf, 128 * 1024, AMDGPU_GEM_DOMAIN_GTT);
7640ed5401bSmrg	alloc_resource(&cpb_buf, buf_size * 2, AMDGPU_GEM_DOMAIN_GTT);
7650ed5401bSmrg	resources[num_resources++] = enc_buf.handle;
7660ed5401bSmrg	resources[num_resources++] = cpb_buf.handle;
7670ed5401bSmrg	resources[num_resources++] = ib_handle;
7680ed5401bSmrg
7690ed5401bSmrg	r = amdgpu_bo_cpu_map(enc_buf.handle, (void**)&enc_buf.ptr);
7700ed5401bSmrg	memset(enc_buf.ptr, 0, 128 * 1024);
7710ed5401bSmrg	r = amdgpu_bo_cpu_unmap(enc_buf.handle);
7720ed5401bSmrg
7730ed5401bSmrg	r = amdgpu_bo_cpu_map(cpb_buf.handle, (void**)&enc_buf.ptr);
7740ed5401bSmrg	memset(enc_buf.ptr, 0, buf_size * 2);
7750ed5401bSmrg	r = amdgpu_bo_cpu_unmap(cpb_buf.handle);
7760ed5401bSmrg
7770ed5401bSmrg	len = 0;
778b0ab5608Smrg
779b0ab5608Smrg	if (vcn_unified_ring)
780b0ab5608Smrg		amdgpu_cs_sq_head(ib_cpu, &len, true);
781b0ab5608Smrg
7820ed5401bSmrg	/* session info */
7830ed5401bSmrg	st_offset = len;
7840ed5401bSmrg	st_size = &ib_cpu[len++];	/* size */
7850ed5401bSmrg	ib_cpu[len++] = 0x00000001;	/* RENCODE_IB_PARAM_SESSION_INFO */
7860ed5401bSmrg	ib_cpu[len++] = ((fw_maj << 16) | (fw_min << 0));
7870ed5401bSmrg	ib_cpu[len++] = enc_buf.addr >> 32;
7880ed5401bSmrg	ib_cpu[len++] = enc_buf.addr;
7890ed5401bSmrg	ib_cpu[len++] = 1;	/* RENCODE_ENGINE_TYPE_ENCODE; */
7900ed5401bSmrg	*st_size = (len - st_offset) * 4;
7910ed5401bSmrg
7920ed5401bSmrg	/* task info */
7930ed5401bSmrg	task_offset = len;
7940ed5401bSmrg	st_offset = len;
7950ed5401bSmrg	st_size = &ib_cpu[len++];	/* size */
7960ed5401bSmrg	ib_cpu[len++] = 0x00000002;	/* RENCODE_IB_PARAM_TASK_INFO */
7970ed5401bSmrg	p_task_size = &ib_cpu[len++];
7980ed5401bSmrg	ib_cpu[len++] = enc_task_id++;	/* task_id */
7990ed5401bSmrg	ib_cpu[len++] = 0;	/* feedback */
8000ed5401bSmrg	*st_size = (len - st_offset) * 4;
8010ed5401bSmrg
8020ed5401bSmrg	/* op init */
8030ed5401bSmrg	st_offset = len;
8040ed5401bSmrg	st_size = &ib_cpu[len++];	/* size */
8050ed5401bSmrg	ib_cpu[len++] = 0x01000001;	/* RENCODE_IB_OP_INITIALIZE */
8060ed5401bSmrg	*st_size = (len - st_offset) * 4;
8070ed5401bSmrg
8080ed5401bSmrg	/* session_init */
8090ed5401bSmrg	st_offset = len;
8100ed5401bSmrg	st_size = &ib_cpu[len++];	/* size */
8110ed5401bSmrg	ib_cpu[len++] = 0x00000003;	/* RENCODE_IB_PARAM_SESSION_INIT */
8120ed5401bSmrg	ib_cpu[len++] = 1;	/* RENCODE_ENCODE_STANDARD_H264 */
8130ed5401bSmrg	ib_cpu[len++] = width;
8140ed5401bSmrg	ib_cpu[len++] = height;
8150ed5401bSmrg	ib_cpu[len++] = 0;
8160ed5401bSmrg	ib_cpu[len++] = 0;
8170ed5401bSmrg	ib_cpu[len++] = 0;	/* pre encode mode */
8180ed5401bSmrg	ib_cpu[len++] = 0;	/* chroma enabled : false */
819bbff01ceSmrg	ib_cpu[len++] = 0;
820bbff01ceSmrg	ib_cpu[len++] = 0;
8210ed5401bSmrg	*st_size = (len - st_offset) * 4;
8220ed5401bSmrg
8230ed5401bSmrg	/* slice control */
8240ed5401bSmrg	st_offset = len;
8250ed5401bSmrg	st_size = &ib_cpu[len++];	/* size */
8260ed5401bSmrg	ib_cpu[len++] = 0x00200001;	/* RENCODE_H264_IB_PARAM_SLICE_CONTROL */
8270ed5401bSmrg	ib_cpu[len++] = 0;	/* RENCODE_H264_SLICE_CONTROL_MODE_FIXED_MBS */
8280ed5401bSmrg	ib_cpu[len++] = ALIGN(width, 16) / 16 * ALIGN(height, 16) / 16;
8290ed5401bSmrg	*st_size = (len - st_offset) * 4;
8300ed5401bSmrg
8310ed5401bSmrg	/* enc spec misc */
8320ed5401bSmrg	st_offset = len;
8330ed5401bSmrg	st_size = &ib_cpu[len++];	/* size */
8340ed5401bSmrg	ib_cpu[len++] = 0x00200002;	/* RENCODE_H264_IB_PARAM_SPEC_MISC */
8350ed5401bSmrg	ib_cpu[len++] = 0;	/* constrained intra pred flag */
8360ed5401bSmrg	ib_cpu[len++] = 0;	/* cabac enable */
8370ed5401bSmrg	ib_cpu[len++] = 0;	/* cabac init idc */
8380ed5401bSmrg	ib_cpu[len++] = 1;	/* half pel enabled */
8390ed5401bSmrg	ib_cpu[len++] = 1;	/* quarter pel enabled */
8400ed5401bSmrg	ib_cpu[len++] = 100;	/* BASELINE profile */
8410ed5401bSmrg	ib_cpu[len++] = 11;	/* level */
842bbff01ceSmrg	if (vcn_ip_version_major >= 3) {
8430ed5401bSmrg		ib_cpu[len++] = 0;	/* b_picture_enabled */
8440ed5401bSmrg		ib_cpu[len++] = 0;	/* weighted_bipred_idc */
8450ed5401bSmrg	}
8460ed5401bSmrg	*st_size = (len - st_offset) * 4;
8470ed5401bSmrg
8480ed5401bSmrg	/* deblocking filter */
8490ed5401bSmrg	st_offset = len;
8500ed5401bSmrg	st_size = &ib_cpu[len++];	/* size */
8510ed5401bSmrg	ib_cpu[len++] = 0x00200004;	/* RENCODE_H264_IB_PARAM_DEBLOCKING_FILTER */
8520ed5401bSmrg	ib_cpu[len++] = 0;	/* disable deblocking filter idc */
8530ed5401bSmrg	ib_cpu[len++] = 0;	/* alpha c0 offset */
8540ed5401bSmrg	ib_cpu[len++] = 0;	/* tc offset */
8550ed5401bSmrg	ib_cpu[len++] = 0;	/* cb offset */
8560ed5401bSmrg	ib_cpu[len++] = 0;	/* cr offset */
8570ed5401bSmrg	*st_size = (len - st_offset) * 4;
8580ed5401bSmrg
8590ed5401bSmrg	/* layer control */
8600ed5401bSmrg	st_offset = len;
8610ed5401bSmrg	st_size = &ib_cpu[len++];	/* size */
8620ed5401bSmrg	ib_cpu[len++] = 0x00000004;	/* RENCODE_IB_PARAM_LAYER_CONTROL */
8630ed5401bSmrg	ib_cpu[len++] = 1;	/* max temporal layer */
8640ed5401bSmrg	ib_cpu[len++] = 1;	/* no of temporal layer */
8650ed5401bSmrg	*st_size = (len - st_offset) * 4;
8660ed5401bSmrg
8670ed5401bSmrg	/* rc_session init */
8680ed5401bSmrg	st_offset = len;
8690ed5401bSmrg	st_size = &ib_cpu[len++];	/* size */
8700ed5401bSmrg	ib_cpu[len++] = 0x00000006;	/* RENCODE_IB_PARAM_RATE_CONTROL_SESSION_INIT */
8710ed5401bSmrg	ib_cpu[len++] = 0;	/* rate control */
8720ed5401bSmrg	ib_cpu[len++] = 48;	/* vbv buffer level */
8730ed5401bSmrg	*st_size = (len - st_offset) * 4;
8740ed5401bSmrg
8750ed5401bSmrg	/* quality params */
8760ed5401bSmrg	st_offset = len;
8770ed5401bSmrg	st_size = &ib_cpu[len++];	/* size */
8780ed5401bSmrg	ib_cpu[len++] = 0x00000009;	/* RENCODE_IB_PARAM_QUALITY_PARAMS */
8790ed5401bSmrg	ib_cpu[len++] = 0;	/* vbaq mode */
8800ed5401bSmrg	ib_cpu[len++] = 0;	/* scene change sensitivity */
8810ed5401bSmrg	ib_cpu[len++] = 0;	/* scene change min idr interval */
8820ed5401bSmrg	ib_cpu[len++] = 0;
883bbff01ceSmrg	if (vcn_ip_version_major >= 3)
8840ed5401bSmrg		ib_cpu[len++] = 0;
8850ed5401bSmrg	*st_size = (len - st_offset) * 4;
8860ed5401bSmrg
8870ed5401bSmrg	/* layer select */
8880ed5401bSmrg	st_offset = len;
8890ed5401bSmrg	st_size = &ib_cpu[len++];	/* size */
8900ed5401bSmrg	ib_cpu[len++] = 0x00000005;	/* RENCODE_IB_PARAM_LAYER_SELECT */
8910ed5401bSmrg	ib_cpu[len++] = 0;	/* temporal layer */
8920ed5401bSmrg	*st_size = (len - st_offset) * 4;
8930ed5401bSmrg
8940ed5401bSmrg	/* rc layer init */
8950ed5401bSmrg	st_offset = len;
8960ed5401bSmrg	st_size = &ib_cpu[len++];	/* size */
8970ed5401bSmrg	ib_cpu[len++] = 0x00000007;	/* RENCODE_IB_PARAM_RATE_CONTROL_LAYER_INIT */
8980ed5401bSmrg	ib_cpu[len++] = 0;
8990ed5401bSmrg	ib_cpu[len++] = 0;
9000ed5401bSmrg	ib_cpu[len++] = 25;
9010ed5401bSmrg	ib_cpu[len++] = 1;
9020ed5401bSmrg	ib_cpu[len++] = 0x01312d00;
9030ed5401bSmrg	ib_cpu[len++] = 0;
9040ed5401bSmrg	ib_cpu[len++] = 0;
9050ed5401bSmrg	ib_cpu[len++] = 0;
9060ed5401bSmrg	*st_size = (len - st_offset) * 4;
9070ed5401bSmrg
9080ed5401bSmrg	/* layer select */
9090ed5401bSmrg	st_offset = len;
9100ed5401bSmrg	st_size = &ib_cpu[len++];	/* size */
9110ed5401bSmrg	ib_cpu[len++] = 0x00000005;	/* RENCODE_IB_PARAM_LAYER_SELECT */
9120ed5401bSmrg	ib_cpu[len++] = 0;	/* temporal layer */
9130ed5401bSmrg	*st_size = (len - st_offset) * 4;
9140ed5401bSmrg
9150ed5401bSmrg	/* rc per pic */
9160ed5401bSmrg	st_offset = len;
9170ed5401bSmrg	st_size = &ib_cpu[len++];	/* size */
9180ed5401bSmrg	ib_cpu[len++] = 0x00000008;	/* RENCODE_IB_PARAM_RATE_CONTROL_PER_PICTURE */
9190ed5401bSmrg	ib_cpu[len++] = 20;
9200ed5401bSmrg	ib_cpu[len++] = 0;
9210ed5401bSmrg	ib_cpu[len++] = 51;
9220ed5401bSmrg	ib_cpu[len++] = 0;
9230ed5401bSmrg	ib_cpu[len++] = 1;
9240ed5401bSmrg	ib_cpu[len++] = 0;
9250ed5401bSmrg	ib_cpu[len++] = 1;
926bbff01ceSmrg	ib_cpu[len++] = 0;
9270ed5401bSmrg	*st_size = (len - st_offset) * 4;
9280ed5401bSmrg
9290ed5401bSmrg	/* op init rc */
9300ed5401bSmrg	st_offset = len;
9310ed5401bSmrg	st_size = &ib_cpu[len++];	/* size */
9320ed5401bSmrg	ib_cpu[len++] = 0x01000004;	/* RENCODE_IB_OP_INIT_RC */
9330ed5401bSmrg	*st_size = (len - st_offset) * 4;
9340ed5401bSmrg
9350ed5401bSmrg	/* op init rc vbv */
9360ed5401bSmrg	st_offset = len;
9370ed5401bSmrg	st_size = &ib_cpu[len++];	/* size */
9380ed5401bSmrg	ib_cpu[len++] = 0x01000005;	/* RENCODE_IB_OP_INIT_RC_VBV_BUFFER_LEVEL */
9390ed5401bSmrg	*st_size = (len - st_offset) * 4;
9400ed5401bSmrg
9410ed5401bSmrg	*p_task_size = (len - task_offset) * 4;
9420ed5401bSmrg
943b0ab5608Smrg	if (vcn_unified_ring)
944b0ab5608Smrg		amdgpu_cs_sq_ib_tail(ib_cpu + len);
945b0ab5608Smrg
9460ed5401bSmrg	r = submit(len, AMDGPU_HW_IP_VCN_ENC);
9470ed5401bSmrg	CU_ASSERT_EQUAL(r, 0);
9480ed5401bSmrg}
9490ed5401bSmrg
9500ed5401bSmrgstatic int32_t h264_se (bufferInfo * bufInfo)
9510ed5401bSmrg{
9520ed5401bSmrg	uint32_t ret;
9530ed5401bSmrg
9540ed5401bSmrg	ret = bs_read_ue (bufInfo);
9550ed5401bSmrg	if ((ret & 0x1) == 0) {
9560ed5401bSmrg		ret >>= 1;
9570ed5401bSmrg		int32_t temp = 0 - ret;
9580ed5401bSmrg		return temp;
9590ed5401bSmrg	}
9600ed5401bSmrg
9610ed5401bSmrg	return (ret + 1) >> 1;
9620ed5401bSmrg}
9630ed5401bSmrg
9640ed5401bSmrgstatic void h264_check_0s (bufferInfo * bufInfo, int count)
9650ed5401bSmrg{
9660ed5401bSmrg	uint32_t val;
9670ed5401bSmrg
9680ed5401bSmrg	val = bs_read_u (bufInfo, count);
9690ed5401bSmrg	if (val != 0) {
9700ed5401bSmrg		printf ("field error - %d bits should be 0 is %x\n", count, val);
9710ed5401bSmrg	}
9720ed5401bSmrg}
9730ed5401bSmrg
9740ed5401bSmrgstatic inline int bs_eof(bufferInfo * bufinfo)
9750ed5401bSmrg{
9760ed5401bSmrg	if (bufinfo->decBuffer >= bufinfo->end)
9770ed5401bSmrg		return 1;
9780ed5401bSmrg	else
9790ed5401bSmrg		return 0;
9800ed5401bSmrg}
9810ed5401bSmrg
9820ed5401bSmrgstatic inline uint32_t bs_read_u1(bufferInfo *bufinfo)
9830ed5401bSmrg{
9840ed5401bSmrg	uint32_t r = 0;
9850ed5401bSmrg	uint32_t temp = 0;
9860ed5401bSmrg
9870ed5401bSmrg	bufinfo->numOfBitsInBuffer--;
9880ed5401bSmrg	if (! bs_eof(bufinfo)) {
9890ed5401bSmrg		temp = (((bufinfo->decData)) >> bufinfo->numOfBitsInBuffer);
9900ed5401bSmrg		r = temp & 0x01;
9910ed5401bSmrg	}
9920ed5401bSmrg
9930ed5401bSmrg	if (bufinfo->numOfBitsInBuffer == 0) {
9940ed5401bSmrg		bufinfo->decBuffer++;
9950ed5401bSmrg		bufinfo->decData = *bufinfo->decBuffer;
9960ed5401bSmrg		bufinfo->numOfBitsInBuffer = 8;
9970ed5401bSmrg	}
9980ed5401bSmrg
9990ed5401bSmrg	return r;
10000ed5401bSmrg}
10010ed5401bSmrg
10020ed5401bSmrgstatic inline uint32_t bs_read_u(bufferInfo* bufinfo, int n)
10030ed5401bSmrg{
10040ed5401bSmrg	uint32_t r = 0;
10050ed5401bSmrg	int i;
10060ed5401bSmrg
10070ed5401bSmrg	for (i = 0; i < n; i++) {
10080ed5401bSmrg		r |= ( bs_read_u1(bufinfo) << ( n - i - 1 ) );
10090ed5401bSmrg	}
10100ed5401bSmrg
10110ed5401bSmrg	return r;
10120ed5401bSmrg}
10130ed5401bSmrg
10140ed5401bSmrgstatic inline uint32_t bs_read_ue(bufferInfo* bufinfo)
10150ed5401bSmrg{
10160ed5401bSmrg	int32_t r = 0;
10170ed5401bSmrg	int i = 0;
10180ed5401bSmrg
10190ed5401bSmrg	while( (bs_read_u1(bufinfo) == 0) && (i < 32) && (!bs_eof(bufinfo))) {
10200ed5401bSmrg		i++;
10210ed5401bSmrg	}
10220ed5401bSmrg	r = bs_read_u(bufinfo, i);
10230ed5401bSmrg	r += (1 << i) - 1;
10240ed5401bSmrg	return r;
10250ed5401bSmrg}
10260ed5401bSmrg
/*
 * Strip the 0x03 byte from every 00 00 03 sequence in the first `len` bytes
 * of bptr, compacting the data in place.
 *
 * After a removal, scanning resumes at the byte that followed the 0x03.
 * Returns the new length (original length minus the number of bytes removed).
 */
static uint32_t remove_03 (uint8_t * bptr, uint32_t len)
{
	uint32_t pos = 0;

	while (pos + 2 < len) {
		const uint8_t *cur = bptr + pos;

		if (cur[0] != 0 || cur[1] != 0 || cur[2] != 3) {
			pos++;
			continue;
		}

		/* keep the two zero bytes, drop the 0x03 that follows them */
		pos += 2;
		len--;
		memmove (bptr + pos, bptr + pos + 1, len - pos);
	}
	return len;
}
10430ed5401bSmrg
10440ed5401bSmrgstatic void scaling_list (uint32_t ix, uint32_t sizeOfScalingList, bufferInfo * bufInfo)
10450ed5401bSmrg{
10460ed5401bSmrg	uint32_t lastScale = 8, nextScale = 8;
10470ed5401bSmrg	uint32_t jx;
10480ed5401bSmrg	int deltaScale;
10490ed5401bSmrg
10500ed5401bSmrg	for (jx = 0; jx < sizeOfScalingList; jx++) {
10510ed5401bSmrg		if (nextScale != 0) {
10520ed5401bSmrg			deltaScale = h264_se (bufInfo);
10530ed5401bSmrg			nextScale = (lastScale + deltaScale + 256) % 256;
10540ed5401bSmrg		}
10550ed5401bSmrg		if (nextScale == 0) {
10560ed5401bSmrg			lastScale = lastScale;
10570ed5401bSmrg		} else {
10580ed5401bSmrg			lastScale = nextScale;
10590ed5401bSmrg		}
10600ed5401bSmrg	}
10610ed5401bSmrg}
10620ed5401bSmrg
10630ed5401bSmrgstatic void h264_parse_sequence_parameter_set (h264_decode * dec, bufferInfo * bufInfo)
10640ed5401bSmrg{
10650ed5401bSmrg	uint32_t temp;
10660ed5401bSmrg
10670ed5401bSmrg	dec->profile = bs_read_u (bufInfo, 8);
10680ed5401bSmrg	bs_read_u (bufInfo, 1);		/* constaint_set0_flag */
10690ed5401bSmrg	bs_read_u (bufInfo, 1);		/* constaint_set1_flag */
10700ed5401bSmrg	bs_read_u (bufInfo, 1);		/* constaint_set2_flag */
10710ed5401bSmrg	bs_read_u (bufInfo, 1);		/* constaint_set3_flag */
10720ed5401bSmrg	bs_read_u (bufInfo, 1);		/* constaint_set4_flag */
10730ed5401bSmrg	bs_read_u (bufInfo, 1);		/* constaint_set5_flag */
10740ed5401bSmrg
10750ed5401bSmrg
10760ed5401bSmrg	h264_check_0s (bufInfo, 2);
10770ed5401bSmrg	dec->level_idc = bs_read_u (bufInfo, 8);
10780ed5401bSmrg	bs_read_ue (bufInfo);	/* SPS id*/
10790ed5401bSmrg
10800ed5401bSmrg	if (dec->profile == 100 || dec->profile == 110 ||
10810ed5401bSmrg		dec->profile == 122 || dec->profile == 144) {
10820ed5401bSmrg		uint32_t chroma_format_idc = bs_read_ue (bufInfo);
10830ed5401bSmrg		if (chroma_format_idc == 3) {
10840ed5401bSmrg			bs_read_u (bufInfo, 1);	/* residual_colour_transform_flag */
10850ed5401bSmrg		}
10860ed5401bSmrg		bs_read_ue (bufInfo);	/* bit_depth_luma_minus8 */
10870ed5401bSmrg		bs_read_ue (bufInfo);	/* bit_depth_chroma_minus8 */
10880ed5401bSmrg		bs_read_u (bufInfo, 1);	/* qpprime_y_zero_transform_bypass_flag */
10890ed5401bSmrg		uint32_t seq_scaling_matrix_present_flag = bs_read_u (bufInfo, 1);
10900ed5401bSmrg
10910ed5401bSmrg		if (seq_scaling_matrix_present_flag) {
10920ed5401bSmrg			for (uint32_t ix = 0; ix < 8; ix++) {
10930ed5401bSmrg				temp = bs_read_u (bufInfo, 1);
10940ed5401bSmrg				if (temp) {
10950ed5401bSmrg					scaling_list (ix, ix < 6 ? 16 : 64, bufInfo);
10960ed5401bSmrg				}
10970ed5401bSmrg			}
10980ed5401bSmrg		}
10990ed5401bSmrg	}
11000ed5401bSmrg
11010ed5401bSmrg	bs_read_ue (bufInfo);	/* log2_max_frame_num_minus4 */
11020ed5401bSmrg	uint32_t pic_order_cnt_type = bs_read_ue (bufInfo);
11030ed5401bSmrg
11040ed5401bSmrg	if (pic_order_cnt_type == 0) {
11050ed5401bSmrg		bs_read_ue (bufInfo);	/* log2_max_pic_order_cnt_lsb_minus4 */
11060ed5401bSmrg	} else if (pic_order_cnt_type == 1) {
11070ed5401bSmrg		bs_read_u (bufInfo, 1);	/* delta_pic_order_always_zero_flag */
11080ed5401bSmrg		h264_se (bufInfo);	/* offset_for_non_ref_pic */
11090ed5401bSmrg		h264_se (bufInfo);	/* offset_for_top_to_bottom_field */
11100ed5401bSmrg		temp = bs_read_ue (bufInfo);
11110ed5401bSmrg		for (uint32_t ix = 0; ix < temp; ix++) {
11120ed5401bSmrg			 h264_se (bufInfo);	/* offset_for_ref_frame[index] */
11130ed5401bSmrg		}
11140ed5401bSmrg	}
11150ed5401bSmrg	bs_read_ue (bufInfo);	/* num_ref_frames */
11160ed5401bSmrg	bs_read_u (bufInfo, 1);	/* gaps_in_frame_num_flag */
11170ed5401bSmrg	uint32_t PicWidthInMbs = bs_read_ue (bufInfo) + 1;
11180ed5401bSmrg
11190ed5401bSmrg	dec->pic_width = PicWidthInMbs * 16;
11200ed5401bSmrg	uint32_t PicHeightInMapUnits = bs_read_ue (bufInfo) + 1;
11210ed5401bSmrg
11220ed5401bSmrg	dec->pic_height = PicHeightInMapUnits * 16;
11230ed5401bSmrg	uint32_t frame_mbs_only_flag = bs_read_u (bufInfo, 1);
11240ed5401bSmrg	if (!frame_mbs_only_flag) {
11250ed5401bSmrg		bs_read_u (bufInfo, 1);	/* mb_adaptive_frame_field_flag */
11260ed5401bSmrg	}
11270ed5401bSmrg	bs_read_u (bufInfo, 1);	/* direct_8x8_inference_flag */
11280ed5401bSmrg	temp = bs_read_u (bufInfo, 1);
11290ed5401bSmrg	if (temp) {
11300ed5401bSmrg		bs_read_ue (bufInfo);	/* frame_crop_left_offset */
11310ed5401bSmrg		bs_read_ue (bufInfo);	/* frame_crop_right_offset */
11320ed5401bSmrg		bs_read_ue (bufInfo);	/* frame_crop_top_offset */
11330ed5401bSmrg		bs_read_ue (bufInfo);	/* frame_crop_bottom_offset */
11340ed5401bSmrg	}
11350ed5401bSmrg	temp = bs_read_u (bufInfo, 1);	/* VUI Parameters  */
11360ed5401bSmrg}
11370ed5401bSmrg
11380ed5401bSmrgstatic void h264_slice_header (h264_decode * dec, bufferInfo * bufInfo)
11390ed5401bSmrg{
11400ed5401bSmrg	uint32_t temp;
11410ed5401bSmrg
11420ed5401bSmrg	bs_read_ue (bufInfo);	/* first_mb_in_slice */
11430ed5401bSmrg	temp = bs_read_ue (bufInfo);
11440ed5401bSmrg	dec->slice_type = ((temp > 5) ? (temp - 5) : temp);
11450ed5401bSmrg}
11460ed5401bSmrg
11470ed5401bSmrgstatic uint8_t h264_parse_nal (h264_decode * dec, bufferInfo * bufInfo)
11480ed5401bSmrg{
11490ed5401bSmrg	uint8_t type = 0;
11500ed5401bSmrg
11510ed5401bSmrg	h264_check_0s (bufInfo, 1);
11520ed5401bSmrg	dec->nal_ref_idc = bs_read_u (bufInfo, 2);
11530ed5401bSmrg	dec->nal_unit_type = type = bs_read_u (bufInfo, 5);
11540ed5401bSmrg	switch (type)
11550ed5401bSmrg	{
11560ed5401bSmrg	case H264_NAL_TYPE_NON_IDR_SLICE:
11570ed5401bSmrg	case H264_NAL_TYPE_IDR_SLICE:
11580ed5401bSmrg		h264_slice_header (dec, bufInfo);
11590ed5401bSmrg		break;
11600ed5401bSmrg	case H264_NAL_TYPE_SEQ_PARAM:
11610ed5401bSmrg		h264_parse_sequence_parameter_set (dec, bufInfo);
11620ed5401bSmrg		break;
11630ed5401bSmrg	case H264_NAL_TYPE_PIC_PARAM:
11640ed5401bSmrg	case H264_NAL_TYPE_SEI:
11650ed5401bSmrg	case H264_NAL_TYPE_ACCESS_UNIT:
11660ed5401bSmrg	case H264_NAL_TYPE_SEQ_EXTENSION:
11670ed5401bSmrg		/* NOP */
11680ed5401bSmrg		break;
11690ed5401bSmrg	default:
11700ed5401bSmrg		printf ("Nal type unknown %d \n ", type);
11710ed5401bSmrg		break;
11720ed5401bSmrg	}
11730ed5401bSmrg	return type;
11740ed5401bSmrg}
11750ed5401bSmrg
11760ed5401bSmrgstatic uint32_t h264_find_next_start_code (uint8_t * pBuf, uint32_t bufLen)
11770ed5401bSmrg{
11780ed5401bSmrg	uint32_t val;
11790ed5401bSmrg	uint32_t offset, startBytes;
11800ed5401bSmrg
11810ed5401bSmrg	offset = startBytes = 0;
11820ed5401bSmrg	if (pBuf[0] == 0 && pBuf[1] == 0 && pBuf[2] == 0 && pBuf[3] == 1) {
11830ed5401bSmrg		pBuf += 4;
11840ed5401bSmrg		offset = 4;
11850ed5401bSmrg		startBytes = 1;
11860ed5401bSmrg	} else if (pBuf[0] == 0 && pBuf[1] == 0 && pBuf[2] == 1) {
11870ed5401bSmrg		pBuf += 3;
11880ed5401bSmrg		offset = 3;
11890ed5401bSmrg		startBytes = 1;
11900ed5401bSmrg	}
11910ed5401bSmrg	val = 0xffffffff;
11920ed5401bSmrg	while (offset < bufLen - 3) {
11930ed5401bSmrg		val <<= 8;
11940ed5401bSmrg		val |= *pBuf++;
11950ed5401bSmrg		offset++;
11960ed5401bSmrg		if (val == H264_START_CODE)
11970ed5401bSmrg			return offset - 4;
11980ed5401bSmrg
11990ed5401bSmrg		if ((val & 0x00ffffff) == H264_START_CODE)
12000ed5401bSmrg			return offset - 3;
12010ed5401bSmrg	}
12020ed5401bSmrg	if (bufLen - offset <= 3 && startBytes == 0) {
12030ed5401bSmrg		startBytes = 0;
12040ed5401bSmrg		return 0;
12050ed5401bSmrg	}
12060ed5401bSmrg
12070ed5401bSmrg	return offset;
12080ed5401bSmrg}
12090ed5401bSmrg
12100ed5401bSmrgstatic int verify_checksum(uint8_t *buffer, uint32_t buffer_size)
12110ed5401bSmrg{
12120ed5401bSmrg	uint32_t buffer_pos = 0;
12130ed5401bSmrg	int done = 0;
12140ed5401bSmrg	h264_decode dec;
12150ed5401bSmrg
12160ed5401bSmrg	memset(&dec, 0, sizeof(h264_decode));
12170ed5401bSmrg	do {
12180ed5401bSmrg		uint32_t ret;
12190ed5401bSmrg
12200ed5401bSmrg		ret = h264_find_next_start_code (buffer + buffer_pos,
12210ed5401bSmrg				 buffer_size - buffer_pos);
12220ed5401bSmrg		if (ret == 0) {
12230ed5401bSmrg			done = 1;
12240ed5401bSmrg			if (buffer_pos == 0) {
12250ed5401bSmrg				fprintf (stderr,
12260ed5401bSmrg				 "couldn't find start code in buffer from 0\n");
12270ed5401bSmrg			}
12280ed5401bSmrg		} else {
12290ed5401bSmrg		/* have a complete NAL from buffer_pos to end */
12300ed5401bSmrg			if (ret > 3) {
12310ed5401bSmrg				uint32_t nal_len;
12320ed5401bSmrg				bufferInfo bufinfo;
12330ed5401bSmrg
12340ed5401bSmrg				nal_len = remove_03 (buffer + buffer_pos, ret);
12350ed5401bSmrg				bufinfo.decBuffer = buffer + buffer_pos + (buffer[buffer_pos + 2] == 1 ? 3 : 4);
12360ed5401bSmrg				bufinfo.decBufferSize = (nal_len - (buffer[buffer_pos + 2] == 1 ? 3 : 4)) * 8;
12370ed5401bSmrg				bufinfo.end = buffer + buffer_pos + nal_len;
12380ed5401bSmrg				bufinfo.numOfBitsInBuffer = 8;
12390ed5401bSmrg				bufinfo.decData = *bufinfo.decBuffer;
12400ed5401bSmrg				h264_parse_nal (&dec, &bufinfo);
12410ed5401bSmrg			}
12420ed5401bSmrg			buffer_pos += ret;	/*  buffer_pos points to next code */
12430ed5401bSmrg		}
12440ed5401bSmrg	} while (done == 0);
12450ed5401bSmrg
12460ed5401bSmrg	if ((dec.pic_width == gWidth) &&
12470ed5401bSmrg		(dec.pic_height == gHeight) &&
12480ed5401bSmrg		(dec.slice_type == gSliceType))
12490ed5401bSmrg	    return 0;
12500ed5401bSmrg	else
12510ed5401bSmrg		return -1;
12520ed5401bSmrg}
12530ed5401bSmrg
12540ed5401bSmrgstatic void check_result(struct amdgpu_vcn_bo fb_buf, struct amdgpu_vcn_bo bs_buf, int frame_type)
12550ed5401bSmrg{
12560ed5401bSmrg	uint32_t *fb_ptr;
12570ed5401bSmrg	uint8_t *bs_ptr;
12580ed5401bSmrg	uint32_t size;
12590ed5401bSmrg	int r;
12600ed5401bSmrg/* 	uint64_t s[3] = {0, 1121279001727, 1059312481445}; */
12610ed5401bSmrg
12620ed5401bSmrg	r = amdgpu_bo_cpu_map(fb_buf.handle, (void **)&fb_buf.ptr);
12630ed5401bSmrg	CU_ASSERT_EQUAL(r, 0);
12640ed5401bSmrg	fb_ptr = (uint32_t*)fb_buf.ptr;
12650ed5401bSmrg	size = fb_ptr[6];
12660ed5401bSmrg	r = amdgpu_bo_cpu_unmap(fb_buf.handle);
12670ed5401bSmrg	CU_ASSERT_EQUAL(r, 0);
12680ed5401bSmrg	r = amdgpu_bo_cpu_map(bs_buf.handle, (void **)&bs_buf.ptr);
12690ed5401bSmrg	CU_ASSERT_EQUAL(r, 0);
12700ed5401bSmrg
12710ed5401bSmrg	bs_ptr = (uint8_t*)bs_buf.ptr;
12720ed5401bSmrg	r = verify_checksum(bs_ptr, size);
12730ed5401bSmrg	CU_ASSERT_EQUAL(r, 0);
12740ed5401bSmrg	r = amdgpu_bo_cpu_unmap(bs_buf.handle);
12750ed5401bSmrg
12760ed5401bSmrg	CU_ASSERT_EQUAL(r, 0);
12770ed5401bSmrg}
12780ed5401bSmrg
1279bbff01ceSmrgstatic void amdgpu_cs_vcn_ib_zero_count(int *len, int num)
1280bbff01ceSmrg{
1281bbff01ceSmrg	for (int i = 0; i < num; i++)
1282bbff01ceSmrg		ib_cpu[(*len)++] = 0;
1283bbff01ceSmrg}
1284bbff01ceSmrg
12850ed5401bSmrgstatic void amdgpu_cs_vcn_enc_encode_frame(int frame_type)
12860ed5401bSmrg{
1287bbff01ceSmrg	struct amdgpu_vcn_bo bs_buf, fb_buf, input_buf;
1288bbff01ceSmrg	int len, r;
12890ed5401bSmrg	unsigned width = 160, height = 128, buf_size;
12900ed5401bSmrg	uint32_t *p_task_size = NULL;
12910ed5401bSmrg	uint32_t task_offset = 0, st_offset;
12920ed5401bSmrg	uint32_t *st_size = NULL;
12930ed5401bSmrg	uint32_t fw_maj = 1, fw_min = 9;
12940ed5401bSmrg
1295b0ab5608Smrg	if (vcn_ip_version_major == 2) {
12960ed5401bSmrg		fw_maj = 1;
12970ed5401bSmrg		fw_min = 1;
1298b0ab5608Smrg	} else if (vcn_ip_version_major == 3) {
12990ed5401bSmrg		fw_maj = 1;
13000ed5401bSmrg		fw_min = 0;
13010ed5401bSmrg	}
13020ed5401bSmrg	gSliceType = frame_type;
13030ed5401bSmrg	buf_size = ALIGN(width, 256) * ALIGN(height, 32) * 3 / 2;
13040ed5401bSmrg
13050ed5401bSmrg	num_resources = 0;
13060ed5401bSmrg	alloc_resource(&bs_buf, 4096, AMDGPU_GEM_DOMAIN_GTT);
13070ed5401bSmrg	alloc_resource(&fb_buf, 4096, AMDGPU_GEM_DOMAIN_GTT);
1308bbff01ceSmrg	alloc_resource(&input_buf, buf_size, AMDGPU_GEM_DOMAIN_GTT);
13090ed5401bSmrg	resources[num_resources++] = enc_buf.handle;
13100ed5401bSmrg	resources[num_resources++] = cpb_buf.handle;
13110ed5401bSmrg	resources[num_resources++] = bs_buf.handle;
13120ed5401bSmrg	resources[num_resources++] = fb_buf.handle;
1313bbff01ceSmrg	resources[num_resources++] = input_buf.handle;
13140ed5401bSmrg	resources[num_resources++] = ib_handle;
13150ed5401bSmrg
13160ed5401bSmrg
13170ed5401bSmrg	r = amdgpu_bo_cpu_map(bs_buf.handle, (void**)&bs_buf.ptr);
13180ed5401bSmrg	memset(bs_buf.ptr, 0, 4096);
13190ed5401bSmrg	r = amdgpu_bo_cpu_unmap(bs_buf.handle);
13200ed5401bSmrg
13210ed5401bSmrg	r = amdgpu_bo_cpu_map(fb_buf.handle, (void**)&fb_buf.ptr);
13220ed5401bSmrg	memset(fb_buf.ptr, 0, 4096);
13230ed5401bSmrg	r = amdgpu_bo_cpu_unmap(fb_buf.handle);
13240ed5401bSmrg
1325bbff01ceSmrg	r = amdgpu_bo_cpu_map(input_buf.handle, (void **)&input_buf.ptr);
13260ed5401bSmrg	CU_ASSERT_EQUAL(r, 0);
13270ed5401bSmrg
13280ed5401bSmrg	for (int i = 0; i < ALIGN(height, 32) * 3 / 2; i++)
1329bbff01ceSmrg		memcpy(input_buf.ptr + i * ALIGN(width, 256), frame + i * width, width);
13300ed5401bSmrg
1331bbff01ceSmrg	r = amdgpu_bo_cpu_unmap(input_buf.handle);
13320ed5401bSmrg	CU_ASSERT_EQUAL(r, 0);
13330ed5401bSmrg
13340ed5401bSmrg	len = 0;
1335b0ab5608Smrg
1336b0ab5608Smrg	if (vcn_unified_ring)
1337b0ab5608Smrg		amdgpu_cs_sq_head(ib_cpu, &len, true);
1338b0ab5608Smrg
13390ed5401bSmrg	/* session info */
13400ed5401bSmrg	st_offset = len;
13410ed5401bSmrg	st_size = &ib_cpu[len++];	/* size */
13420ed5401bSmrg	ib_cpu[len++] = 0x00000001;	/* RENCODE_IB_PARAM_SESSION_INFO */
13430ed5401bSmrg	ib_cpu[len++] = ((fw_maj << 16) | (fw_min << 0));
13440ed5401bSmrg	ib_cpu[len++] = enc_buf.addr >> 32;
13450ed5401bSmrg	ib_cpu[len++] = enc_buf.addr;
13460ed5401bSmrg	ib_cpu[len++] = 1;	/* RENCODE_ENGINE_TYPE_ENCODE */;
13470ed5401bSmrg	*st_size = (len - st_offset) * 4;
13480ed5401bSmrg
13490ed5401bSmrg	/* task info */
13500ed5401bSmrg	task_offset = len;
13510ed5401bSmrg	st_offset = len;
13520ed5401bSmrg	st_size = &ib_cpu[len++];	/* size */
13530ed5401bSmrg	ib_cpu[len++] = 0x00000002;	/* RENCODE_IB_PARAM_TASK_INFO */
13540ed5401bSmrg	p_task_size = &ib_cpu[len++];
13550ed5401bSmrg	ib_cpu[len++] = enc_task_id++;	/* task_id */
13560ed5401bSmrg	ib_cpu[len++] = 1;	/* feedback */
13570ed5401bSmrg	*st_size = (len - st_offset) * 4;
13580ed5401bSmrg
13590ed5401bSmrg	if (frame_type == 2) {
13600ed5401bSmrg		/* sps */
13610ed5401bSmrg		st_offset = len;
13620ed5401bSmrg		st_size = &ib_cpu[len++];	/* size */
1363b0ab5608Smrg		if(vcn_ip_version_major == 1)
13640ed5401bSmrg			ib_cpu[len++] = 0x00000020;	/* RENCODE_IB_PARAM_DIRECT_OUTPUT_NALU vcn 1 */
13650ed5401bSmrg		else
1366bbff01ceSmrg			ib_cpu[len++] = 0x0000000a;	/* RENCODE_IB_PARAM_DIRECT_OUTPUT_NALU other vcn */
13670ed5401bSmrg		ib_cpu[len++] = 0x00000002;	/* RENCODE_DIRECT_OUTPUT_NALU_TYPE_SPS */
13680ed5401bSmrg		ib_cpu[len++] = 0x00000011;	/* sps len */
13690ed5401bSmrg		ib_cpu[len++] = 0x00000001;	/* start code */
13700ed5401bSmrg		ib_cpu[len++] = 0x6764440b;
13710ed5401bSmrg		ib_cpu[len++] = 0xac54c284;
13720ed5401bSmrg		ib_cpu[len++] = 0x68078442;
13730ed5401bSmrg		ib_cpu[len++] = 0x37000000;
13740ed5401bSmrg		*st_size = (len - st_offset) * 4;
13750ed5401bSmrg
13760ed5401bSmrg		/* pps */
13770ed5401bSmrg		st_offset = len;
13780ed5401bSmrg		st_size = &ib_cpu[len++];	/* size */
1379b0ab5608Smrg		if(vcn_ip_version_major == 1)
13800ed5401bSmrg			ib_cpu[len++] = 0x00000020;	/* RENCODE_IB_PARAM_DIRECT_OUTPUT_NALU vcn 1*/
13810ed5401bSmrg		else
1382bbff01ceSmrg			ib_cpu[len++] = 0x0000000a;	/* RENCODE_IB_PARAM_DIRECT_OUTPUT_NALU other vcn*/
13830ed5401bSmrg		ib_cpu[len++] = 0x00000003;	/* RENCODE_DIRECT_OUTPUT_NALU_TYPE_PPS */
13840ed5401bSmrg		ib_cpu[len++] = 0x00000008;	/* pps len */
13850ed5401bSmrg		ib_cpu[len++] = 0x00000001;	/* start code */
13860ed5401bSmrg		ib_cpu[len++] = 0x68ce3c80;
13870ed5401bSmrg		*st_size = (len - st_offset) * 4;
13880ed5401bSmrg	}
13890ed5401bSmrg
13900ed5401bSmrg	/* slice header */
13910ed5401bSmrg	st_offset = len;
13920ed5401bSmrg	st_size = &ib_cpu[len++];	/* size */
1393b0ab5608Smrg	if(vcn_ip_version_major == 1)
13940ed5401bSmrg		ib_cpu[len++] = 0x0000000a; /* RENCODE_IB_PARAM_SLICE_HEADER vcn 1 */
13950ed5401bSmrg	else
1396bbff01ceSmrg		ib_cpu[len++] = 0x0000000b; /* RENCODE_IB_PARAM_SLICE_HEADER other vcn */
13970ed5401bSmrg	if (frame_type == 2) {
13980ed5401bSmrg		ib_cpu[len++] = 0x65000000;
13990ed5401bSmrg		ib_cpu[len++] = 0x11040000;
14000ed5401bSmrg	} else {
14010ed5401bSmrg		ib_cpu[len++] = 0x41000000;
14020ed5401bSmrg		ib_cpu[len++] = 0x34210000;
14030ed5401bSmrg	}
14040ed5401bSmrg	ib_cpu[len++] = 0xe0000000;
1405bbff01ceSmrg	amdgpu_cs_vcn_ib_zero_count(&len, 13);
14060ed5401bSmrg
14070ed5401bSmrg	ib_cpu[len++] = 0x00000001;
14080ed5401bSmrg	ib_cpu[len++] = 0x00000008;
14090ed5401bSmrg	ib_cpu[len++] = 0x00020000;
14100ed5401bSmrg	ib_cpu[len++] = 0x00000000;
14110ed5401bSmrg	ib_cpu[len++] = 0x00000001;
14120ed5401bSmrg	ib_cpu[len++] = 0x00000015;
14130ed5401bSmrg	ib_cpu[len++] = 0x00020001;
14140ed5401bSmrg	ib_cpu[len++] = 0x00000000;
14150ed5401bSmrg	ib_cpu[len++] = 0x00000001;
14160ed5401bSmrg	ib_cpu[len++] = 0x00000003;
1417bbff01ceSmrg	amdgpu_cs_vcn_ib_zero_count(&len, 22);
14180ed5401bSmrg	*st_size = (len - st_offset) * 4;
14190ed5401bSmrg
14200ed5401bSmrg	/* encode params */
14210ed5401bSmrg	st_offset = len;
14220ed5401bSmrg	st_size = &ib_cpu[len++];	/* size */
1423b0ab5608Smrg	if(vcn_ip_version_major == 1)
1424bbff01ceSmrg		ib_cpu[len++] = 0x0000000b;	/* RENCODE_IB_PARAM_ENCODE_PARAMS vcn 1 */
14250ed5401bSmrg	else
1426bbff01ceSmrg		ib_cpu[len++] = 0x0000000f;	/* RENCODE_IB_PARAM_ENCODE_PARAMS other vcn */
14270ed5401bSmrg	ib_cpu[len++] = frame_type;
14280ed5401bSmrg	ib_cpu[len++] = 0x0001f000;
1429bbff01ceSmrg	ib_cpu[len++] = input_buf.addr >> 32;
1430bbff01ceSmrg	ib_cpu[len++] = input_buf.addr;
1431bbff01ceSmrg	ib_cpu[len++] = (input_buf.addr + ALIGN(width, 256) * ALIGN(height, 32)) >> 32;
1432bbff01ceSmrg	ib_cpu[len++] = input_buf.addr + ALIGN(width, 256) * ALIGN(height, 32);
14330ed5401bSmrg	ib_cpu[len++] = 0x00000100;
14340ed5401bSmrg	ib_cpu[len++] = 0x00000080;
14350ed5401bSmrg	ib_cpu[len++] = 0x00000000;
14360ed5401bSmrg	ib_cpu[len++] = 0xffffffff;
14370ed5401bSmrg	ib_cpu[len++] = 0x00000000;
14380ed5401bSmrg	*st_size = (len - st_offset) * 4;
14390ed5401bSmrg
14400ed5401bSmrg	/* encode params h264 */
14410ed5401bSmrg	st_offset = len;
14420ed5401bSmrg	st_size = &ib_cpu[len++];	/* size */
14430ed5401bSmrg	ib_cpu[len++] = 0x00200003;	/* RENCODE_H264_IB_PARAM_ENCODE_PARAMS */
1444bbff01ceSmrg	if (vcn_ip_version_major <= 2) {
14450ed5401bSmrg		ib_cpu[len++] = 0x00000000;
14460ed5401bSmrg		ib_cpu[len++] = 0x00000000;
14470ed5401bSmrg		ib_cpu[len++] = 0x00000000;
14480ed5401bSmrg		ib_cpu[len++] = 0xffffffff;
14490ed5401bSmrg	} else {
14500ed5401bSmrg		ib_cpu[len++] = 0x00000000;
14510ed5401bSmrg		ib_cpu[len++] = 0x00000000;
14520ed5401bSmrg		ib_cpu[len++] = 0x00000000;
14530ed5401bSmrg		ib_cpu[len++] = 0x00000000;
14540ed5401bSmrg		ib_cpu[len++] = 0x00000000;
14550ed5401bSmrg		ib_cpu[len++] = 0x00000000;
14560ed5401bSmrg		ib_cpu[len++] = 0x00000000;
14570ed5401bSmrg		ib_cpu[len++] = 0xffffffff;
14580ed5401bSmrg		ib_cpu[len++] = 0x00000000;
14590ed5401bSmrg		ib_cpu[len++] = 0x00000000;
14600ed5401bSmrg		ib_cpu[len++] = 0x00000000;
14610ed5401bSmrg		ib_cpu[len++] = 0x00000000;
14620ed5401bSmrg		ib_cpu[len++] = 0xffffffff;
14630ed5401bSmrg		ib_cpu[len++] = 0x00000000;
14640ed5401bSmrg		ib_cpu[len++] = 0x00000000;
14650ed5401bSmrg		ib_cpu[len++] = 0x00000000;
14660ed5401bSmrg		ib_cpu[len++] = 0x00000000;
1467bbff01ceSmrg		ib_cpu[len++] = 0x00000001;
14680ed5401bSmrg	}
14690ed5401bSmrg	*st_size = (len - st_offset) * 4;
14700ed5401bSmrg
14710ed5401bSmrg	/* encode context */
14720ed5401bSmrg	st_offset = len;
14730ed5401bSmrg	st_size = &ib_cpu[len++];	/* size */
1474b0ab5608Smrg	if(vcn_ip_version_major == 1)
14750ed5401bSmrg		ib_cpu[len++] = 0x0000000d;	/* ENCODE_CONTEXT_BUFFER  vcn 1 */
14760ed5401bSmrg	else
1477bbff01ceSmrg		ib_cpu[len++] = 0x00000011;	/* ENCODE_CONTEXT_BUFFER  other vcn */
14780ed5401bSmrg	ib_cpu[len++] = cpb_buf.addr >> 32;
14790ed5401bSmrg	ib_cpu[len++] = cpb_buf.addr;
14800ed5401bSmrg	ib_cpu[len++] = 0x00000000;	/* swizzle mode */
14810ed5401bSmrg	ib_cpu[len++] = 0x00000100;	/* luma pitch */
14820ed5401bSmrg	ib_cpu[len++] = 0x00000100;	/* chroma pitch */
1483bbff01ceSmrg	ib_cpu[len++] = 0x00000002; /* no reconstructed picture */
14840ed5401bSmrg	ib_cpu[len++] = 0x00000000;	/* reconstructed pic 1 luma offset */
14850ed5401bSmrg	ib_cpu[len++] = ALIGN(width, 256) * ALIGN(height, 32);	/* pic1 chroma offset */
1486bbff01ceSmrg	if(vcn_ip_version_major == 4)
1487bbff01ceSmrg		amdgpu_cs_vcn_ib_zero_count(&len, 2);
14880ed5401bSmrg	ib_cpu[len++] = ALIGN(width, 256) * ALIGN(height, 32) * 3 / 2;	/* pic2 luma offset */
14890ed5401bSmrg	ib_cpu[len++] = ALIGN(width, 256) * ALIGN(height, 32) * 5 / 2;	/* pic2 chroma offset */
14900ed5401bSmrg
1491bbff01ceSmrg	amdgpu_cs_vcn_ib_zero_count(&len, 280);
14920ed5401bSmrg	*st_size = (len - st_offset) * 4;
14930ed5401bSmrg
14940ed5401bSmrg	/* bitstream buffer */
14950ed5401bSmrg	st_offset = len;
14960ed5401bSmrg	st_size = &ib_cpu[len++];	/* size */
1497b0ab5608Smrg	if(vcn_ip_version_major == 1)
14980ed5401bSmrg		ib_cpu[len++] = 0x0000000e;	/* VIDEO_BITSTREAM_BUFFER vcn 1 */
14990ed5401bSmrg	else
1500bbff01ceSmrg		ib_cpu[len++] = 0x00000012;	/* VIDEO_BITSTREAM_BUFFER other vcn */
1501bbff01ceSmrg
15020ed5401bSmrg	ib_cpu[len++] = 0x00000000;	/* mode */
15030ed5401bSmrg	ib_cpu[len++] = bs_buf.addr >> 32;
15040ed5401bSmrg	ib_cpu[len++] = bs_buf.addr;
15050ed5401bSmrg	ib_cpu[len++] = 0x0001f000;
15060ed5401bSmrg	ib_cpu[len++] = 0x00000000;
15070ed5401bSmrg	*st_size = (len - st_offset) * 4;
15080ed5401bSmrg
15090ed5401bSmrg	/* feedback */
15100ed5401bSmrg	st_offset = len;
15110ed5401bSmrg	st_size = &ib_cpu[len++];	/* size */
1512b0ab5608Smrg	if(vcn_ip_version_major == 1)
15130ed5401bSmrg		ib_cpu[len++] = 0x00000010;	/* FEEDBACK_BUFFER vcn 1 */
15140ed5401bSmrg	else
15150ed5401bSmrg		ib_cpu[len++] = 0x00000015;	/* FEEDBACK_BUFFER vcn 2,3 */
15160ed5401bSmrg	ib_cpu[len++] = 0x00000000;
15170ed5401bSmrg	ib_cpu[len++] = fb_buf.addr >> 32;
15180ed5401bSmrg	ib_cpu[len++] = fb_buf.addr;
15190ed5401bSmrg	ib_cpu[len++] = 0x00000010;
15200ed5401bSmrg	ib_cpu[len++] = 0x00000028;
15210ed5401bSmrg	*st_size = (len - st_offset) * 4;
15220ed5401bSmrg
15230ed5401bSmrg	/* intra refresh */
15240ed5401bSmrg	st_offset = len;
15250ed5401bSmrg	st_size = &ib_cpu[len++];
1526b0ab5608Smrg	if(vcn_ip_version_major == 1)
15270ed5401bSmrg		ib_cpu[len++] = 0x0000000c;	/* INTRA_REFRESH vcn 1 */
15280ed5401bSmrg	else
15290ed5401bSmrg		ib_cpu[len++] = 0x00000010;	/* INTRA_REFRESH vcn 2,3 */
15300ed5401bSmrg	ib_cpu[len++] = 0x00000000;
15310ed5401bSmrg	ib_cpu[len++] = 0x00000000;
15320ed5401bSmrg	ib_cpu[len++] = 0x00000000;
15330ed5401bSmrg	*st_size = (len - st_offset) * 4;
15340ed5401bSmrg
1535b0ab5608Smrg	if(vcn_ip_version_major != 1) {
15360ed5401bSmrg		/* Input Format */
15370ed5401bSmrg		st_offset = len;
15380ed5401bSmrg		st_size = &ib_cpu[len++];
15390ed5401bSmrg		ib_cpu[len++] = 0x0000000c;
15400ed5401bSmrg		ib_cpu[len++] = 0x00000000;	/* RENCODE_COLOR_VOLUME_G22_BT709 */
15410ed5401bSmrg		ib_cpu[len++] = 0x00000000;
15420ed5401bSmrg		ib_cpu[len++] = 0x00000000;
15430ed5401bSmrg		ib_cpu[len++] = 0x00000000;
15440ed5401bSmrg		ib_cpu[len++] = 0x00000000;
15450ed5401bSmrg		ib_cpu[len++] = 0x00000000;	/* RENCODE_COLOR_BIT_DEPTH_8_BIT */
15460ed5401bSmrg		ib_cpu[len++] = 0x00000000;	/* RENCODE_COLOR_PACKING_FORMAT_NV12 */
15470ed5401bSmrg		*st_size = (len - st_offset) * 4;
15480ed5401bSmrg
15490ed5401bSmrg		/* Output Format */
15500ed5401bSmrg		st_offset = len;
15510ed5401bSmrg		st_size = &ib_cpu[len++];
15520ed5401bSmrg		ib_cpu[len++] = 0x0000000d;
15530ed5401bSmrg		ib_cpu[len++] = 0x00000000;	/* RENCODE_COLOR_VOLUME_G22_BT709 */
15540ed5401bSmrg		ib_cpu[len++] = 0x00000000;
15550ed5401bSmrg		ib_cpu[len++] = 0x00000000;
15560ed5401bSmrg		ib_cpu[len++] = 0x00000000;	/* RENCODE_COLOR_BIT_DEPTH_8_BIT */
15570ed5401bSmrg		*st_size = (len - st_offset) * 4;
15580ed5401bSmrg	}
15590ed5401bSmrg	/* op_speed */
15600ed5401bSmrg	st_offset = len;
15610ed5401bSmrg	st_size = &ib_cpu[len++];
15620ed5401bSmrg	ib_cpu[len++] = 0x01000006;	/* SPEED_ENCODING_MODE */
15630ed5401bSmrg	*st_size = (len - st_offset) * 4;
15640ed5401bSmrg
15650ed5401bSmrg	/* op_enc */
15660ed5401bSmrg	st_offset = len;
15670ed5401bSmrg	st_size = &ib_cpu[len++];
15680ed5401bSmrg	ib_cpu[len++] = 0x01000003;
15690ed5401bSmrg	*st_size = (len - st_offset) * 4;
15700ed5401bSmrg
15710ed5401bSmrg	*p_task_size = (len - task_offset) * 4;
1572b0ab5608Smrg
1573b0ab5608Smrg	if (vcn_unified_ring)
1574b0ab5608Smrg		amdgpu_cs_sq_ib_tail(ib_cpu + len);
1575b0ab5608Smrg
15760ed5401bSmrg	r = submit(len, AMDGPU_HW_IP_VCN_ENC);
15770ed5401bSmrg	CU_ASSERT_EQUAL(r, 0);
15780ed5401bSmrg
15790ed5401bSmrg	/* check result */
15800ed5401bSmrg	check_result(fb_buf, bs_buf, frame_type);
15810ed5401bSmrg
15820ed5401bSmrg	free_resource(&fb_buf);
15830ed5401bSmrg	free_resource(&bs_buf);
1584bbff01ceSmrg	free_resource(&input_buf);
1585d8807b2fSmrg}
1586d8807b2fSmrg
/*
 * Test entry point for the encode step: runs one pass of
 * amdgpu_cs_vcn_enc_encode_frame() with frame type 2 (IDR frame).
 */
static void amdgpu_cs_vcn_enc_encode(void)
{
	enum { VCN_ENC_FRAME_IDR = 2 };	/* frame_type value for an IDR frame */

	amdgpu_cs_vcn_enc_encode_frame(VCN_ENC_FRAME_IDR);
}
1591d8807b2fSmrg
1592d8807b2fSmrgstatic void amdgpu_cs_vcn_enc_destroy(void)
1593d8807b2fSmrg{
15940ed5401bSmrg	int len = 0, r;
15950ed5401bSmrg	uint32_t *p_task_size = NULL;
15960ed5401bSmrg	uint32_t task_offset = 0, st_offset;
15970ed5401bSmrg	uint32_t *st_size = NULL;
15980ed5401bSmrg	uint32_t fw_maj = 1, fw_min = 9;
15990ed5401bSmrg
1600b0ab5608Smrg	if (vcn_ip_version_major == 2) {
16010ed5401bSmrg		fw_maj = 1;
16020ed5401bSmrg		fw_min = 1;
1603b0ab5608Smrg	} else if (vcn_ip_version_major == 3) {
16040ed5401bSmrg		fw_maj = 1;
16050ed5401bSmrg		fw_min = 0;
16060ed5401bSmrg	}
16070ed5401bSmrg
16080ed5401bSmrg	num_resources = 0;
16090ed5401bSmrg/* 	alloc_resource(&enc_buf, 128 * 1024, AMDGPU_GEM_DOMAIN_GTT); */
16100ed5401bSmrg	resources[num_resources++] = enc_buf.handle;
16110ed5401bSmrg	resources[num_resources++] = ib_handle;
16120ed5401bSmrg
1613b0ab5608Smrg	if (vcn_unified_ring)
1614b0ab5608Smrg		amdgpu_cs_sq_head(ib_cpu, &len, true);
1615b0ab5608Smrg
16160ed5401bSmrg	/* session info */
16170ed5401bSmrg	st_offset = len;
16180ed5401bSmrg	st_size = &ib_cpu[len++];	/* size */
16190ed5401bSmrg	ib_cpu[len++] = 0x00000001;	/* RENCODE_IB_PARAM_SESSION_INFO */
16200ed5401bSmrg	ib_cpu[len++] = ((fw_maj << 16) | (fw_min << 0));
16210ed5401bSmrg	ib_cpu[len++] = enc_buf.addr >> 32;
16220ed5401bSmrg	ib_cpu[len++] = enc_buf.addr;
16230ed5401bSmrg	ib_cpu[len++] = 1;	/* RENCODE_ENGINE_TYPE_ENCODE; */
16240ed5401bSmrg	*st_size = (len - st_offset) * 4;
16250ed5401bSmrg
16260ed5401bSmrg	/* task info */
16270ed5401bSmrg	task_offset = len;
16280ed5401bSmrg	st_offset = len;
16290ed5401bSmrg	st_size = &ib_cpu[len++];	/* size */
16300ed5401bSmrg	ib_cpu[len++] = 0x00000002;	/* RENCODE_IB_PARAM_TASK_INFO */
16310ed5401bSmrg	p_task_size = &ib_cpu[len++];
16320ed5401bSmrg	ib_cpu[len++] = enc_task_id++;	/* task_id */
16330ed5401bSmrg	ib_cpu[len++] = 0;	/* feedback */
16340ed5401bSmrg	*st_size = (len - st_offset) * 4;
16350ed5401bSmrg
16360ed5401bSmrg	/*  op close */
16370ed5401bSmrg	st_offset = len;
16380ed5401bSmrg	st_size = &ib_cpu[len++];
16390ed5401bSmrg	ib_cpu[len++] = 0x01000002;	/* RENCODE_IB_OP_CLOSE_SESSION */
16400ed5401bSmrg	*st_size = (len - st_offset) * 4;
16410ed5401bSmrg
16420ed5401bSmrg	*p_task_size = (len - task_offset) * 4;
16430ed5401bSmrg
1644b0ab5608Smrg	if (vcn_unified_ring)
1645b0ab5608Smrg		amdgpu_cs_sq_ib_tail(ib_cpu + len);
1646b0ab5608Smrg
16470ed5401bSmrg	r = submit(len, AMDGPU_HW_IP_VCN_ENC);
16480ed5401bSmrg	CU_ASSERT_EQUAL(r, 0);
16490ed5401bSmrg
16500ed5401bSmrg	free_resource(&cpb_buf);
16510ed5401bSmrg	free_resource(&enc_buf);
1652d8807b2fSmrg}
1653