1d8807b2fSmrg/* 2d8807b2fSmrg * Copyright 2017 Advanced Micro Devices, Inc. 3d8807b2fSmrg * 4d8807b2fSmrg * Permission is hereby granted, free of charge, to any person obtaining a 5d8807b2fSmrg * copy of this software and associated documentation files (the "Software"), 6d8807b2fSmrg * to deal in the Software without restriction, including without limitation 7d8807b2fSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8d8807b2fSmrg * and/or sell copies of the Software, and to permit persons to whom the 9d8807b2fSmrg * Software is furnished to do so, subject to the following conditions: 10d8807b2fSmrg * 11d8807b2fSmrg * The above copyright notice and this permission notice shall be included in 12d8807b2fSmrg * all copies or substantial portions of the Software. 13d8807b2fSmrg * 14d8807b2fSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15d8807b2fSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16d8807b2fSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17d8807b2fSmrg * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18d8807b2fSmrg * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19d8807b2fSmrg * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20d8807b2fSmrg * OTHER DEALINGS IN THE SOFTWARE. 21d8807b2fSmrg * 22d8807b2fSmrg*/ 23d8807b2fSmrg 24d8807b2fSmrg#include <stdio.h> 250ed5401bSmrg#include <string.h> 26d8807b2fSmrg#include <inttypes.h> 270ed5401bSmrg#include <unistd.h> 28d8807b2fSmrg 29d8807b2fSmrg#include "CUnit/Basic.h" 30d8807b2fSmrg 31b0ab5608Smrg#include <unistd.h> 32d8807b2fSmrg#include "util_math.h" 33d8807b2fSmrg 34d8807b2fSmrg#include "amdgpu_test.h" 35d8807b2fSmrg#include "amdgpu_drm.h" 36d8807b2fSmrg#include "amdgpu_internal.h" 37d8807b2fSmrg#include "decode_messages.h" 380ed5401bSmrg#include "frame.h" 39d8807b2fSmrg 40d8807b2fSmrg#define IB_SIZE 4096 41d8807b2fSmrg#define MAX_RESOURCES 16 42d8807b2fSmrg 43b0ab5608Smrg#define DECODE_CMD_MSG_BUFFER 0x00000000 44b0ab5608Smrg#define DECODE_CMD_DPB_BUFFER 0x00000001 45b0ab5608Smrg#define DECODE_CMD_DECODING_TARGET_BUFFER 0x00000002 46b0ab5608Smrg#define DECODE_CMD_FEEDBACK_BUFFER 0x00000003 47b0ab5608Smrg#define DECODE_CMD_PROB_TBL_BUFFER 0x00000004 48b0ab5608Smrg#define DECODE_CMD_SESSION_CONTEXT_BUFFER 0x00000005 49b0ab5608Smrg#define DECODE_CMD_BITSTREAM_BUFFER 0x00000100 50b0ab5608Smrg#define DECODE_CMD_IT_SCALING_TABLE_BUFFER 0x00000204 51b0ab5608Smrg#define DECODE_CMD_CONTEXT_BUFFER 0x00000206 52b0ab5608Smrg 53b0ab5608Smrg#define DECODE_IB_PARAM_DECODE_BUFFER (0x00000001) 54b0ab5608Smrg 55b0ab5608Smrg#define DECODE_CMDBUF_FLAGS_MSG_BUFFER (0x00000001) 56b0ab5608Smrg#define DECODE_CMDBUF_FLAGS_DPB_BUFFER (0x00000002) 57b0ab5608Smrg#define DECODE_CMDBUF_FLAGS_BITSTREAM_BUFFER (0x00000004) 58b0ab5608Smrg#define DECODE_CMDBUF_FLAGS_DECODING_TARGET_BUFFER (0x00000008) 59b0ab5608Smrg#define DECODE_CMDBUF_FLAGS_FEEDBACK_BUFFER (0x00000010) 60b0ab5608Smrg#define DECODE_CMDBUF_FLAGS_IT_SCALING_BUFFER (0x00000200) 61b0ab5608Smrg#define DECODE_CMDBUF_FLAGS_CONTEXT_BUFFER (0x00000800) 62b0ab5608Smrg#define DECODE_CMDBUF_FLAGS_PROB_TBL_BUFFER (0x00001000) 63b0ab5608Smrg#define DECODE_CMDBUF_FLAGS_SESSION_CONTEXT_BUFFER (0x00100000) 64b0ab5608Smrg 65b0ab5608Smrgstatic bool vcn_dec_sw_ring = false; 66b0ab5608Smrgstatic bool vcn_unified_ring = false; 67b0ab5608Smrg 680ed5401bSmrg#define H264_NAL_TYPE_NON_IDR_SLICE 1 690ed5401bSmrg#define H264_NAL_TYPE_DP_A_SLICE 2 700ed5401bSmrg#define H264_NAL_TYPE_DP_B_SLICE 3 710ed5401bSmrg#define H264_NAL_TYPE_DP_C_SLICE 0x4 720ed5401bSmrg#define H264_NAL_TYPE_IDR_SLICE 0x5 730ed5401bSmrg#define H264_NAL_TYPE_SEI 0x6 740ed5401bSmrg#define H264_NAL_TYPE_SEQ_PARAM 0x7 750ed5401bSmrg#define H264_NAL_TYPE_PIC_PARAM 0x8 760ed5401bSmrg#define H264_NAL_TYPE_ACCESS_UNIT 0x9 770ed5401bSmrg#define H264_NAL_TYPE_END_OF_SEQ 0xa 780ed5401bSmrg#define H264_NAL_TYPE_END_OF_STREAM 0xb 790ed5401bSmrg#define H264_NAL_TYPE_FILLER_DATA 0xc 800ed5401bSmrg#define H264_NAL_TYPE_SEQ_EXTENSION 0xd 810ed5401bSmrg 820ed5401bSmrg#define H264_START_CODE 0x000001 830ed5401bSmrg 84d8807b2fSmrgstruct amdgpu_vcn_bo { 85d8807b2fSmrg amdgpu_bo_handle handle; 86d8807b2fSmrg amdgpu_va_handle va_handle; 87d8807b2fSmrg uint64_t addr; 88d8807b2fSmrg uint64_t size; 89d8807b2fSmrg uint8_t *ptr; 90d8807b2fSmrg}; 91d8807b2fSmrg 92b0ab5608Smrgtypedef struct rvcn_decode_buffer_s { 93b0ab5608Smrg unsigned int valid_buf_flag; 94b0ab5608Smrg unsigned int msg_buffer_address_hi; 95b0ab5608Smrg unsigned int msg_buffer_address_lo; 96b0ab5608Smrg unsigned int dpb_buffer_address_hi; 97b0ab5608Smrg unsigned int dpb_buffer_address_lo; 98b0ab5608Smrg unsigned int target_buffer_address_hi; 99b0ab5608Smrg unsigned int target_buffer_address_lo; 100b0ab5608Smrg unsigned int session_contex_buffer_address_hi; 101b0ab5608Smrg unsigned int session_contex_buffer_address_lo; 102b0ab5608Smrg unsigned int bitstream_buffer_address_hi; 103b0ab5608Smrg unsigned int bitstream_buffer_address_lo; 104b0ab5608Smrg unsigned int context_buffer_address_hi; 105b0ab5608Smrg unsigned int context_buffer_address_lo; 106b0ab5608Smrg unsigned int feedback_buffer_address_hi; 107b0ab5608Smrg unsigned int feedback_buffer_address_lo; 108b0ab5608Smrg unsigned int luma_hist_buffer_address_hi; 109b0ab5608Smrg unsigned int luma_hist_buffer_address_lo; 110b0ab5608Smrg unsigned int prob_tbl_buffer_address_hi; 111b0ab5608Smrg unsigned int prob_tbl_buffer_address_lo; 112b0ab5608Smrg unsigned int sclr_coeff_buffer_address_hi; 113b0ab5608Smrg unsigned int sclr_coeff_buffer_address_lo; 114b0ab5608Smrg unsigned int it_sclr_table_buffer_address_hi; 115b0ab5608Smrg unsigned int it_sclr_table_buffer_address_lo; 116b0ab5608Smrg unsigned int sclr_target_buffer_address_hi; 117b0ab5608Smrg unsigned int sclr_target_buffer_address_lo; 118b0ab5608Smrg unsigned int cenc_size_info_buffer_address_hi; 119b0ab5608Smrg unsigned int cenc_size_info_buffer_address_lo; 120b0ab5608Smrg unsigned int mpeg2_pic_param_buffer_address_hi; 121b0ab5608Smrg unsigned int mpeg2_pic_param_buffer_address_lo; 122b0ab5608Smrg unsigned int mpeg2_mb_control_buffer_address_hi; 123b0ab5608Smrg unsigned int mpeg2_mb_control_buffer_address_lo; 124b0ab5608Smrg unsigned int mpeg2_idct_coeff_buffer_address_hi; 125b0ab5608Smrg unsigned int mpeg2_idct_coeff_buffer_address_lo; 126b0ab5608Smrg} rvcn_decode_buffer_t; 127b0ab5608Smrg 128b0ab5608Smrgtypedef struct rvcn_decode_ib_package_s { 129b0ab5608Smrg unsigned int package_size; 130b0ab5608Smrg unsigned int package_type; 131b0ab5608Smrg} rvcn_decode_ib_package_t; 132b0ab5608Smrg 133b0ab5608Smrg 1345324fb0dSmrgstruct amdgpu_vcn_reg { 1355324fb0dSmrg uint32_t data0; 1365324fb0dSmrg uint32_t data1; 1375324fb0dSmrg uint32_t cmd; 1385324fb0dSmrg uint32_t nop; 1395324fb0dSmrg uint32_t cntl; 1405324fb0dSmrg}; 1415324fb0dSmrg 1420ed5401bSmrgtypedef struct BufferInfo_t { 1430ed5401bSmrg uint32_t numOfBitsInBuffer; 1440ed5401bSmrg const uint8_t *decBuffer; 1450ed5401bSmrg uint8_t decData; 1460ed5401bSmrg uint32_t decBufferSize; 1470ed5401bSmrg const uint8_t *end; 1480ed5401bSmrg} bufferInfo; 1490ed5401bSmrg 1500ed5401bSmrgtypedef struct h264_decode_t { 1510ed5401bSmrg uint8_t profile; 1520ed5401bSmrg uint8_t level_idc; 1530ed5401bSmrg uint8_t nal_ref_idc; 1540ed5401bSmrg uint8_t nal_unit_type; 1550ed5401bSmrg uint32_t pic_width, pic_height; 1560ed5401bSmrg uint32_t slice_type; 1570ed5401bSmrg} h264_decode; 1580ed5401bSmrg 159d8807b2fSmrgstatic amdgpu_device_handle device_handle; 160d8807b2fSmrgstatic uint32_t major_version; 161d8807b2fSmrgstatic uint32_t minor_version; 162d8807b2fSmrgstatic uint32_t family_id; 16341687f09Smrgstatic uint32_t chip_rev; 16441687f09Smrgstatic uint32_t chip_id; 1659bd392adSmrgstatic uint32_t asic_id; 16641687f09Smrgstatic uint32_t chip_rev; 1670ed5401bSmrgstatic struct amdgpu_vcn_bo enc_buf; 1680ed5401bSmrgstatic struct amdgpu_vcn_bo cpb_buf; 1690ed5401bSmrgstatic uint32_t enc_task_id; 170d8807b2fSmrg 171d8807b2fSmrgstatic amdgpu_context_handle context_handle; 172d8807b2fSmrgstatic amdgpu_bo_handle ib_handle; 173d8807b2fSmrgstatic amdgpu_va_handle ib_va_handle; 174d8807b2fSmrgstatic uint64_t ib_mc_address; 175d8807b2fSmrgstatic uint32_t *ib_cpu; 176b0ab5608Smrgstatic uint32_t *ib_checksum; 177b0ab5608Smrgstatic uint32_t *ib_size_in_dw; 178b0ab5608Smrg 179b0ab5608Smrgstatic rvcn_decode_buffer_t *decode_buffer; 180bbff01ceSmrgstruct amdgpu_vcn_bo session_ctx_buf; 181d8807b2fSmrg 182d8807b2fSmrgstatic amdgpu_bo_handle resources[MAX_RESOURCES]; 183d8807b2fSmrgstatic unsigned num_resources; 1840ed5401bSmrg 1850ed5401bSmrgstatic uint8_t vcn_reg_index; 1860ed5401bSmrgstatic struct amdgpu_vcn_reg reg[] = { 1870ed5401bSmrg {0x81c4, 0x81c5, 0x81c3, 0x81ff, 0x81c6}, 1880ed5401bSmrg {0x504, 0x505, 0x503, 0x53f, 0x506}, 1890ed5401bSmrg {0x10, 0x11, 0xf, 0x29, 0x26d}, 1900ed5401bSmrg}; 1910ed5401bSmrg 1920ed5401bSmrguint32_t gWidth, gHeight, gSliceType; 193b0ab5608Smrgstatic uint32_t vcn_ip_version_major; 194b0ab5608Smrgstatic uint32_t vcn_ip_version_minor; 195d8807b2fSmrgstatic void amdgpu_cs_vcn_dec_create(void); 196d8807b2fSmrgstatic void amdgpu_cs_vcn_dec_decode(void); 197d8807b2fSmrgstatic void amdgpu_cs_vcn_dec_destroy(void); 198d8807b2fSmrg 199d8807b2fSmrgstatic void amdgpu_cs_vcn_enc_create(void); 200d8807b2fSmrgstatic void amdgpu_cs_vcn_enc_encode(void); 201d8807b2fSmrgstatic void amdgpu_cs_vcn_enc_destroy(void); 202d8807b2fSmrg 203b0ab5608Smrgstatic void amdgpu_cs_sq_head(uint32_t *base, int *offset, bool enc); 204b0ab5608Smrgstatic void amdgpu_cs_sq_ib_tail(uint32_t *end); 2050ed5401bSmrgstatic void h264_check_0s (bufferInfo * bufInfo, int count); 2060ed5401bSmrgstatic int32_t h264_se (bufferInfo * bufInfo); 2070ed5401bSmrgstatic inline uint32_t bs_read_u1(bufferInfo *bufinfo); 2080ed5401bSmrgstatic inline int bs_eof(bufferInfo *bufinfo); 2090ed5401bSmrgstatic inline uint32_t bs_read_u(bufferInfo* bufinfo, int n); 2100ed5401bSmrgstatic inline uint32_t bs_read_ue(bufferInfo* bufinfo); 2110ed5401bSmrgstatic uint32_t remove_03 (uint8_t *bptr, uint32_t len); 2120ed5401bSmrgstatic void scaling_list (uint32_t ix, uint32_t sizeOfScalingList, bufferInfo *bufInfo); 2130ed5401bSmrgstatic void h264_parse_sequence_parameter_set (h264_decode * dec, bufferInfo *bufInfo); 2140ed5401bSmrgstatic void h264_slice_header (h264_decode *dec, bufferInfo *bufInfo); 2150ed5401bSmrgstatic uint8_t h264_parse_nal (h264_decode *dec, bufferInfo *bufInfo); 2160ed5401bSmrgstatic uint32_t h264_find_next_start_code (uint8_t *pBuf, uint32_t bufLen); 2170ed5401bSmrgstatic int verify_checksum(uint8_t *buffer, uint32_t buffer_size); 2180ed5401bSmrg 219d8807b2fSmrgCU_TestInfo vcn_tests[] = { 220d8807b2fSmrg 221d8807b2fSmrg { "VCN DEC create", amdgpu_cs_vcn_dec_create }, 222d8807b2fSmrg { "VCN DEC decode", amdgpu_cs_vcn_dec_decode }, 223d8807b2fSmrg { "VCN DEC destroy", amdgpu_cs_vcn_dec_destroy }, 224d8807b2fSmrg 225d8807b2fSmrg { "VCN ENC create", amdgpu_cs_vcn_enc_create }, 2260ed5401bSmrg { "VCN ENC encode", amdgpu_cs_vcn_enc_encode }, 227d8807b2fSmrg { "VCN ENC destroy", amdgpu_cs_vcn_enc_destroy }, 228d8807b2fSmrg CU_TEST_INFO_NULL, 229d8807b2fSmrg}; 230d8807b2fSmrg 23100a23bdaSmrgCU_BOOL suite_vcn_tests_enable(void) 23200a23bdaSmrg{ 23341687f09Smrg struct drm_amdgpu_info_hw_ip info; 234b0ab5608Smrg bool enc_ring, dec_ring; 235b0ab5608Smrg int r; 23600a23bdaSmrg 23700a23bdaSmrg if (amdgpu_device_initialize(drm_amdgpu[0], &major_version, 23800a23bdaSmrg &minor_version, &device_handle)) 23900a23bdaSmrg return CU_FALSE; 24000a23bdaSmrg 24100a23bdaSmrg family_id = device_handle->info.family_id; 2429bd392adSmrg asic_id = device_handle->info.asic_id; 24341687f09Smrg chip_rev = device_handle->info.chip_rev; 24441687f09Smrg chip_id = device_handle->info.chip_external_rev; 24541687f09Smrg 246b0ab5608Smrg r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_VCN_ENC, 0, &info); 247b0ab5608Smrg if (!r) { 248b0ab5608Smrg vcn_ip_version_major = info.hw_ip_version_major; 249b0ab5608Smrg vcn_ip_version_minor = info.hw_ip_version_minor; 250b0ab5608Smrg enc_ring = !!info.available_rings; 251b0ab5608Smrg /* in vcn 4.0 it re-uses encoding queue as unified queue */ 252b0ab5608Smrg if (vcn_ip_version_major >= 4) { 253b0ab5608Smrg vcn_unified_ring = true; 254b0ab5608Smrg vcn_dec_sw_ring = true; 255b0ab5608Smrg dec_ring = enc_ring; 256b0ab5608Smrg } else { 257b0ab5608Smrg r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_VCN_DEC, 0, &info); 258b0ab5608Smrg dec_ring = !!info.available_rings; 259b0ab5608Smrg } 260b0ab5608Smrg } 26100a23bdaSmrg 26200a23bdaSmrg if (amdgpu_device_deinitialize(device_handle)) 2630ed5401bSmrg return CU_FALSE; 26400a23bdaSmrg 265b0ab5608Smrg if (r) { 266b0ab5608Smrg printf("\n\nASIC query hw info failed\n"); 267b0ab5608Smrg return CU_FALSE; 268b0ab5608Smrg } 269b0ab5608Smrg 270b0ab5608Smrg if (!(dec_ring || enc_ring) || 27141687f09Smrg (family_id < AMDGPU_FAMILY_RV && 27241687f09Smrg (family_id == AMDGPU_FAMILY_AI && 2734babd585Smrg (chip_id - chip_rev) < 0x32))) { /* Arcturus */ 27400a23bdaSmrg printf("\n\nThe ASIC NOT support VCN, suite disabled\n"); 27500a23bdaSmrg return CU_FALSE; 27600a23bdaSmrg } 27700a23bdaSmrg 278b0ab5608Smrg if (!dec_ring) { 279b0ab5608Smrg amdgpu_set_test_active("VCN Tests", "VCN DEC create", CU_FALSE); 280b0ab5608Smrg amdgpu_set_test_active("VCN Tests", "VCN DEC decode", CU_FALSE); 281b0ab5608Smrg amdgpu_set_test_active("VCN Tests", "VCN DEC destroy", CU_FALSE); 282b0ab5608Smrg } 283b0ab5608Smrg 284b0ab5608Smrg if (family_id == AMDGPU_FAMILY_AI || !enc_ring) { 28541687f09Smrg amdgpu_set_test_active("VCN Tests", "VCN ENC create", CU_FALSE); 2860ed5401bSmrg amdgpu_set_test_active("VCN Tests", "VCN ENC encode", CU_FALSE); 28741687f09Smrg amdgpu_set_test_active("VCN Tests", "VCN ENC destroy", CU_FALSE); 28841687f09Smrg } 28941687f09Smrg 290b0ab5608Smrg if (vcn_ip_version_major == 1) 2910ed5401bSmrg vcn_reg_index = 0; 292b0ab5608Smrg else if (vcn_ip_version_major == 2 && vcn_ip_version_minor == 0) 2930ed5401bSmrg vcn_reg_index = 1; 294b0ab5608Smrg else if ((vcn_ip_version_major == 2 && vcn_ip_version_minor >= 5) || 295b0ab5608Smrg vcn_ip_version_major == 3) 2960ed5401bSmrg vcn_reg_index = 2; 2975324fb0dSmrg 29800a23bdaSmrg return CU_TRUE; 29900a23bdaSmrg} 30000a23bdaSmrg 301d8807b2fSmrgint suite_vcn_tests_init(void) 302d8807b2fSmrg{ 303d8807b2fSmrg int r; 304d8807b2fSmrg 305d8807b2fSmrg r = amdgpu_device_initialize(drm_amdgpu[0], &major_version, 306d8807b2fSmrg &minor_version, &device_handle); 307d8807b2fSmrg if (r) 308d8807b2fSmrg return CUE_SINIT_FAILED; 309d8807b2fSmrg 310d8807b2fSmrg family_id = device_handle->info.family_id; 311d8807b2fSmrg 312d8807b2fSmrg r = amdgpu_cs_ctx_create(device_handle, &context_handle); 313d8807b2fSmrg if (r) 314d8807b2fSmrg return CUE_SINIT_FAILED; 315d8807b2fSmrg 316d8807b2fSmrg r = amdgpu_bo_alloc_and_map(device_handle, IB_SIZE, 4096, 317d8807b2fSmrg AMDGPU_GEM_DOMAIN_GTT, 0, 318d8807b2fSmrg &ib_handle, (void**)&ib_cpu, 319d8807b2fSmrg &ib_mc_address, &ib_va_handle); 320d8807b2fSmrg if (r) 321d8807b2fSmrg return CUE_SINIT_FAILED; 322d8807b2fSmrg 323d8807b2fSmrg return CUE_SUCCESS; 324d8807b2fSmrg} 325d8807b2fSmrg 326d8807b2fSmrgint suite_vcn_tests_clean(void) 327d8807b2fSmrg{ 328d8807b2fSmrg int r; 329d8807b2fSmrg 33000a23bdaSmrg r = amdgpu_bo_unmap_and_free(ib_handle, ib_va_handle, 33100a23bdaSmrg ib_mc_address, IB_SIZE); 33200a23bdaSmrg if (r) 33300a23bdaSmrg return CUE_SCLEAN_FAILED; 33400a23bdaSmrg 33500a23bdaSmrg r = amdgpu_cs_ctx_free(context_handle); 33600a23bdaSmrg if (r) 33700a23bdaSmrg return CUE_SCLEAN_FAILED; 33800a23bdaSmrg 33900a23bdaSmrg r = amdgpu_device_deinitialize(device_handle); 34000a23bdaSmrg if (r) 34100a23bdaSmrg return CUE_SCLEAN_FAILED; 342d8807b2fSmrg 343d8807b2fSmrg return CUE_SUCCESS; 344d8807b2fSmrg} 345d8807b2fSmrg 346b0ab5608Smrgstatic void amdgpu_cs_sq_head(uint32_t *base, int *offset, bool enc) 347b0ab5608Smrg{ 348b0ab5608Smrg /* signature */ 349b0ab5608Smrg *(base + (*offset)++) = 0x00000010; 350b0ab5608Smrg *(base + (*offset)++) = 0x30000002; 351b0ab5608Smrg ib_checksum = base + (*offset)++; 352b0ab5608Smrg ib_size_in_dw = base + (*offset)++; 353b0ab5608Smrg 354b0ab5608Smrg /* engine info */ 355b0ab5608Smrg *(base + (*offset)++) = 0x00000010; 356b0ab5608Smrg *(base + (*offset)++) = 0x30000001; 357b0ab5608Smrg *(base + (*offset)++) = enc ? 2 : 3; 358b0ab5608Smrg *(base + (*offset)++) = 0x00000000; 359b0ab5608Smrg} 360b0ab5608Smrg 361b0ab5608Smrgstatic void amdgpu_cs_sq_ib_tail(uint32_t *end) 362b0ab5608Smrg{ 363b0ab5608Smrg uint32_t size_in_dw; 364b0ab5608Smrg uint32_t checksum = 0; 365b0ab5608Smrg 366b0ab5608Smrg /* if the pointers are invalid, no need to process */ 367b0ab5608Smrg if (ib_checksum == NULL || ib_size_in_dw == NULL) 368b0ab5608Smrg return; 369b0ab5608Smrg 370b0ab5608Smrg size_in_dw = end - ib_size_in_dw - 1; 371b0ab5608Smrg *ib_size_in_dw = size_in_dw; 372b0ab5608Smrg *(ib_size_in_dw + 4) = size_in_dw * sizeof(uint32_t); 373b0ab5608Smrg 374b0ab5608Smrg for (int i = 0; i < size_in_dw; i++) 375b0ab5608Smrg checksum += *(ib_checksum + 2 + i); 376b0ab5608Smrg 377b0ab5608Smrg *ib_checksum = checksum; 378b0ab5608Smrg 379b0ab5608Smrg ib_checksum = NULL; 380b0ab5608Smrg ib_size_in_dw = NULL; 381b0ab5608Smrg} 382b0ab5608Smrg 383d8807b2fSmrgstatic int submit(unsigned ndw, unsigned ip) 384d8807b2fSmrg{ 385d8807b2fSmrg struct amdgpu_cs_request ibs_request = {0}; 386d8807b2fSmrg struct amdgpu_cs_ib_info ib_info = {0}; 387d8807b2fSmrg struct amdgpu_cs_fence fence_status = {0}; 388d8807b2fSmrg uint32_t expired; 389d8807b2fSmrg int r; 390d8807b2fSmrg 391d8807b2fSmrg ib_info.ib_mc_address = ib_mc_address; 392d8807b2fSmrg ib_info.size = ndw; 393d8807b2fSmrg 394d8807b2fSmrg ibs_request.ip_type = ip; 395d8807b2fSmrg 396d8807b2fSmrg r = amdgpu_bo_list_create(device_handle, num_resources, resources, 397d8807b2fSmrg NULL, &ibs_request.resources); 398d8807b2fSmrg if (r) 399d8807b2fSmrg return r; 400d8807b2fSmrg 401d8807b2fSmrg ibs_request.number_of_ibs = 1; 402d8807b2fSmrg ibs_request.ibs = &ib_info; 403d8807b2fSmrg ibs_request.fence_info.handle = NULL; 404d8807b2fSmrg 405d8807b2fSmrg r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1); 406d8807b2fSmrg if (r) 407d8807b2fSmrg return r; 408d8807b2fSmrg 409d8807b2fSmrg r = amdgpu_bo_list_destroy(ibs_request.resources); 410d8807b2fSmrg if (r) 411d8807b2fSmrg return r; 412d8807b2fSmrg 413d8807b2fSmrg fence_status.context = context_handle; 414d8807b2fSmrg fence_status.ip_type = ip; 415d8807b2fSmrg fence_status.fence = ibs_request.seq_no; 416d8807b2fSmrg 417d8807b2fSmrg r = amdgpu_cs_query_fence_status(&fence_status, 418d8807b2fSmrg AMDGPU_TIMEOUT_INFINITE, 419d8807b2fSmrg 0, &expired); 420d8807b2fSmrg if (r) 421d8807b2fSmrg return r; 422d8807b2fSmrg 423d8807b2fSmrg return 0; 424d8807b2fSmrg} 425d8807b2fSmrg 426d8807b2fSmrgstatic void alloc_resource(struct amdgpu_vcn_bo *vcn_bo, 427d8807b2fSmrg unsigned size, unsigned domain) 428d8807b2fSmrg{ 429d8807b2fSmrg struct amdgpu_bo_alloc_request req = {0}; 430d8807b2fSmrg amdgpu_bo_handle buf_handle; 431d8807b2fSmrg amdgpu_va_handle va_handle; 432d8807b2fSmrg uint64_t va = 0; 433d8807b2fSmrg int r; 434d8807b2fSmrg 435d8807b2fSmrg req.alloc_size = ALIGN(size, 4096); 436d8807b2fSmrg req.preferred_heap = domain; 437d8807b2fSmrg r = amdgpu_bo_alloc(device_handle, &req, &buf_handle); 438d8807b2fSmrg CU_ASSERT_EQUAL(r, 0); 439d8807b2fSmrg r = amdgpu_va_range_alloc(device_handle, 440d8807b2fSmrg amdgpu_gpu_va_range_general, 441d8807b2fSmrg req.alloc_size, 1, 0, &va, 442d8807b2fSmrg &va_handle, 0); 443d8807b2fSmrg CU_ASSERT_EQUAL(r, 0); 444d8807b2fSmrg r = amdgpu_bo_va_op(buf_handle, 0, req.alloc_size, va, 0, 445d8807b2fSmrg AMDGPU_VA_OP_MAP); 446d8807b2fSmrg CU_ASSERT_EQUAL(r, 0); 447d8807b2fSmrg vcn_bo->addr = va; 448d8807b2fSmrg vcn_bo->handle = buf_handle; 449d8807b2fSmrg vcn_bo->size = req.alloc_size; 450d8807b2fSmrg vcn_bo->va_handle = va_handle; 451d8807b2fSmrg r = amdgpu_bo_cpu_map(vcn_bo->handle, (void **)&vcn_bo->ptr); 452d8807b2fSmrg CU_ASSERT_EQUAL(r, 0); 453d8807b2fSmrg memset(vcn_bo->ptr, 0, size); 454d8807b2fSmrg r = amdgpu_bo_cpu_unmap(vcn_bo->handle); 455d8807b2fSmrg CU_ASSERT_EQUAL(r, 0); 456d8807b2fSmrg} 457d8807b2fSmrg 458d8807b2fSmrgstatic void free_resource(struct amdgpu_vcn_bo *vcn_bo) 459d8807b2fSmrg{ 460d8807b2fSmrg int r; 461d8807b2fSmrg 462d8807b2fSmrg r = amdgpu_bo_va_op(vcn_bo->handle, 0, vcn_bo->size, 463d8807b2fSmrg vcn_bo->addr, 0, AMDGPU_VA_OP_UNMAP); 464d8807b2fSmrg CU_ASSERT_EQUAL(r, 0); 465d8807b2fSmrg 466d8807b2fSmrg r = amdgpu_va_range_free(vcn_bo->va_handle); 467d8807b2fSmrg CU_ASSERT_EQUAL(r, 0); 468d8807b2fSmrg 469d8807b2fSmrg r = amdgpu_bo_free(vcn_bo->handle); 470d8807b2fSmrg CU_ASSERT_EQUAL(r, 0); 471d8807b2fSmrg memset(vcn_bo, 0, sizeof(*vcn_bo)); 472d8807b2fSmrg} 473d8807b2fSmrg 474d8807b2fSmrgstatic void vcn_dec_cmd(uint64_t addr, unsigned cmd, int *idx) 475d8807b2fSmrg{ 476b0ab5608Smrg if (vcn_dec_sw_ring == false) { 477b0ab5608Smrg ib_cpu[(*idx)++] = reg[vcn_reg_index].data0; 478b0ab5608Smrg ib_cpu[(*idx)++] = addr; 479b0ab5608Smrg ib_cpu[(*idx)++] = reg[vcn_reg_index].data1; 480b0ab5608Smrg ib_cpu[(*idx)++] = addr >> 32; 481b0ab5608Smrg ib_cpu[(*idx)++] = reg[vcn_reg_index].cmd; 482b0ab5608Smrg ib_cpu[(*idx)++] = cmd << 1; 483b0ab5608Smrg return; 484b0ab5608Smrg } 485b0ab5608Smrg 486b0ab5608Smrg /* Support decode software ring message */ 487b0ab5608Smrg if (!(*idx)) { 488b0ab5608Smrg rvcn_decode_ib_package_t *ib_header; 489b0ab5608Smrg 490b0ab5608Smrg if (vcn_unified_ring) 491b0ab5608Smrg amdgpu_cs_sq_head(ib_cpu, idx, false); 492b0ab5608Smrg 493b0ab5608Smrg ib_header = (rvcn_decode_ib_package_t *)&ib_cpu[*idx]; 494b0ab5608Smrg ib_header->package_size = sizeof(struct rvcn_decode_buffer_s) + 495b0ab5608Smrg sizeof(struct rvcn_decode_ib_package_s); 496b0ab5608Smrg 497b0ab5608Smrg (*idx)++; 498b0ab5608Smrg ib_header->package_type = (DECODE_IB_PARAM_DECODE_BUFFER); 499b0ab5608Smrg (*idx)++; 500b0ab5608Smrg 501b0ab5608Smrg decode_buffer = (rvcn_decode_buffer_t *)&(ib_cpu[*idx]); 502b0ab5608Smrg *idx += sizeof(struct rvcn_decode_buffer_s) / 4; 503b0ab5608Smrg memset(decode_buffer, 0, sizeof(struct rvcn_decode_buffer_s)); 504b0ab5608Smrg } 505b0ab5608Smrg 506b0ab5608Smrg switch(cmd) { 507b0ab5608Smrg case DECODE_CMD_MSG_BUFFER: 508b0ab5608Smrg decode_buffer->valid_buf_flag |= DECODE_CMDBUF_FLAGS_MSG_BUFFER; 509b0ab5608Smrg decode_buffer->msg_buffer_address_hi = (addr >> 32); 510b0ab5608Smrg decode_buffer->msg_buffer_address_lo = (addr); 511b0ab5608Smrg break; 512b0ab5608Smrg case DECODE_CMD_DPB_BUFFER: 513b0ab5608Smrg decode_buffer->valid_buf_flag |= (DECODE_CMDBUF_FLAGS_DPB_BUFFER); 514b0ab5608Smrg decode_buffer->dpb_buffer_address_hi = (addr >> 32); 515b0ab5608Smrg decode_buffer->dpb_buffer_address_lo = (addr); 516b0ab5608Smrg break; 517b0ab5608Smrg case DECODE_CMD_DECODING_TARGET_BUFFER: 518b0ab5608Smrg decode_buffer->valid_buf_flag |= (DECODE_CMDBUF_FLAGS_DECODING_TARGET_BUFFER); 519b0ab5608Smrg decode_buffer->target_buffer_address_hi = (addr >> 32); 520b0ab5608Smrg decode_buffer->target_buffer_address_lo = (addr); 521b0ab5608Smrg break; 522b0ab5608Smrg case DECODE_CMD_FEEDBACK_BUFFER: 523b0ab5608Smrg decode_buffer->valid_buf_flag |= (DECODE_CMDBUF_FLAGS_FEEDBACK_BUFFER); 524b0ab5608Smrg decode_buffer->feedback_buffer_address_hi = (addr >> 32); 525b0ab5608Smrg decode_buffer->feedback_buffer_address_lo = (addr); 526b0ab5608Smrg break; 527b0ab5608Smrg case DECODE_CMD_PROB_TBL_BUFFER: 528b0ab5608Smrg decode_buffer->valid_buf_flag |= (DECODE_CMDBUF_FLAGS_PROB_TBL_BUFFER); 529b0ab5608Smrg decode_buffer->prob_tbl_buffer_address_hi = (addr >> 32); 530b0ab5608Smrg decode_buffer->prob_tbl_buffer_address_lo = (addr); 531b0ab5608Smrg break; 532b0ab5608Smrg case DECODE_CMD_SESSION_CONTEXT_BUFFER: 533b0ab5608Smrg decode_buffer->valid_buf_flag |= (DECODE_CMDBUF_FLAGS_SESSION_CONTEXT_BUFFER); 534b0ab5608Smrg decode_buffer->session_contex_buffer_address_hi = (addr >> 32); 535b0ab5608Smrg decode_buffer->session_contex_buffer_address_lo = (addr); 536b0ab5608Smrg break; 537b0ab5608Smrg case DECODE_CMD_BITSTREAM_BUFFER: 538b0ab5608Smrg decode_buffer->valid_buf_flag |= (DECODE_CMDBUF_FLAGS_BITSTREAM_BUFFER); 539b0ab5608Smrg decode_buffer->bitstream_buffer_address_hi = (addr >> 32); 540b0ab5608Smrg decode_buffer->bitstream_buffer_address_lo = (addr); 541b0ab5608Smrg break; 542b0ab5608Smrg case DECODE_CMD_IT_SCALING_TABLE_BUFFER: 543b0ab5608Smrg decode_buffer->valid_buf_flag |= (DECODE_CMDBUF_FLAGS_IT_SCALING_BUFFER); 544b0ab5608Smrg decode_buffer->it_sclr_table_buffer_address_hi = (addr >> 32); 545b0ab5608Smrg decode_buffer->it_sclr_table_buffer_address_lo = (addr); 546b0ab5608Smrg break; 547b0ab5608Smrg case DECODE_CMD_CONTEXT_BUFFER: 548b0ab5608Smrg decode_buffer->valid_buf_flag |= (DECODE_CMDBUF_FLAGS_CONTEXT_BUFFER); 549b0ab5608Smrg decode_buffer->context_buffer_address_hi = (addr >> 32); 550b0ab5608Smrg decode_buffer->context_buffer_address_lo = (addr); 551b0ab5608Smrg break; 552b0ab5608Smrg default: 553b0ab5608Smrg printf("Not Support!\n"); 554b0ab5608Smrg } 555d8807b2fSmrg} 556d8807b2fSmrg 557d8807b2fSmrgstatic void amdgpu_cs_vcn_dec_create(void) 558d8807b2fSmrg{ 559d8807b2fSmrg struct amdgpu_vcn_bo msg_buf; 560b0ab5608Smrg unsigned ip; 561d8807b2fSmrg int len, r; 562d8807b2fSmrg 563d8807b2fSmrg num_resources = 0; 564d8807b2fSmrg alloc_resource(&msg_buf, 4096, AMDGPU_GEM_DOMAIN_GTT); 565bbff01ceSmrg alloc_resource(&session_ctx_buf, 32 * 4096, AMDGPU_GEM_DOMAIN_VRAM); 566d8807b2fSmrg resources[num_resources++] = msg_buf.handle; 567bbff01ceSmrg resources[num_resources++] = session_ctx_buf.handle; 568d8807b2fSmrg resources[num_resources++] = ib_handle; 569d8807b2fSmrg 570d8807b2fSmrg r = amdgpu_bo_cpu_map(msg_buf.handle, (void **)&msg_buf.ptr); 571d8807b2fSmrg CU_ASSERT_EQUAL(r, 0); 572d8807b2fSmrg 573d8807b2fSmrg memset(msg_buf.ptr, 0, 4096); 574d8807b2fSmrg memcpy(msg_buf.ptr, vcn_dec_create_msg, sizeof(vcn_dec_create_msg)); 575d8807b2fSmrg 576d8807b2fSmrg len = 0; 577bbff01ceSmrg 578bbff01ceSmrg vcn_dec_cmd(session_ctx_buf.addr, 5, &len); 579bbff01ceSmrg if (vcn_dec_sw_ring == true) { 580b0ab5608Smrg vcn_dec_cmd(msg_buf.addr, 0, &len); 581bbff01ceSmrg } else { 582b0ab5608Smrg ib_cpu[len++] = reg[vcn_reg_index].data0; 583b0ab5608Smrg ib_cpu[len++] = msg_buf.addr; 584b0ab5608Smrg ib_cpu[len++] = reg[vcn_reg_index].data1; 585b0ab5608Smrg ib_cpu[len++] = msg_buf.addr >> 32; 586b0ab5608Smrg ib_cpu[len++] = reg[vcn_reg_index].cmd; 5876532f28eSmrg ib_cpu[len++] = 0; 588b0ab5608Smrg for (; len % 16; ) { 589b0ab5608Smrg ib_cpu[len++] = reg[vcn_reg_index].nop; 590b0ab5608Smrg ib_cpu[len++] = 0; 591b0ab5608Smrg } 5926532f28eSmrg } 593d8807b2fSmrg 594b0ab5608Smrg if (vcn_unified_ring) { 595b0ab5608Smrg amdgpu_cs_sq_ib_tail(ib_cpu + len); 596b0ab5608Smrg ip = AMDGPU_HW_IP_VCN_ENC; 597b0ab5608Smrg } else 598b0ab5608Smrg ip = AMDGPU_HW_IP_VCN_DEC; 599b0ab5608Smrg 600b0ab5608Smrg r = submit(len, ip); 601b0ab5608Smrg 602d8807b2fSmrg CU_ASSERT_EQUAL(r, 0); 603d8807b2fSmrg 604d8807b2fSmrg free_resource(&msg_buf); 605d8807b2fSmrg} 606d8807b2fSmrg 607d8807b2fSmrgstatic void amdgpu_cs_vcn_dec_decode(void) 608d8807b2fSmrg{ 60900a23bdaSmrg const unsigned dpb_size = 15923584, dt_size = 737280; 610d8807b2fSmrg uint64_t msg_addr, fb_addr, bs_addr, dpb_addr, ctx_addr, dt_addr, it_addr, sum; 611d8807b2fSmrg struct amdgpu_vcn_bo dec_buf; 612d8807b2fSmrg int size, len, i, r; 613b0ab5608Smrg unsigned ip; 614d8807b2fSmrg uint8_t *dec; 615d8807b2fSmrg 616d8807b2fSmrg size = 4*1024; /* msg */ 617d8807b2fSmrg size += 4*1024; /* fb */ 618d8807b2fSmrg size += 4096; /*it_scaling_table*/ 619d8807b2fSmrg size += ALIGN(sizeof(uvd_bitstream), 4*1024); 620d8807b2fSmrg size += ALIGN(dpb_size, 4*1024); 621d8807b2fSmrg size += ALIGN(dt_size, 4*1024); 622d8807b2fSmrg 6230ed5401bSmrg num_resources = 0; 624d8807b2fSmrg alloc_resource(&dec_buf, size, AMDGPU_GEM_DOMAIN_GTT); 625d8807b2fSmrg resources[num_resources++] = dec_buf.handle; 626d8807b2fSmrg resources[num_resources++] = ib_handle; 627d8807b2fSmrg 628d8807b2fSmrg r = amdgpu_bo_cpu_map(dec_buf.handle, (void **)&dec_buf.ptr); 629d8807b2fSmrg dec = dec_buf.ptr; 630d8807b2fSmrg 631d8807b2fSmrg CU_ASSERT_EQUAL(r, 0); 632d8807b2fSmrg memset(dec_buf.ptr, 0, size); 633d8807b2fSmrg memcpy(dec_buf.ptr, vcn_dec_decode_msg, sizeof(vcn_dec_decode_msg)); 634d8807b2fSmrg memcpy(dec_buf.ptr + sizeof(vcn_dec_decode_msg), 635d8807b2fSmrg avc_decode_msg, sizeof(avc_decode_msg)); 636d8807b2fSmrg 637d8807b2fSmrg dec += 4*1024; 6389bd392adSmrg memcpy(dec, feedback_msg, sizeof(feedback_msg)); 639d8807b2fSmrg dec += 4*1024; 640d8807b2fSmrg memcpy(dec, uvd_it_scaling_table, sizeof(uvd_it_scaling_table)); 641d8807b2fSmrg 642d8807b2fSmrg dec += 4*1024; 643d8807b2fSmrg memcpy(dec, uvd_bitstream, sizeof(uvd_bitstream)); 644d8807b2fSmrg 645d8807b2fSmrg dec += ALIGN(sizeof(uvd_bitstream), 4*1024); 646d8807b2fSmrg 647d8807b2fSmrg dec += ALIGN(dpb_size, 4*1024); 648d8807b2fSmrg 649d8807b2fSmrg msg_addr = dec_buf.addr; 650d8807b2fSmrg fb_addr = msg_addr + 4*1024; 651d8807b2fSmrg it_addr = fb_addr + 4*1024; 652d8807b2fSmrg bs_addr = it_addr + 4*1024; 653d8807b2fSmrg dpb_addr = ALIGN(bs_addr + sizeof(uvd_bitstream), 4*1024); 654d8807b2fSmrg ctx_addr = ALIGN(dpb_addr + 0x006B9400, 4*1024); 655d8807b2fSmrg dt_addr = ALIGN(dpb_addr + dpb_size, 4*1024); 656d8807b2fSmrg 657d8807b2fSmrg len = 0; 658bbff01ceSmrg vcn_dec_cmd(session_ctx_buf.addr, 0x5, &len); 659d8807b2fSmrg vcn_dec_cmd(msg_addr, 0x0, &len); 660d8807b2fSmrg vcn_dec_cmd(dpb_addr, 0x1, &len); 661d8807b2fSmrg vcn_dec_cmd(dt_addr, 0x2, &len); 662d8807b2fSmrg vcn_dec_cmd(fb_addr, 0x3, &len); 663d8807b2fSmrg vcn_dec_cmd(bs_addr, 0x100, &len); 664d8807b2fSmrg vcn_dec_cmd(it_addr, 0x204, &len); 665d8807b2fSmrg vcn_dec_cmd(ctx_addr, 0x206, &len); 666d8807b2fSmrg 667b0ab5608Smrg if (vcn_dec_sw_ring == false) { 668b0ab5608Smrg ib_cpu[len++] = reg[vcn_reg_index].cntl; 669b0ab5608Smrg ib_cpu[len++] = 0x1; 670b0ab5608Smrg for (; len % 16; ) { 671b0ab5608Smrg ib_cpu[len++] = reg[vcn_reg_index].nop; 672b0ab5608Smrg ib_cpu[len++] = 0; 673b0ab5608Smrg } 6746532f28eSmrg } 675d8807b2fSmrg 676b0ab5608Smrg if (vcn_unified_ring) { 677b0ab5608Smrg amdgpu_cs_sq_ib_tail(ib_cpu + len); 678b0ab5608Smrg ip = AMDGPU_HW_IP_VCN_ENC; 679b0ab5608Smrg } else 680b0ab5608Smrg ip = AMDGPU_HW_IP_VCN_DEC; 681b0ab5608Smrg 682b0ab5608Smrg r = submit(len, ip); 683d8807b2fSmrg CU_ASSERT_EQUAL(r, 0); 684d8807b2fSmrg 685d8807b2fSmrg for (i = 0, sum = 0; i < dt_size; ++i) 686d8807b2fSmrg sum += dec[i]; 687d8807b2fSmrg 688d8807b2fSmrg CU_ASSERT_EQUAL(sum, SUM_DECODE); 689d8807b2fSmrg 690d8807b2fSmrg free_resource(&dec_buf); 691d8807b2fSmrg} 692d8807b2fSmrg 693d8807b2fSmrgstatic void amdgpu_cs_vcn_dec_destroy(void) 694d8807b2fSmrg{ 695d8807b2fSmrg struct amdgpu_vcn_bo msg_buf; 696b0ab5608Smrg unsigned ip; 697d8807b2fSmrg int len, r; 698d8807b2fSmrg 6990ed5401bSmrg num_resources = 0; 700d8807b2fSmrg alloc_resource(&msg_buf, 1024, AMDGPU_GEM_DOMAIN_GTT); 701d8807b2fSmrg resources[num_resources++] = msg_buf.handle; 702d8807b2fSmrg resources[num_resources++] = ib_handle; 703d8807b2fSmrg 704d8807b2fSmrg r = amdgpu_bo_cpu_map(msg_buf.handle, (void **)&msg_buf.ptr); 705d8807b2fSmrg CU_ASSERT_EQUAL(r, 0); 706d8807b2fSmrg 707d8807b2fSmrg memset(msg_buf.ptr, 0, 1024); 708d8807b2fSmrg memcpy(msg_buf.ptr, vcn_dec_destroy_msg, sizeof(vcn_dec_destroy_msg)); 709d8807b2fSmrg 710d8807b2fSmrg len = 0; 711bbff01ceSmrg vcn_dec_cmd(session_ctx_buf.addr, 5, &len); 712bbff01ceSmrg if (vcn_dec_sw_ring == true) { 713b0ab5608Smrg vcn_dec_cmd(msg_buf.addr, 0, &len); 714bbff01ceSmrg } else { 715b0ab5608Smrg ib_cpu[len++] = reg[vcn_reg_index].data0; 716b0ab5608Smrg ib_cpu[len++] = msg_buf.addr; 717b0ab5608Smrg ib_cpu[len++] = reg[vcn_reg_index].data1; 718b0ab5608Smrg ib_cpu[len++] = msg_buf.addr >> 32; 719b0ab5608Smrg ib_cpu[len++] = reg[vcn_reg_index].cmd; 7206532f28eSmrg ib_cpu[len++] = 0; 721b0ab5608Smrg for (; len % 16; ) { 722b0ab5608Smrg ib_cpu[len++] = reg[vcn_reg_index].nop; 723b0ab5608Smrg ib_cpu[len++] = 0; 724b0ab5608Smrg } 7256532f28eSmrg } 726d8807b2fSmrg 727b0ab5608Smrg if (vcn_unified_ring) { 728b0ab5608Smrg amdgpu_cs_sq_ib_tail(ib_cpu + len); 729b0ab5608Smrg ip = AMDGPU_HW_IP_VCN_ENC; 730b0ab5608Smrg } else 731b0ab5608Smrg ip = AMDGPU_HW_IP_VCN_DEC; 732b0ab5608Smrg 733b0ab5608Smrg r = submit(len, ip); 734d8807b2fSmrg CU_ASSERT_EQUAL(r, 0); 735d8807b2fSmrg 736d8807b2fSmrg free_resource(&msg_buf); 737bbff01ceSmrg free_resource(&session_ctx_buf); 738d8807b2fSmrg} 739d8807b2fSmrg 740d8807b2fSmrgstatic void amdgpu_cs_vcn_enc_create(void) 741d8807b2fSmrg{ 7420ed5401bSmrg int len, r; 7430ed5401bSmrg uint32_t *p_task_size = NULL; 7440ed5401bSmrg uint32_t task_offset = 0, st_offset; 7450ed5401bSmrg uint32_t *st_size = NULL; 7460ed5401bSmrg unsigned width = 160, height = 128, buf_size; 7470ed5401bSmrg uint32_t fw_maj = 1, fw_min = 9; 7480ed5401bSmrg 749b0ab5608Smrg if (vcn_ip_version_major == 2) { 7500ed5401bSmrg fw_maj = 1; 7510ed5401bSmrg fw_min = 1; 752b0ab5608Smrg } else if (vcn_ip_version_major == 3) { 7530ed5401bSmrg fw_maj = 1; 7540ed5401bSmrg fw_min = 0; 7550ed5401bSmrg } 7560ed5401bSmrg 7570ed5401bSmrg gWidth = width; 7580ed5401bSmrg gHeight = height; 7590ed5401bSmrg buf_size = ALIGN(width, 256) * ALIGN(height, 32) * 3 / 2; 7600ed5401bSmrg enc_task_id = 1; 7610ed5401bSmrg 7620ed5401bSmrg num_resources = 0; 7630ed5401bSmrg alloc_resource(&enc_buf, 128 * 1024, AMDGPU_GEM_DOMAIN_GTT); 7640ed5401bSmrg alloc_resource(&cpb_buf, buf_size * 2, AMDGPU_GEM_DOMAIN_GTT); 7650ed5401bSmrg resources[num_resources++] = enc_buf.handle; 7660ed5401bSmrg resources[num_resources++] = cpb_buf.handle; 7670ed5401bSmrg resources[num_resources++] = ib_handle; 7680ed5401bSmrg 7690ed5401bSmrg r = amdgpu_bo_cpu_map(enc_buf.handle, (void**)&enc_buf.ptr); 7700ed5401bSmrg memset(enc_buf.ptr, 0, 128 * 1024); 7710ed5401bSmrg r = amdgpu_bo_cpu_unmap(enc_buf.handle); 7720ed5401bSmrg 7730ed5401bSmrg r = amdgpu_bo_cpu_map(cpb_buf.handle, (void**)&enc_buf.ptr); 7740ed5401bSmrg memset(enc_buf.ptr, 0, buf_size * 2); 7750ed5401bSmrg r = amdgpu_bo_cpu_unmap(cpb_buf.handle); 7760ed5401bSmrg 7770ed5401bSmrg len = 0; 778b0ab5608Smrg 779b0ab5608Smrg if (vcn_unified_ring) 780b0ab5608Smrg amdgpu_cs_sq_head(ib_cpu, &len, true); 781b0ab5608Smrg 7820ed5401bSmrg /* session info */ 7830ed5401bSmrg st_offset = len; 7840ed5401bSmrg st_size = &ib_cpu[len++]; /* size */ 7850ed5401bSmrg ib_cpu[len++] = 0x00000001; /* RENCODE_IB_PARAM_SESSION_INFO */ 7860ed5401bSmrg ib_cpu[len++] = ((fw_maj << 16) | (fw_min << 0)); 7870ed5401bSmrg ib_cpu[len++] = enc_buf.addr >> 32; 7880ed5401bSmrg ib_cpu[len++] = enc_buf.addr; 7890ed5401bSmrg ib_cpu[len++] = 1; /* RENCODE_ENGINE_TYPE_ENCODE; */ 7900ed5401bSmrg *st_size = (len - st_offset) * 4; 7910ed5401bSmrg 7920ed5401bSmrg /* task info */ 7930ed5401bSmrg task_offset = len; 7940ed5401bSmrg st_offset = len; 7950ed5401bSmrg st_size = &ib_cpu[len++]; /* size */ 7960ed5401bSmrg ib_cpu[len++] = 0x00000002; /* RENCODE_IB_PARAM_TASK_INFO */ 7970ed5401bSmrg p_task_size = &ib_cpu[len++]; 7980ed5401bSmrg ib_cpu[len++] = enc_task_id++; /* task_id */ 7990ed5401bSmrg ib_cpu[len++] = 0; /* feedback */ 8000ed5401bSmrg *st_size = (len - st_offset) * 4; 8010ed5401bSmrg 8020ed5401bSmrg /* op init */ 8030ed5401bSmrg st_offset = len; 8040ed5401bSmrg st_size = &ib_cpu[len++]; /* size */ 8050ed5401bSmrg ib_cpu[len++] = 0x01000001; /* RENCODE_IB_OP_INITIALIZE */ 8060ed5401bSmrg *st_size = (len - st_offset) * 4; 8070ed5401bSmrg 8080ed5401bSmrg /* session_init */ 8090ed5401bSmrg st_offset = len; 8100ed5401bSmrg st_size = &ib_cpu[len++]; /* size */ 8110ed5401bSmrg ib_cpu[len++] = 0x00000003; /* RENCODE_IB_PARAM_SESSION_INIT */ 8120ed5401bSmrg ib_cpu[len++] = 1; /* RENCODE_ENCODE_STANDARD_H264 */ 8130ed5401bSmrg ib_cpu[len++] = width; 8140ed5401bSmrg ib_cpu[len++] = height; 8150ed5401bSmrg ib_cpu[len++] = 0; 8160ed5401bSmrg ib_cpu[len++] = 0; 8170ed5401bSmrg ib_cpu[len++] = 0; /* pre encode mode */ 8180ed5401bSmrg ib_cpu[len++] = 0; /* chroma enabled : false */ 819bbff01ceSmrg ib_cpu[len++] = 0; 820bbff01ceSmrg ib_cpu[len++] = 0; 8210ed5401bSmrg *st_size = (len - st_offset) * 4; 8220ed5401bSmrg 8230ed5401bSmrg /* slice control */ 8240ed5401bSmrg st_offset = len; 8250ed5401bSmrg st_size = &ib_cpu[len++]; /* size */ 8260ed5401bSmrg ib_cpu[len++] = 0x00200001; /* RENCODE_H264_IB_PARAM_SLICE_CONTROL */ 8270ed5401bSmrg ib_cpu[len++] = 0; /* RENCODE_H264_SLICE_CONTROL_MODE_FIXED_MBS */ 8280ed5401bSmrg ib_cpu[len++] = ALIGN(width, 16) / 16 * ALIGN(height, 16) / 16; 8290ed5401bSmrg *st_size = (len - st_offset) * 4; 8300ed5401bSmrg 8310ed5401bSmrg /* enc spec misc */ 8320ed5401bSmrg st_offset = len; 8330ed5401bSmrg st_size = &ib_cpu[len++]; /* size */ 8340ed5401bSmrg ib_cpu[len++] = 0x00200002; /* RENCODE_H264_IB_PARAM_SPEC_MISC */ 8350ed5401bSmrg ib_cpu[len++] = 0; /* constrained intra pred flag */ 8360ed5401bSmrg ib_cpu[len++] = 0; /* cabac enable */ 8370ed5401bSmrg ib_cpu[len++] = 0; /* cabac init idc */ 8380ed5401bSmrg ib_cpu[len++] = 1; /* half pel enabled */ 8390ed5401bSmrg ib_cpu[len++] = 1; /* quarter pel enabled */ 8400ed5401bSmrg ib_cpu[len++] = 100; /* BASELINE profile */ 8410ed5401bSmrg ib_cpu[len++] = 11; /* level */ 842bbff01ceSmrg if (vcn_ip_version_major >= 3) { 8430ed5401bSmrg ib_cpu[len++] = 0; /* b_picture_enabled */ 8440ed5401bSmrg ib_cpu[len++] = 0; /* weighted_bipred_idc */ 8450ed5401bSmrg } 8460ed5401bSmrg *st_size = (len - st_offset) * 4; 8470ed5401bSmrg 8480ed5401bSmrg /* deblocking filter */ 8490ed5401bSmrg st_offset = len; 8500ed5401bSmrg st_size = &ib_cpu[len++]; /* size */ 8510ed5401bSmrg ib_cpu[len++] = 0x00200004; /* RENCODE_H264_IB_PARAM_DEBLOCKING_FILTER */ 8520ed5401bSmrg ib_cpu[len++] = 0; /* disable deblocking filter idc */ 8530ed5401bSmrg ib_cpu[len++] = 0; /* alpha c0 offset */ 8540ed5401bSmrg ib_cpu[len++] = 0; /* tc offset */ 8550ed5401bSmrg ib_cpu[len++] = 0; /* cb offset */ 8560ed5401bSmrg ib_cpu[len++] = 0; /* cr offset */ 8570ed5401bSmrg *st_size = (len - st_offset) * 4; 8580ed5401bSmrg 8590ed5401bSmrg /* layer control */ 8600ed5401bSmrg st_offset = len; 8610ed5401bSmrg st_size = &ib_cpu[len++]; /* size */ 8620ed5401bSmrg ib_cpu[len++] = 0x00000004; /* RENCODE_IB_PARAM_LAYER_CONTROL */ 8630ed5401bSmrg ib_cpu[len++] = 1; /* max temporal layer */ 8640ed5401bSmrg ib_cpu[len++] = 1; /* no of temporal layer */ 8650ed5401bSmrg *st_size = (len - st_offset) * 4; 8660ed5401bSmrg 8670ed5401bSmrg /* rc_session init */ 8680ed5401bSmrg st_offset = len; 8690ed5401bSmrg st_size = &ib_cpu[len++]; /* size */ 8700ed5401bSmrg ib_cpu[len++] = 0x00000006; /* RENCODE_IB_PARAM_RATE_CONTROL_SESSION_INIT */ 8710ed5401bSmrg ib_cpu[len++] = 0; /* rate control */ 8720ed5401bSmrg ib_cpu[len++] = 48; /* vbv buffer level */ 8730ed5401bSmrg *st_size = (len - st_offset) * 4; 8740ed5401bSmrg 8750ed5401bSmrg /* quality params */ 8760ed5401bSmrg st_offset = len; 8770ed5401bSmrg st_size = &ib_cpu[len++]; /* size */ 8780ed5401bSmrg ib_cpu[len++] = 0x00000009; /* RENCODE_IB_PARAM_QUALITY_PARAMS */ 8790ed5401bSmrg ib_cpu[len++] = 0; /* vbaq mode */ 8800ed5401bSmrg ib_cpu[len++] = 0; /* scene change sensitivity */ 8810ed5401bSmrg ib_cpu[len++] = 0; /* scene change min idr interval */ 8820ed5401bSmrg ib_cpu[len++] = 0; 883bbff01ceSmrg if (vcn_ip_version_major >= 3) 8840ed5401bSmrg ib_cpu[len++] = 0; 8850ed5401bSmrg *st_size = (len - st_offset) * 4; 8860ed5401bSmrg 8870ed5401bSmrg /* layer select */ 8880ed5401bSmrg st_offset = len; 8890ed5401bSmrg st_size = &ib_cpu[len++]; /* size */ 8900ed5401bSmrg ib_cpu[len++] = 0x00000005; /* RENCODE_IB_PARAM_LAYER_SELECT */ 8910ed5401bSmrg ib_cpu[len++] = 0; /* temporal layer */ 8920ed5401bSmrg *st_size = (len - st_offset) * 4; 8930ed5401bSmrg 8940ed5401bSmrg /* rc layer init */ 8950ed5401bSmrg st_offset = len; 8960ed5401bSmrg st_size = &ib_cpu[len++]; /* size */ 8970ed5401bSmrg ib_cpu[len++] = 0x00000007; /* RENCODE_IB_PARAM_RATE_CONTROL_LAYER_INIT */ 8980ed5401bSmrg ib_cpu[len++] = 0; 8990ed5401bSmrg ib_cpu[len++] = 0; 9000ed5401bSmrg ib_cpu[len++] = 25; 9010ed5401bSmrg ib_cpu[len++] = 1; 9020ed5401bSmrg ib_cpu[len++] = 0x01312d00; 9030ed5401bSmrg ib_cpu[len++] = 0; 9040ed5401bSmrg ib_cpu[len++] = 0; 9050ed5401bSmrg ib_cpu[len++] = 0; 9060ed5401bSmrg *st_size = (len - st_offset) * 4; 9070ed5401bSmrg 9080ed5401bSmrg /* layer select */ 9090ed5401bSmrg st_offset = len; 9100ed5401bSmrg st_size = &ib_cpu[len++]; /* size */ 9110ed5401bSmrg ib_cpu[len++] = 0x00000005; /* RENCODE_IB_PARAM_LAYER_SELECT */ 9120ed5401bSmrg ib_cpu[len++] = 0; /* temporal layer */ 9130ed5401bSmrg *st_size = (len - st_offset) * 4; 9140ed5401bSmrg 9150ed5401bSmrg /* rc per pic */ 9160ed5401bSmrg st_offset = len; 9170ed5401bSmrg st_size = &ib_cpu[len++]; /* size */ 9180ed5401bSmrg ib_cpu[len++] = 0x00000008; /* RENCODE_IB_PARAM_RATE_CONTROL_PER_PICTURE */ 9190ed5401bSmrg ib_cpu[len++] = 20; 9200ed5401bSmrg ib_cpu[len++] = 0; 9210ed5401bSmrg ib_cpu[len++] = 51; 9220ed5401bSmrg ib_cpu[len++] = 0; 9230ed5401bSmrg ib_cpu[len++] = 1; 9240ed5401bSmrg ib_cpu[len++] = 0; 9250ed5401bSmrg ib_cpu[len++] = 1; 926bbff01ceSmrg ib_cpu[len++] = 0; 9270ed5401bSmrg *st_size = (len - st_offset) * 4; 9280ed5401bSmrg 9290ed5401bSmrg /* op init rc */ 9300ed5401bSmrg st_offset = len; 9310ed5401bSmrg st_size = &ib_cpu[len++]; /* size */ 9320ed5401bSmrg ib_cpu[len++] = 0x01000004; /* RENCODE_IB_OP_INIT_RC */ 9330ed5401bSmrg *st_size = (len - st_offset) * 4; 9340ed5401bSmrg 9350ed5401bSmrg /* op init rc vbv */ 9360ed5401bSmrg st_offset = len; 9370ed5401bSmrg st_size = &ib_cpu[len++]; /* size */ 9380ed5401bSmrg ib_cpu[len++] = 0x01000005; /* RENCODE_IB_OP_INIT_RC_VBV_BUFFER_LEVEL */ 9390ed5401bSmrg *st_size = (len - st_offset) * 4; 9400ed5401bSmrg 9410ed5401bSmrg *p_task_size = (len - task_offset) * 4; 9420ed5401bSmrg 943b0ab5608Smrg if (vcn_unified_ring) 944b0ab5608Smrg amdgpu_cs_sq_ib_tail(ib_cpu + len); 945b0ab5608Smrg 9460ed5401bSmrg r = submit(len, AMDGPU_HW_IP_VCN_ENC); 9470ed5401bSmrg CU_ASSERT_EQUAL(r, 0); 9480ed5401bSmrg} 9490ed5401bSmrg 9500ed5401bSmrgstatic int32_t h264_se (bufferInfo * bufInfo) 9510ed5401bSmrg{ 9520ed5401bSmrg uint32_t ret; 9530ed5401bSmrg 9540ed5401bSmrg ret = bs_read_ue (bufInfo); 9550ed5401bSmrg if ((ret & 0x1) == 0) { 9560ed5401bSmrg ret >>= 1; 9570ed5401bSmrg int32_t temp = 0 - ret; 9580ed5401bSmrg return temp; 9590ed5401bSmrg } 9600ed5401bSmrg 9610ed5401bSmrg return (ret + 1) >> 1; 9620ed5401bSmrg} 9630ed5401bSmrg 9640ed5401bSmrgstatic void h264_check_0s (bufferInfo * bufInfo, int count) 9650ed5401bSmrg{ 9660ed5401bSmrg uint32_t val; 9670ed5401bSmrg 9680ed5401bSmrg val = bs_read_u (bufInfo, count); 9690ed5401bSmrg if (val != 0) { 9700ed5401bSmrg printf ("field error - %d bits should be 0 is %x\n", count, val); 9710ed5401bSmrg } 9720ed5401bSmrg} 9730ed5401bSmrg 9740ed5401bSmrgstatic inline int bs_eof(bufferInfo * bufinfo) 9750ed5401bSmrg{ 9760ed5401bSmrg if (bufinfo->decBuffer >= bufinfo->end) 9770ed5401bSmrg return 1; 9780ed5401bSmrg else 9790ed5401bSmrg return 0; 9800ed5401bSmrg} 9810ed5401bSmrg 9820ed5401bSmrgstatic inline uint32_t bs_read_u1(bufferInfo *bufinfo) 9830ed5401bSmrg{ 9840ed5401bSmrg uint32_t r = 0; 9850ed5401bSmrg uint32_t temp = 0; 9860ed5401bSmrg 9870ed5401bSmrg bufinfo->numOfBitsInBuffer--; 9880ed5401bSmrg if (! bs_eof(bufinfo)) { 9890ed5401bSmrg temp = (((bufinfo->decData)) >> bufinfo->numOfBitsInBuffer); 9900ed5401bSmrg r = temp & 0x01; 9910ed5401bSmrg } 9920ed5401bSmrg 9930ed5401bSmrg if (bufinfo->numOfBitsInBuffer == 0) { 9940ed5401bSmrg bufinfo->decBuffer++; 9950ed5401bSmrg bufinfo->decData = *bufinfo->decBuffer; 9960ed5401bSmrg bufinfo->numOfBitsInBuffer = 8; 9970ed5401bSmrg } 9980ed5401bSmrg 9990ed5401bSmrg return r; 10000ed5401bSmrg} 10010ed5401bSmrg 10020ed5401bSmrgstatic inline uint32_t bs_read_u(bufferInfo* bufinfo, int n) 10030ed5401bSmrg{ 10040ed5401bSmrg uint32_t r = 0; 10050ed5401bSmrg int i; 10060ed5401bSmrg 10070ed5401bSmrg for (i = 0; i < n; i++) { 10080ed5401bSmrg r |= ( bs_read_u1(bufinfo) << ( n - i - 1 ) ); 10090ed5401bSmrg } 10100ed5401bSmrg 10110ed5401bSmrg return r; 10120ed5401bSmrg} 10130ed5401bSmrg 10140ed5401bSmrgstatic inline uint32_t bs_read_ue(bufferInfo* bufinfo) 10150ed5401bSmrg{ 10160ed5401bSmrg int32_t r = 0; 10170ed5401bSmrg int i = 0; 10180ed5401bSmrg 10190ed5401bSmrg while( (bs_read_u1(bufinfo) == 0) && (i < 32) && (!bs_eof(bufinfo))) { 10200ed5401bSmrg i++; 10210ed5401bSmrg } 10220ed5401bSmrg r = bs_read_u(bufinfo, i); 10230ed5401bSmrg r += (1 << i) - 1; 10240ed5401bSmrg return r; 10250ed5401bSmrg} 10260ed5401bSmrg 10270ed5401bSmrgstatic uint32_t remove_03 (uint8_t * bptr, uint32_t len) 10280ed5401bSmrg{ 10290ed5401bSmrg uint32_t nal_len = 0; 10300ed5401bSmrg while (nal_len + 2 < len) { 10310ed5401bSmrg if (bptr[0] == 0 && bptr[1] == 0 && bptr[2] == 3) { 10320ed5401bSmrg bptr += 2; 10330ed5401bSmrg nal_len += 2; 10340ed5401bSmrg len--; 10350ed5401bSmrg memmove (bptr, bptr + 1, len - nal_len); 10360ed5401bSmrg } else { 10370ed5401bSmrg bptr++; 10380ed5401bSmrg nal_len++; 10390ed5401bSmrg } 10400ed5401bSmrg } 10410ed5401bSmrg return len; 10420ed5401bSmrg} 10430ed5401bSmrg 10440ed5401bSmrgstatic void scaling_list (uint32_t ix, uint32_t sizeOfScalingList, bufferInfo * bufInfo) 10450ed5401bSmrg{ 10460ed5401bSmrg uint32_t lastScale = 8, nextScale = 8; 10470ed5401bSmrg uint32_t jx; 10480ed5401bSmrg int deltaScale; 10490ed5401bSmrg 10500ed5401bSmrg for (jx = 0; jx < sizeOfScalingList; jx++) { 10510ed5401bSmrg if (nextScale != 0) { 10520ed5401bSmrg deltaScale = h264_se (bufInfo); 10530ed5401bSmrg nextScale = (lastScale + deltaScale + 256) % 256; 10540ed5401bSmrg } 10550ed5401bSmrg if (nextScale == 0) { 10560ed5401bSmrg lastScale = lastScale; 10570ed5401bSmrg } else { 10580ed5401bSmrg lastScale = nextScale; 10590ed5401bSmrg } 10600ed5401bSmrg } 10610ed5401bSmrg} 10620ed5401bSmrg 10630ed5401bSmrgstatic void h264_parse_sequence_parameter_set (h264_decode * dec, bufferInfo * bufInfo) 10640ed5401bSmrg{ 10650ed5401bSmrg uint32_t temp; 10660ed5401bSmrg 10670ed5401bSmrg dec->profile = bs_read_u (bufInfo, 8); 10680ed5401bSmrg bs_read_u (bufInfo, 1); /* constaint_set0_flag */ 10690ed5401bSmrg bs_read_u (bufInfo, 1); /* constaint_set1_flag */ 10700ed5401bSmrg bs_read_u (bufInfo, 1); /* constaint_set2_flag */ 10710ed5401bSmrg bs_read_u (bufInfo, 1); /* constaint_set3_flag */ 10720ed5401bSmrg bs_read_u (bufInfo, 1); /* constaint_set4_flag */ 10730ed5401bSmrg bs_read_u (bufInfo, 1); /* constaint_set5_flag */ 10740ed5401bSmrg 10750ed5401bSmrg 10760ed5401bSmrg h264_check_0s (bufInfo, 2); 10770ed5401bSmrg dec->level_idc = bs_read_u (bufInfo, 8); 10780ed5401bSmrg bs_read_ue (bufInfo); /* SPS id*/ 10790ed5401bSmrg 10800ed5401bSmrg if (dec->profile == 100 || dec->profile == 110 || 10810ed5401bSmrg dec->profile == 122 || dec->profile == 144) { 10820ed5401bSmrg uint32_t chroma_format_idc = bs_read_ue (bufInfo); 10830ed5401bSmrg if (chroma_format_idc == 3) { 10840ed5401bSmrg bs_read_u (bufInfo, 1); /* residual_colour_transform_flag */ 10850ed5401bSmrg } 10860ed5401bSmrg bs_read_ue (bufInfo); /* bit_depth_luma_minus8 */ 10870ed5401bSmrg bs_read_ue (bufInfo); /* bit_depth_chroma_minus8 */ 10880ed5401bSmrg bs_read_u (bufInfo, 1); /* qpprime_y_zero_transform_bypass_flag */ 10890ed5401bSmrg uint32_t seq_scaling_matrix_present_flag = bs_read_u (bufInfo, 1); 10900ed5401bSmrg 10910ed5401bSmrg if (seq_scaling_matrix_present_flag) { 10920ed5401bSmrg for (uint32_t ix = 0; ix < 8; ix++) { 10930ed5401bSmrg temp = bs_read_u (bufInfo, 1); 10940ed5401bSmrg if (temp) { 10950ed5401bSmrg scaling_list (ix, ix < 6 ? 16 : 64, bufInfo); 10960ed5401bSmrg } 10970ed5401bSmrg } 10980ed5401bSmrg } 10990ed5401bSmrg } 11000ed5401bSmrg 11010ed5401bSmrg bs_read_ue (bufInfo); /* log2_max_frame_num_minus4 */ 11020ed5401bSmrg uint32_t pic_order_cnt_type = bs_read_ue (bufInfo); 11030ed5401bSmrg 11040ed5401bSmrg if (pic_order_cnt_type == 0) { 11050ed5401bSmrg bs_read_ue (bufInfo); /* log2_max_pic_order_cnt_lsb_minus4 */ 11060ed5401bSmrg } else if (pic_order_cnt_type == 1) { 11070ed5401bSmrg bs_read_u (bufInfo, 1); /* delta_pic_order_always_zero_flag */ 11080ed5401bSmrg h264_se (bufInfo); /* offset_for_non_ref_pic */ 11090ed5401bSmrg h264_se (bufInfo); /* offset_for_top_to_bottom_field */ 11100ed5401bSmrg temp = bs_read_ue (bufInfo); 11110ed5401bSmrg for (uint32_t ix = 0; ix < temp; ix++) { 11120ed5401bSmrg h264_se (bufInfo); /* offset_for_ref_frame[index] */ 11130ed5401bSmrg } 11140ed5401bSmrg } 11150ed5401bSmrg bs_read_ue (bufInfo); /* num_ref_frames */ 11160ed5401bSmrg bs_read_u (bufInfo, 1); /* gaps_in_frame_num_flag */ 11170ed5401bSmrg uint32_t PicWidthInMbs = bs_read_ue (bufInfo) + 1; 11180ed5401bSmrg 11190ed5401bSmrg dec->pic_width = PicWidthInMbs * 16; 11200ed5401bSmrg uint32_t PicHeightInMapUnits = bs_read_ue (bufInfo) + 1; 11210ed5401bSmrg 11220ed5401bSmrg dec->pic_height = PicHeightInMapUnits * 16; 11230ed5401bSmrg uint32_t frame_mbs_only_flag = bs_read_u (bufInfo, 1); 11240ed5401bSmrg if (!frame_mbs_only_flag) { 11250ed5401bSmrg bs_read_u (bufInfo, 1); /* mb_adaptive_frame_field_flag */ 11260ed5401bSmrg } 11270ed5401bSmrg bs_read_u (bufInfo, 1); /* direct_8x8_inference_flag */ 11280ed5401bSmrg temp = bs_read_u (bufInfo, 1); 11290ed5401bSmrg if (temp) { 11300ed5401bSmrg bs_read_ue (bufInfo); /* frame_crop_left_offset */ 11310ed5401bSmrg bs_read_ue (bufInfo); /* frame_crop_right_offset */ 11320ed5401bSmrg bs_read_ue (bufInfo); /* frame_crop_top_offset */ 11330ed5401bSmrg bs_read_ue (bufInfo); /* frame_crop_bottom_offset */ 11340ed5401bSmrg } 11350ed5401bSmrg temp = bs_read_u (bufInfo, 1); /* VUI Parameters */ 11360ed5401bSmrg} 11370ed5401bSmrg 11380ed5401bSmrgstatic void h264_slice_header (h264_decode * dec, bufferInfo * bufInfo) 11390ed5401bSmrg{ 11400ed5401bSmrg uint32_t temp; 11410ed5401bSmrg 11420ed5401bSmrg bs_read_ue (bufInfo); /* first_mb_in_slice */ 11430ed5401bSmrg temp = bs_read_ue (bufInfo); 11440ed5401bSmrg dec->slice_type = ((temp > 5) ? (temp - 5) : temp); 11450ed5401bSmrg} 11460ed5401bSmrg 11470ed5401bSmrgstatic uint8_t h264_parse_nal (h264_decode * dec, bufferInfo * bufInfo) 11480ed5401bSmrg{ 11490ed5401bSmrg uint8_t type = 0; 11500ed5401bSmrg 11510ed5401bSmrg h264_check_0s (bufInfo, 1); 11520ed5401bSmrg dec->nal_ref_idc = bs_read_u (bufInfo, 2); 11530ed5401bSmrg dec->nal_unit_type = type = bs_read_u (bufInfo, 5); 11540ed5401bSmrg switch (type) 11550ed5401bSmrg { 11560ed5401bSmrg case H264_NAL_TYPE_NON_IDR_SLICE: 11570ed5401bSmrg case H264_NAL_TYPE_IDR_SLICE: 11580ed5401bSmrg h264_slice_header (dec, bufInfo); 11590ed5401bSmrg break; 11600ed5401bSmrg case H264_NAL_TYPE_SEQ_PARAM: 11610ed5401bSmrg h264_parse_sequence_parameter_set (dec, bufInfo); 11620ed5401bSmrg break; 11630ed5401bSmrg case H264_NAL_TYPE_PIC_PARAM: 11640ed5401bSmrg case H264_NAL_TYPE_SEI: 11650ed5401bSmrg case H264_NAL_TYPE_ACCESS_UNIT: 11660ed5401bSmrg case H264_NAL_TYPE_SEQ_EXTENSION: 11670ed5401bSmrg /* NOP */ 11680ed5401bSmrg break; 11690ed5401bSmrg default: 11700ed5401bSmrg printf ("Nal type unknown %d \n ", type); 11710ed5401bSmrg break; 11720ed5401bSmrg } 11730ed5401bSmrg return type; 11740ed5401bSmrg} 11750ed5401bSmrg 11760ed5401bSmrgstatic uint32_t h264_find_next_start_code (uint8_t * pBuf, uint32_t bufLen) 11770ed5401bSmrg{ 11780ed5401bSmrg uint32_t val; 11790ed5401bSmrg uint32_t offset, startBytes; 11800ed5401bSmrg 11810ed5401bSmrg offset = startBytes = 0; 11820ed5401bSmrg if (pBuf[0] == 0 && pBuf[1] == 0 && pBuf[2] == 0 && pBuf[3] == 1) { 11830ed5401bSmrg pBuf += 4; 11840ed5401bSmrg offset = 4; 11850ed5401bSmrg startBytes = 1; 11860ed5401bSmrg } else if (pBuf[0] == 0 && pBuf[1] == 0 && pBuf[2] == 1) { 11870ed5401bSmrg pBuf += 3; 11880ed5401bSmrg offset = 3; 11890ed5401bSmrg startBytes = 1; 11900ed5401bSmrg } 11910ed5401bSmrg val = 0xffffffff; 11920ed5401bSmrg while (offset < bufLen - 3) { 11930ed5401bSmrg val <<= 8; 11940ed5401bSmrg val |= *pBuf++; 11950ed5401bSmrg offset++; 11960ed5401bSmrg if (val == H264_START_CODE) 11970ed5401bSmrg return offset - 4; 11980ed5401bSmrg 11990ed5401bSmrg if ((val & 0x00ffffff) == H264_START_CODE) 12000ed5401bSmrg return offset - 3; 12010ed5401bSmrg } 12020ed5401bSmrg if (bufLen - offset <= 3 && startBytes == 0) { 12030ed5401bSmrg startBytes = 0; 12040ed5401bSmrg return 0; 12050ed5401bSmrg } 12060ed5401bSmrg 12070ed5401bSmrg return offset; 12080ed5401bSmrg} 12090ed5401bSmrg 12100ed5401bSmrgstatic int verify_checksum(uint8_t *buffer, uint32_t buffer_size) 12110ed5401bSmrg{ 12120ed5401bSmrg uint32_t buffer_pos = 0; 12130ed5401bSmrg int done = 0; 12140ed5401bSmrg h264_decode dec; 12150ed5401bSmrg 12160ed5401bSmrg memset(&dec, 0, sizeof(h264_decode)); 12170ed5401bSmrg do { 12180ed5401bSmrg uint32_t ret; 12190ed5401bSmrg 12200ed5401bSmrg ret = h264_find_next_start_code (buffer + buffer_pos, 12210ed5401bSmrg buffer_size - buffer_pos); 12220ed5401bSmrg if (ret == 0) { 12230ed5401bSmrg done = 1; 12240ed5401bSmrg if (buffer_pos == 0) { 12250ed5401bSmrg fprintf (stderr, 12260ed5401bSmrg "couldn't find start code in buffer from 0\n"); 12270ed5401bSmrg } 12280ed5401bSmrg } else { 12290ed5401bSmrg /* have a complete NAL from buffer_pos to end */ 12300ed5401bSmrg if (ret > 3) { 12310ed5401bSmrg uint32_t nal_len; 12320ed5401bSmrg bufferInfo bufinfo; 12330ed5401bSmrg 12340ed5401bSmrg nal_len = remove_03 (buffer + buffer_pos, ret); 12350ed5401bSmrg bufinfo.decBuffer = buffer + buffer_pos + (buffer[buffer_pos + 2] == 1 ? 3 : 4); 12360ed5401bSmrg bufinfo.decBufferSize = (nal_len - (buffer[buffer_pos + 2] == 1 ? 3 : 4)) * 8; 12370ed5401bSmrg bufinfo.end = buffer + buffer_pos + nal_len; 12380ed5401bSmrg bufinfo.numOfBitsInBuffer = 8; 12390ed5401bSmrg bufinfo.decData = *bufinfo.decBuffer; 12400ed5401bSmrg h264_parse_nal (&dec, &bufinfo); 12410ed5401bSmrg } 12420ed5401bSmrg buffer_pos += ret; /* buffer_pos points to next code */ 12430ed5401bSmrg } 12440ed5401bSmrg } while (done == 0); 12450ed5401bSmrg 12460ed5401bSmrg if ((dec.pic_width == gWidth) && 12470ed5401bSmrg (dec.pic_height == gHeight) && 12480ed5401bSmrg (dec.slice_type == gSliceType)) 12490ed5401bSmrg return 0; 12500ed5401bSmrg else 12510ed5401bSmrg return -1; 12520ed5401bSmrg} 12530ed5401bSmrg 12540ed5401bSmrgstatic void check_result(struct amdgpu_vcn_bo fb_buf, struct amdgpu_vcn_bo bs_buf, int frame_type) 12550ed5401bSmrg{ 12560ed5401bSmrg uint32_t *fb_ptr; 12570ed5401bSmrg uint8_t *bs_ptr; 12580ed5401bSmrg uint32_t size; 12590ed5401bSmrg int r; 12600ed5401bSmrg/* uint64_t s[3] = {0, 1121279001727, 1059312481445}; */ 12610ed5401bSmrg 12620ed5401bSmrg r = amdgpu_bo_cpu_map(fb_buf.handle, (void **)&fb_buf.ptr); 12630ed5401bSmrg CU_ASSERT_EQUAL(r, 0); 12640ed5401bSmrg fb_ptr = (uint32_t*)fb_buf.ptr; 12650ed5401bSmrg size = fb_ptr[6]; 12660ed5401bSmrg r = amdgpu_bo_cpu_unmap(fb_buf.handle); 12670ed5401bSmrg CU_ASSERT_EQUAL(r, 0); 12680ed5401bSmrg r = amdgpu_bo_cpu_map(bs_buf.handle, (void **)&bs_buf.ptr); 12690ed5401bSmrg CU_ASSERT_EQUAL(r, 0); 12700ed5401bSmrg 12710ed5401bSmrg bs_ptr = (uint8_t*)bs_buf.ptr; 12720ed5401bSmrg r = verify_checksum(bs_ptr, size); 12730ed5401bSmrg CU_ASSERT_EQUAL(r, 0); 12740ed5401bSmrg r = amdgpu_bo_cpu_unmap(bs_buf.handle); 12750ed5401bSmrg 12760ed5401bSmrg CU_ASSERT_EQUAL(r, 0); 12770ed5401bSmrg} 12780ed5401bSmrg 1279bbff01ceSmrgstatic void amdgpu_cs_vcn_ib_zero_count(int *len, int num) 1280bbff01ceSmrg{ 1281bbff01ceSmrg for (int i = 0; i < num; i++) 1282bbff01ceSmrg ib_cpu[(*len)++] = 0; 1283bbff01ceSmrg} 1284bbff01ceSmrg 12850ed5401bSmrgstatic void amdgpu_cs_vcn_enc_encode_frame(int frame_type) 12860ed5401bSmrg{ 1287bbff01ceSmrg struct amdgpu_vcn_bo bs_buf, fb_buf, input_buf; 1288bbff01ceSmrg int len, r; 12890ed5401bSmrg unsigned width = 160, height = 128, buf_size; 12900ed5401bSmrg uint32_t *p_task_size = NULL; 12910ed5401bSmrg uint32_t task_offset = 0, st_offset; 12920ed5401bSmrg uint32_t *st_size = NULL; 12930ed5401bSmrg uint32_t fw_maj = 1, fw_min = 9; 12940ed5401bSmrg 1295b0ab5608Smrg if (vcn_ip_version_major == 2) { 12960ed5401bSmrg fw_maj = 1; 12970ed5401bSmrg fw_min = 1; 1298b0ab5608Smrg } else if (vcn_ip_version_major == 3) { 12990ed5401bSmrg fw_maj = 1; 13000ed5401bSmrg fw_min = 0; 13010ed5401bSmrg } 13020ed5401bSmrg gSliceType = frame_type; 13030ed5401bSmrg buf_size = ALIGN(width, 256) * ALIGN(height, 32) * 3 / 2; 13040ed5401bSmrg 13050ed5401bSmrg num_resources = 0; 13060ed5401bSmrg alloc_resource(&bs_buf, 4096, AMDGPU_GEM_DOMAIN_GTT); 13070ed5401bSmrg alloc_resource(&fb_buf, 4096, AMDGPU_GEM_DOMAIN_GTT); 1308bbff01ceSmrg alloc_resource(&input_buf, buf_size, AMDGPU_GEM_DOMAIN_GTT); 13090ed5401bSmrg resources[num_resources++] = enc_buf.handle; 13100ed5401bSmrg resources[num_resources++] = cpb_buf.handle; 13110ed5401bSmrg resources[num_resources++] = bs_buf.handle; 13120ed5401bSmrg resources[num_resources++] = fb_buf.handle; 1313bbff01ceSmrg resources[num_resources++] = input_buf.handle; 13140ed5401bSmrg resources[num_resources++] = ib_handle; 13150ed5401bSmrg 13160ed5401bSmrg 13170ed5401bSmrg r = amdgpu_bo_cpu_map(bs_buf.handle, (void**)&bs_buf.ptr); 13180ed5401bSmrg memset(bs_buf.ptr, 0, 4096); 13190ed5401bSmrg r = amdgpu_bo_cpu_unmap(bs_buf.handle); 13200ed5401bSmrg 13210ed5401bSmrg r = amdgpu_bo_cpu_map(fb_buf.handle, (void**)&fb_buf.ptr); 13220ed5401bSmrg memset(fb_buf.ptr, 0, 4096); 13230ed5401bSmrg r = amdgpu_bo_cpu_unmap(fb_buf.handle); 13240ed5401bSmrg 1325bbff01ceSmrg r = amdgpu_bo_cpu_map(input_buf.handle, (void **)&input_buf.ptr); 13260ed5401bSmrg CU_ASSERT_EQUAL(r, 0); 13270ed5401bSmrg 13280ed5401bSmrg for (int i = 0; i < ALIGN(height, 32) * 3 / 2; i++) 1329bbff01ceSmrg memcpy(input_buf.ptr + i * ALIGN(width, 256), frame + i * width, width); 13300ed5401bSmrg 1331bbff01ceSmrg r = amdgpu_bo_cpu_unmap(input_buf.handle); 13320ed5401bSmrg CU_ASSERT_EQUAL(r, 0); 13330ed5401bSmrg 13340ed5401bSmrg len = 0; 1335b0ab5608Smrg 1336b0ab5608Smrg if (vcn_unified_ring) 1337b0ab5608Smrg amdgpu_cs_sq_head(ib_cpu, &len, true); 1338b0ab5608Smrg 13390ed5401bSmrg /* session info */ 13400ed5401bSmrg st_offset = len; 13410ed5401bSmrg st_size = &ib_cpu[len++]; /* size */ 13420ed5401bSmrg ib_cpu[len++] = 0x00000001; /* RENCODE_IB_PARAM_SESSION_INFO */ 13430ed5401bSmrg ib_cpu[len++] = ((fw_maj << 16) | (fw_min << 0)); 13440ed5401bSmrg ib_cpu[len++] = enc_buf.addr >> 32; 13450ed5401bSmrg ib_cpu[len++] = enc_buf.addr; 13460ed5401bSmrg ib_cpu[len++] = 1; /* RENCODE_ENGINE_TYPE_ENCODE */; 13470ed5401bSmrg *st_size = (len - st_offset) * 4; 13480ed5401bSmrg 13490ed5401bSmrg /* task info */ 13500ed5401bSmrg task_offset = len; 13510ed5401bSmrg st_offset = len; 13520ed5401bSmrg st_size = &ib_cpu[len++]; /* size */ 13530ed5401bSmrg ib_cpu[len++] = 0x00000002; /* RENCODE_IB_PARAM_TASK_INFO */ 13540ed5401bSmrg p_task_size = &ib_cpu[len++]; 13550ed5401bSmrg ib_cpu[len++] = enc_task_id++; /* task_id */ 13560ed5401bSmrg ib_cpu[len++] = 1; /* feedback */ 13570ed5401bSmrg *st_size = (len - st_offset) * 4; 13580ed5401bSmrg 13590ed5401bSmrg if (frame_type == 2) { 13600ed5401bSmrg /* sps */ 13610ed5401bSmrg st_offset = len; 13620ed5401bSmrg st_size = &ib_cpu[len++]; /* size */ 1363b0ab5608Smrg if(vcn_ip_version_major == 1) 13640ed5401bSmrg ib_cpu[len++] = 0x00000020; /* RENCODE_IB_PARAM_DIRECT_OUTPUT_NALU vcn 1 */ 13650ed5401bSmrg else 1366bbff01ceSmrg ib_cpu[len++] = 0x0000000a; /* RENCODE_IB_PARAM_DIRECT_OUTPUT_NALU other vcn */ 13670ed5401bSmrg ib_cpu[len++] = 0x00000002; /* RENCODE_DIRECT_OUTPUT_NALU_TYPE_SPS */ 13680ed5401bSmrg ib_cpu[len++] = 0x00000011; /* sps len */ 13690ed5401bSmrg ib_cpu[len++] = 0x00000001; /* start code */ 13700ed5401bSmrg ib_cpu[len++] = 0x6764440b; 13710ed5401bSmrg ib_cpu[len++] = 0xac54c284; 13720ed5401bSmrg ib_cpu[len++] = 0x68078442; 13730ed5401bSmrg ib_cpu[len++] = 0x37000000; 13740ed5401bSmrg *st_size = (len - st_offset) * 4; 13750ed5401bSmrg 13760ed5401bSmrg /* pps */ 13770ed5401bSmrg st_offset = len; 13780ed5401bSmrg st_size = &ib_cpu[len++]; /* size */ 1379b0ab5608Smrg if(vcn_ip_version_major == 1) 13800ed5401bSmrg ib_cpu[len++] = 0x00000020; /* RENCODE_IB_PARAM_DIRECT_OUTPUT_NALU vcn 1*/ 13810ed5401bSmrg else 1382bbff01ceSmrg ib_cpu[len++] = 0x0000000a; /* RENCODE_IB_PARAM_DIRECT_OUTPUT_NALU other vcn*/ 13830ed5401bSmrg ib_cpu[len++] = 0x00000003; /* RENCODE_DIRECT_OUTPUT_NALU_TYPE_PPS */ 13840ed5401bSmrg ib_cpu[len++] = 0x00000008; /* pps len */ 13850ed5401bSmrg ib_cpu[len++] = 0x00000001; /* start code */ 13860ed5401bSmrg ib_cpu[len++] = 0x68ce3c80; 13870ed5401bSmrg *st_size = (len - st_offset) * 4; 13880ed5401bSmrg } 13890ed5401bSmrg 13900ed5401bSmrg /* slice header */ 13910ed5401bSmrg st_offset = len; 13920ed5401bSmrg st_size = &ib_cpu[len++]; /* size */ 1393b0ab5608Smrg if(vcn_ip_version_major == 1) 13940ed5401bSmrg ib_cpu[len++] = 0x0000000a; /* RENCODE_IB_PARAM_SLICE_HEADER vcn 1 */ 13950ed5401bSmrg else 1396bbff01ceSmrg ib_cpu[len++] = 0x0000000b; /* RENCODE_IB_PARAM_SLICE_HEADER other vcn */ 13970ed5401bSmrg if (frame_type == 2) { 13980ed5401bSmrg ib_cpu[len++] = 0x65000000; 13990ed5401bSmrg ib_cpu[len++] = 0x11040000; 14000ed5401bSmrg } else { 14010ed5401bSmrg ib_cpu[len++] = 0x41000000; 14020ed5401bSmrg ib_cpu[len++] = 0x34210000; 14030ed5401bSmrg } 14040ed5401bSmrg ib_cpu[len++] = 0xe0000000; 1405bbff01ceSmrg amdgpu_cs_vcn_ib_zero_count(&len, 13); 14060ed5401bSmrg 14070ed5401bSmrg ib_cpu[len++] = 0x00000001; 14080ed5401bSmrg ib_cpu[len++] = 0x00000008; 14090ed5401bSmrg ib_cpu[len++] = 0x00020000; 14100ed5401bSmrg ib_cpu[len++] = 0x00000000; 14110ed5401bSmrg ib_cpu[len++] = 0x00000001; 14120ed5401bSmrg ib_cpu[len++] = 0x00000015; 14130ed5401bSmrg ib_cpu[len++] = 0x00020001; 14140ed5401bSmrg ib_cpu[len++] = 0x00000000; 14150ed5401bSmrg ib_cpu[len++] = 0x00000001; 14160ed5401bSmrg ib_cpu[len++] = 0x00000003; 1417bbff01ceSmrg amdgpu_cs_vcn_ib_zero_count(&len, 22); 14180ed5401bSmrg *st_size = (len - st_offset) * 4; 14190ed5401bSmrg 14200ed5401bSmrg /* encode params */ 14210ed5401bSmrg st_offset = len; 14220ed5401bSmrg st_size = &ib_cpu[len++]; /* size */ 1423b0ab5608Smrg if(vcn_ip_version_major == 1) 1424bbff01ceSmrg ib_cpu[len++] = 0x0000000b; /* RENCODE_IB_PARAM_ENCODE_PARAMS vcn 1 */ 14250ed5401bSmrg else 1426bbff01ceSmrg ib_cpu[len++] = 0x0000000f; /* RENCODE_IB_PARAM_ENCODE_PARAMS other vcn */ 14270ed5401bSmrg ib_cpu[len++] = frame_type; 14280ed5401bSmrg ib_cpu[len++] = 0x0001f000; 1429bbff01ceSmrg ib_cpu[len++] = input_buf.addr >> 32; 1430bbff01ceSmrg ib_cpu[len++] = input_buf.addr; 1431bbff01ceSmrg ib_cpu[len++] = (input_buf.addr + ALIGN(width, 256) * ALIGN(height, 32)) >> 32; 1432bbff01ceSmrg ib_cpu[len++] = input_buf.addr + ALIGN(width, 256) * ALIGN(height, 32); 14330ed5401bSmrg ib_cpu[len++] = 0x00000100; 14340ed5401bSmrg ib_cpu[len++] = 0x00000080; 14350ed5401bSmrg ib_cpu[len++] = 0x00000000; 14360ed5401bSmrg ib_cpu[len++] = 0xffffffff; 14370ed5401bSmrg ib_cpu[len++] = 0x00000000; 14380ed5401bSmrg *st_size = (len - st_offset) * 4; 14390ed5401bSmrg 14400ed5401bSmrg /* encode params h264 */ 14410ed5401bSmrg st_offset = len; 14420ed5401bSmrg st_size = &ib_cpu[len++]; /* size */ 14430ed5401bSmrg ib_cpu[len++] = 0x00200003; /* RENCODE_H264_IB_PARAM_ENCODE_PARAMS */ 1444bbff01ceSmrg if (vcn_ip_version_major <= 2) { 14450ed5401bSmrg ib_cpu[len++] = 0x00000000; 14460ed5401bSmrg ib_cpu[len++] = 0x00000000; 14470ed5401bSmrg ib_cpu[len++] = 0x00000000; 14480ed5401bSmrg ib_cpu[len++] = 0xffffffff; 14490ed5401bSmrg } else { 14500ed5401bSmrg ib_cpu[len++] = 0x00000000; 14510ed5401bSmrg ib_cpu[len++] = 0x00000000; 14520ed5401bSmrg ib_cpu[len++] = 0x00000000; 14530ed5401bSmrg ib_cpu[len++] = 0x00000000; 14540ed5401bSmrg ib_cpu[len++] = 0x00000000; 14550ed5401bSmrg ib_cpu[len++] = 0x00000000; 14560ed5401bSmrg ib_cpu[len++] = 0x00000000; 14570ed5401bSmrg ib_cpu[len++] = 0xffffffff; 14580ed5401bSmrg ib_cpu[len++] = 0x00000000; 14590ed5401bSmrg ib_cpu[len++] = 0x00000000; 14600ed5401bSmrg ib_cpu[len++] = 0x00000000; 14610ed5401bSmrg ib_cpu[len++] = 0x00000000; 14620ed5401bSmrg ib_cpu[len++] = 0xffffffff; 14630ed5401bSmrg ib_cpu[len++] = 0x00000000; 14640ed5401bSmrg ib_cpu[len++] = 0x00000000; 14650ed5401bSmrg ib_cpu[len++] = 0x00000000; 14660ed5401bSmrg ib_cpu[len++] = 0x00000000; 1467bbff01ceSmrg ib_cpu[len++] = 0x00000001; 14680ed5401bSmrg } 14690ed5401bSmrg *st_size = (len - st_offset) * 4; 14700ed5401bSmrg 14710ed5401bSmrg /* encode context */ 14720ed5401bSmrg st_offset = len; 14730ed5401bSmrg st_size = &ib_cpu[len++]; /* size */ 1474b0ab5608Smrg if(vcn_ip_version_major == 1) 14750ed5401bSmrg ib_cpu[len++] = 0x0000000d; /* ENCODE_CONTEXT_BUFFER vcn 1 */ 14760ed5401bSmrg else 1477bbff01ceSmrg ib_cpu[len++] = 0x00000011; /* ENCODE_CONTEXT_BUFFER other vcn */ 14780ed5401bSmrg ib_cpu[len++] = cpb_buf.addr >> 32; 14790ed5401bSmrg ib_cpu[len++] = cpb_buf.addr; 14800ed5401bSmrg ib_cpu[len++] = 0x00000000; /* swizzle mode */ 14810ed5401bSmrg ib_cpu[len++] = 0x00000100; /* luma pitch */ 14820ed5401bSmrg ib_cpu[len++] = 0x00000100; /* chroma pitch */ 1483bbff01ceSmrg ib_cpu[len++] = 0x00000002; /* no reconstructed picture */ 14840ed5401bSmrg ib_cpu[len++] = 0x00000000; /* reconstructed pic 1 luma offset */ 14850ed5401bSmrg ib_cpu[len++] = ALIGN(width, 256) * ALIGN(height, 32); /* pic1 chroma offset */ 1486bbff01ceSmrg if(vcn_ip_version_major == 4) 1487bbff01ceSmrg amdgpu_cs_vcn_ib_zero_count(&len, 2); 14880ed5401bSmrg ib_cpu[len++] = ALIGN(width, 256) * ALIGN(height, 32) * 3 / 2; /* pic2 luma offset */ 14890ed5401bSmrg ib_cpu[len++] = ALIGN(width, 256) * ALIGN(height, 32) * 5 / 2; /* pic2 chroma offset */ 14900ed5401bSmrg 1491bbff01ceSmrg amdgpu_cs_vcn_ib_zero_count(&len, 280); 14920ed5401bSmrg *st_size = (len - st_offset) * 4; 14930ed5401bSmrg 14940ed5401bSmrg /* bitstream buffer */ 14950ed5401bSmrg st_offset = len; 14960ed5401bSmrg st_size = &ib_cpu[len++]; /* size */ 1497b0ab5608Smrg if(vcn_ip_version_major == 1) 14980ed5401bSmrg ib_cpu[len++] = 0x0000000e; /* VIDEO_BITSTREAM_BUFFER vcn 1 */ 14990ed5401bSmrg else 1500bbff01ceSmrg ib_cpu[len++] = 0x00000012; /* VIDEO_BITSTREAM_BUFFER other vcn */ 1501bbff01ceSmrg 15020ed5401bSmrg ib_cpu[len++] = 0x00000000; /* mode */ 15030ed5401bSmrg ib_cpu[len++] = bs_buf.addr >> 32; 15040ed5401bSmrg ib_cpu[len++] = bs_buf.addr; 15050ed5401bSmrg ib_cpu[len++] = 0x0001f000; 15060ed5401bSmrg ib_cpu[len++] = 0x00000000; 15070ed5401bSmrg *st_size = (len - st_offset) * 4; 15080ed5401bSmrg 15090ed5401bSmrg /* feedback */ 15100ed5401bSmrg st_offset = len; 15110ed5401bSmrg st_size = &ib_cpu[len++]; /* size */ 1512b0ab5608Smrg if(vcn_ip_version_major == 1) 15130ed5401bSmrg ib_cpu[len++] = 0x00000010; /* FEEDBACK_BUFFER vcn 1 */ 15140ed5401bSmrg else 15150ed5401bSmrg ib_cpu[len++] = 0x00000015; /* FEEDBACK_BUFFER vcn 2,3 */ 15160ed5401bSmrg ib_cpu[len++] = 0x00000000; 15170ed5401bSmrg ib_cpu[len++] = fb_buf.addr >> 32; 15180ed5401bSmrg ib_cpu[len++] = fb_buf.addr; 15190ed5401bSmrg ib_cpu[len++] = 0x00000010; 15200ed5401bSmrg ib_cpu[len++] = 0x00000028; 15210ed5401bSmrg *st_size = (len - st_offset) * 4; 15220ed5401bSmrg 15230ed5401bSmrg /* intra refresh */ 15240ed5401bSmrg st_offset = len; 15250ed5401bSmrg st_size = &ib_cpu[len++]; 1526b0ab5608Smrg if(vcn_ip_version_major == 1) 15270ed5401bSmrg ib_cpu[len++] = 0x0000000c; /* INTRA_REFRESH vcn 1 */ 15280ed5401bSmrg else 15290ed5401bSmrg ib_cpu[len++] = 0x00000010; /* INTRA_REFRESH vcn 2,3 */ 15300ed5401bSmrg ib_cpu[len++] = 0x00000000; 15310ed5401bSmrg ib_cpu[len++] = 0x00000000; 15320ed5401bSmrg ib_cpu[len++] = 0x00000000; 15330ed5401bSmrg *st_size = (len - st_offset) * 4; 15340ed5401bSmrg 1535b0ab5608Smrg if(vcn_ip_version_major != 1) { 15360ed5401bSmrg /* Input Format */ 15370ed5401bSmrg st_offset = len; 15380ed5401bSmrg st_size = &ib_cpu[len++]; 15390ed5401bSmrg ib_cpu[len++] = 0x0000000c; 15400ed5401bSmrg ib_cpu[len++] = 0x00000000; /* RENCODE_COLOR_VOLUME_G22_BT709 */ 15410ed5401bSmrg ib_cpu[len++] = 0x00000000; 15420ed5401bSmrg ib_cpu[len++] = 0x00000000; 15430ed5401bSmrg ib_cpu[len++] = 0x00000000; 15440ed5401bSmrg ib_cpu[len++] = 0x00000000; 15450ed5401bSmrg ib_cpu[len++] = 0x00000000; /* RENCODE_COLOR_BIT_DEPTH_8_BIT */ 15460ed5401bSmrg ib_cpu[len++] = 0x00000000; /* RENCODE_COLOR_PACKING_FORMAT_NV12 */ 15470ed5401bSmrg *st_size = (len - st_offset) * 4; 15480ed5401bSmrg 15490ed5401bSmrg /* Output Format */ 15500ed5401bSmrg st_offset = len; 15510ed5401bSmrg st_size = &ib_cpu[len++]; 15520ed5401bSmrg ib_cpu[len++] = 0x0000000d; 15530ed5401bSmrg ib_cpu[len++] = 0x00000000; /* RENCODE_COLOR_VOLUME_G22_BT709 */ 15540ed5401bSmrg ib_cpu[len++] = 0x00000000; 15550ed5401bSmrg ib_cpu[len++] = 0x00000000; 15560ed5401bSmrg ib_cpu[len++] = 0x00000000; /* RENCODE_COLOR_BIT_DEPTH_8_BIT */ 15570ed5401bSmrg *st_size = (len - st_offset) * 4; 15580ed5401bSmrg } 15590ed5401bSmrg /* op_speed */ 15600ed5401bSmrg st_offset = len; 15610ed5401bSmrg st_size = &ib_cpu[len++]; 15620ed5401bSmrg ib_cpu[len++] = 0x01000006; /* SPEED_ENCODING_MODE */ 15630ed5401bSmrg *st_size = (len - st_offset) * 4; 15640ed5401bSmrg 15650ed5401bSmrg /* op_enc */ 15660ed5401bSmrg st_offset = len; 15670ed5401bSmrg st_size = &ib_cpu[len++]; 15680ed5401bSmrg ib_cpu[len++] = 0x01000003; 15690ed5401bSmrg *st_size = (len - st_offset) * 4; 15700ed5401bSmrg 15710ed5401bSmrg *p_task_size = (len - task_offset) * 4; 1572b0ab5608Smrg 1573b0ab5608Smrg if (vcn_unified_ring) 1574b0ab5608Smrg amdgpu_cs_sq_ib_tail(ib_cpu + len); 1575b0ab5608Smrg 15760ed5401bSmrg r = submit(len, AMDGPU_HW_IP_VCN_ENC); 15770ed5401bSmrg CU_ASSERT_EQUAL(r, 0); 15780ed5401bSmrg 15790ed5401bSmrg /* check result */ 15800ed5401bSmrg check_result(fb_buf, bs_buf, frame_type); 15810ed5401bSmrg 15820ed5401bSmrg free_resource(&fb_buf); 15830ed5401bSmrg free_resource(&bs_buf); 1584bbff01ceSmrg free_resource(&input_buf); 1585d8807b2fSmrg} 1586d8807b2fSmrg 1587d8807b2fSmrgstatic void amdgpu_cs_vcn_enc_encode(void) 1588d8807b2fSmrg{ 15890ed5401bSmrg amdgpu_cs_vcn_enc_encode_frame(2); /* IDR frame */ 1590d8807b2fSmrg} 1591d8807b2fSmrg 1592d8807b2fSmrgstatic void amdgpu_cs_vcn_enc_destroy(void) 1593d8807b2fSmrg{ 15940ed5401bSmrg int len = 0, r; 15950ed5401bSmrg uint32_t *p_task_size = NULL; 15960ed5401bSmrg uint32_t task_offset = 0, st_offset; 15970ed5401bSmrg uint32_t *st_size = NULL; 15980ed5401bSmrg uint32_t fw_maj = 1, fw_min = 9; 15990ed5401bSmrg 1600b0ab5608Smrg if (vcn_ip_version_major == 2) { 16010ed5401bSmrg fw_maj = 1; 16020ed5401bSmrg fw_min = 1; 1603b0ab5608Smrg } else if (vcn_ip_version_major == 3) { 16040ed5401bSmrg fw_maj = 1; 16050ed5401bSmrg fw_min = 0; 16060ed5401bSmrg } 16070ed5401bSmrg 16080ed5401bSmrg num_resources = 0; 16090ed5401bSmrg/* alloc_resource(&enc_buf, 128 * 1024, AMDGPU_GEM_DOMAIN_GTT); */ 16100ed5401bSmrg resources[num_resources++] = enc_buf.handle; 16110ed5401bSmrg resources[num_resources++] = ib_handle; 16120ed5401bSmrg 1613b0ab5608Smrg if (vcn_unified_ring) 1614b0ab5608Smrg amdgpu_cs_sq_head(ib_cpu, &len, true); 1615b0ab5608Smrg 16160ed5401bSmrg /* session info */ 16170ed5401bSmrg st_offset = len; 16180ed5401bSmrg st_size = &ib_cpu[len++]; /* size */ 16190ed5401bSmrg ib_cpu[len++] = 0x00000001; /* RENCODE_IB_PARAM_SESSION_INFO */ 16200ed5401bSmrg ib_cpu[len++] = ((fw_maj << 16) | (fw_min << 0)); 16210ed5401bSmrg ib_cpu[len++] = enc_buf.addr >> 32; 16220ed5401bSmrg ib_cpu[len++] = enc_buf.addr; 16230ed5401bSmrg ib_cpu[len++] = 1; /* RENCODE_ENGINE_TYPE_ENCODE; */ 16240ed5401bSmrg *st_size = (len - st_offset) * 4; 16250ed5401bSmrg 16260ed5401bSmrg /* task info */ 16270ed5401bSmrg task_offset = len; 16280ed5401bSmrg st_offset = len; 16290ed5401bSmrg st_size = &ib_cpu[len++]; /* size */ 16300ed5401bSmrg ib_cpu[len++] = 0x00000002; /* RENCODE_IB_PARAM_TASK_INFO */ 16310ed5401bSmrg p_task_size = &ib_cpu[len++]; 16320ed5401bSmrg ib_cpu[len++] = enc_task_id++; /* task_id */ 16330ed5401bSmrg ib_cpu[len++] = 0; /* feedback */ 16340ed5401bSmrg *st_size = (len - st_offset) * 4; 16350ed5401bSmrg 16360ed5401bSmrg /* op close */ 16370ed5401bSmrg st_offset = len; 16380ed5401bSmrg st_size = &ib_cpu[len++]; 16390ed5401bSmrg ib_cpu[len++] = 0x01000002; /* RENCODE_IB_OP_CLOSE_SESSION */ 16400ed5401bSmrg *st_size = (len - st_offset) * 4; 16410ed5401bSmrg 16420ed5401bSmrg *p_task_size = (len - task_offset) * 4; 16430ed5401bSmrg 1644b0ab5608Smrg if (vcn_unified_ring) 1645b0ab5608Smrg amdgpu_cs_sq_ib_tail(ib_cpu + len); 1646b0ab5608Smrg 16470ed5401bSmrg r = submit(len, AMDGPU_HW_IP_VCN_ENC); 16480ed5401bSmrg CU_ASSERT_EQUAL(r, 0); 16490ed5401bSmrg 16500ed5401bSmrg free_resource(&cpb_buf); 16510ed5401bSmrg free_resource(&enc_buf); 1652d8807b2fSmrg} 1653