vcn_tests.c revision b0ab5608
1d8807b2fSmrg/* 2d8807b2fSmrg * Copyright 2017 Advanced Micro Devices, Inc. 3d8807b2fSmrg * 4d8807b2fSmrg * Permission is hereby granted, free of charge, to any person obtaining a 5d8807b2fSmrg * copy of this software and associated documentation files (the "Software"), 6d8807b2fSmrg * to deal in the Software without restriction, including without limitation 7d8807b2fSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8d8807b2fSmrg * and/or sell copies of the Software, and to permit persons to whom the 9d8807b2fSmrg * Software is furnished to do so, subject to the following conditions: 10d8807b2fSmrg * 11d8807b2fSmrg * The above copyright notice and this permission notice shall be included in 12d8807b2fSmrg * all copies or substantial portions of the Software. 13d8807b2fSmrg * 14d8807b2fSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15d8807b2fSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16d8807b2fSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17d8807b2fSmrg * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18d8807b2fSmrg * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19d8807b2fSmrg * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20d8807b2fSmrg * OTHER DEALINGS IN THE SOFTWARE. 21d8807b2fSmrg * 22d8807b2fSmrg*/ 23d8807b2fSmrg 24d8807b2fSmrg#include <stdio.h> 250ed5401bSmrg#include <string.h> 26d8807b2fSmrg#include <inttypes.h> 270ed5401bSmrg#include <unistd.h> 28d8807b2fSmrg 29d8807b2fSmrg#include "CUnit/Basic.h" 30d8807b2fSmrg 31b0ab5608Smrg#include <unistd.h> 32d8807b2fSmrg#include "util_math.h" 33d8807b2fSmrg 34d8807b2fSmrg#include "amdgpu_test.h" 35d8807b2fSmrg#include "amdgpu_drm.h" 36d8807b2fSmrg#include "amdgpu_internal.h" 37d8807b2fSmrg#include "decode_messages.h" 380ed5401bSmrg#include "frame.h" 39d8807b2fSmrg 40d8807b2fSmrg#define IB_SIZE 4096 41d8807b2fSmrg#define MAX_RESOURCES 16 42d8807b2fSmrg 43b0ab5608Smrg#define DECODE_CMD_MSG_BUFFER 0x00000000 44b0ab5608Smrg#define DECODE_CMD_DPB_BUFFER 0x00000001 45b0ab5608Smrg#define DECODE_CMD_DECODING_TARGET_BUFFER 0x00000002 46b0ab5608Smrg#define DECODE_CMD_FEEDBACK_BUFFER 0x00000003 47b0ab5608Smrg#define DECODE_CMD_PROB_TBL_BUFFER 0x00000004 48b0ab5608Smrg#define DECODE_CMD_SESSION_CONTEXT_BUFFER 0x00000005 49b0ab5608Smrg#define DECODE_CMD_BITSTREAM_BUFFER 0x00000100 50b0ab5608Smrg#define DECODE_CMD_IT_SCALING_TABLE_BUFFER 0x00000204 51b0ab5608Smrg#define DECODE_CMD_CONTEXT_BUFFER 0x00000206 52b0ab5608Smrg 53b0ab5608Smrg#define DECODE_IB_PARAM_DECODE_BUFFER (0x00000001) 54b0ab5608Smrg 55b0ab5608Smrg#define DECODE_CMDBUF_FLAGS_MSG_BUFFER (0x00000001) 56b0ab5608Smrg#define DECODE_CMDBUF_FLAGS_DPB_BUFFER (0x00000002) 57b0ab5608Smrg#define DECODE_CMDBUF_FLAGS_BITSTREAM_BUFFER (0x00000004) 58b0ab5608Smrg#define DECODE_CMDBUF_FLAGS_DECODING_TARGET_BUFFER (0x00000008) 59b0ab5608Smrg#define DECODE_CMDBUF_FLAGS_FEEDBACK_BUFFER (0x00000010) 60b0ab5608Smrg#define DECODE_CMDBUF_FLAGS_IT_SCALING_BUFFER (0x00000200) 61b0ab5608Smrg#define DECODE_CMDBUF_FLAGS_CONTEXT_BUFFER (0x00000800) 62b0ab5608Smrg#define DECODE_CMDBUF_FLAGS_PROB_TBL_BUFFER (0x00001000) 63b0ab5608Smrg#define DECODE_CMDBUF_FLAGS_SESSION_CONTEXT_BUFFER (0x00100000) 64b0ab5608Smrg 65b0ab5608Smrgstatic bool vcn_dec_sw_ring = false; 66b0ab5608Smrgstatic bool vcn_unified_ring = false; 67b0ab5608Smrg 680ed5401bSmrg#define H264_NAL_TYPE_NON_IDR_SLICE 1 690ed5401bSmrg#define H264_NAL_TYPE_DP_A_SLICE 2 700ed5401bSmrg#define H264_NAL_TYPE_DP_B_SLICE 3 710ed5401bSmrg#define H264_NAL_TYPE_DP_C_SLICE 0x4 720ed5401bSmrg#define H264_NAL_TYPE_IDR_SLICE 0x5 730ed5401bSmrg#define H264_NAL_TYPE_SEI 0x6 740ed5401bSmrg#define H264_NAL_TYPE_SEQ_PARAM 0x7 750ed5401bSmrg#define H264_NAL_TYPE_PIC_PARAM 0x8 760ed5401bSmrg#define H264_NAL_TYPE_ACCESS_UNIT 0x9 770ed5401bSmrg#define H264_NAL_TYPE_END_OF_SEQ 0xa 780ed5401bSmrg#define H264_NAL_TYPE_END_OF_STREAM 0xb 790ed5401bSmrg#define H264_NAL_TYPE_FILLER_DATA 0xc 800ed5401bSmrg#define H264_NAL_TYPE_SEQ_EXTENSION 0xd 810ed5401bSmrg 820ed5401bSmrg#define H264_START_CODE 0x000001 830ed5401bSmrg 84d8807b2fSmrgstruct amdgpu_vcn_bo { 85d8807b2fSmrg amdgpu_bo_handle handle; 86d8807b2fSmrg amdgpu_va_handle va_handle; 87d8807b2fSmrg uint64_t addr; 88d8807b2fSmrg uint64_t size; 89d8807b2fSmrg uint8_t *ptr; 90d8807b2fSmrg}; 91d8807b2fSmrg 92b0ab5608Smrgtypedef struct rvcn_decode_buffer_s { 93b0ab5608Smrg unsigned int valid_buf_flag; 94b0ab5608Smrg unsigned int msg_buffer_address_hi; 95b0ab5608Smrg unsigned int msg_buffer_address_lo; 96b0ab5608Smrg unsigned int dpb_buffer_address_hi; 97b0ab5608Smrg unsigned int dpb_buffer_address_lo; 98b0ab5608Smrg unsigned int target_buffer_address_hi; 99b0ab5608Smrg unsigned int target_buffer_address_lo; 100b0ab5608Smrg unsigned int session_contex_buffer_address_hi; 101b0ab5608Smrg unsigned int session_contex_buffer_address_lo; 102b0ab5608Smrg unsigned int bitstream_buffer_address_hi; 103b0ab5608Smrg unsigned int bitstream_buffer_address_lo; 104b0ab5608Smrg unsigned int context_buffer_address_hi; 105b0ab5608Smrg unsigned int context_buffer_address_lo; 106b0ab5608Smrg unsigned int feedback_buffer_address_hi; 107b0ab5608Smrg unsigned int feedback_buffer_address_lo; 108b0ab5608Smrg unsigned int luma_hist_buffer_address_hi; 109b0ab5608Smrg unsigned int luma_hist_buffer_address_lo; 110b0ab5608Smrg unsigned int prob_tbl_buffer_address_hi; 111b0ab5608Smrg unsigned int prob_tbl_buffer_address_lo; 112b0ab5608Smrg unsigned int sclr_coeff_buffer_address_hi; 113b0ab5608Smrg unsigned int sclr_coeff_buffer_address_lo; 114b0ab5608Smrg unsigned int it_sclr_table_buffer_address_hi; 115b0ab5608Smrg unsigned int it_sclr_table_buffer_address_lo; 116b0ab5608Smrg unsigned int sclr_target_buffer_address_hi; 117b0ab5608Smrg unsigned int sclr_target_buffer_address_lo; 118b0ab5608Smrg unsigned int cenc_size_info_buffer_address_hi; 119b0ab5608Smrg unsigned int cenc_size_info_buffer_address_lo; 120b0ab5608Smrg unsigned int mpeg2_pic_param_buffer_address_hi; 121b0ab5608Smrg unsigned int mpeg2_pic_param_buffer_address_lo; 122b0ab5608Smrg unsigned int mpeg2_mb_control_buffer_address_hi; 123b0ab5608Smrg unsigned int mpeg2_mb_control_buffer_address_lo; 124b0ab5608Smrg unsigned int mpeg2_idct_coeff_buffer_address_hi; 125b0ab5608Smrg unsigned int mpeg2_idct_coeff_buffer_address_lo; 126b0ab5608Smrg} rvcn_decode_buffer_t; 127b0ab5608Smrg 128b0ab5608Smrgtypedef struct rvcn_decode_ib_package_s { 129b0ab5608Smrg unsigned int package_size; 130b0ab5608Smrg unsigned int package_type; 131b0ab5608Smrg} rvcn_decode_ib_package_t; 132b0ab5608Smrg 133b0ab5608Smrg 1345324fb0dSmrgstruct amdgpu_vcn_reg { 1355324fb0dSmrg uint32_t data0; 1365324fb0dSmrg uint32_t data1; 1375324fb0dSmrg uint32_t cmd; 1385324fb0dSmrg uint32_t nop; 1395324fb0dSmrg uint32_t cntl; 1405324fb0dSmrg}; 1415324fb0dSmrg 1420ed5401bSmrgtypedef struct BufferInfo_t { 1430ed5401bSmrg uint32_t numOfBitsInBuffer; 1440ed5401bSmrg const uint8_t *decBuffer; 1450ed5401bSmrg uint8_t decData; 1460ed5401bSmrg uint32_t decBufferSize; 1470ed5401bSmrg const uint8_t *end; 1480ed5401bSmrg} bufferInfo; 1490ed5401bSmrg 1500ed5401bSmrgtypedef struct h264_decode_t { 1510ed5401bSmrg uint8_t profile; 1520ed5401bSmrg uint8_t level_idc; 1530ed5401bSmrg uint8_t nal_ref_idc; 1540ed5401bSmrg uint8_t nal_unit_type; 1550ed5401bSmrg uint32_t pic_width, pic_height; 1560ed5401bSmrg uint32_t slice_type; 1570ed5401bSmrg} h264_decode; 1580ed5401bSmrg 159d8807b2fSmrgstatic amdgpu_device_handle device_handle; 160d8807b2fSmrgstatic uint32_t major_version; 161d8807b2fSmrgstatic uint32_t minor_version; 162d8807b2fSmrgstatic uint32_t family_id; 16341687f09Smrgstatic uint32_t chip_rev; 16441687f09Smrgstatic uint32_t chip_id; 1659bd392adSmrgstatic uint32_t asic_id; 16641687f09Smrgstatic uint32_t chip_rev; 1670ed5401bSmrgstatic struct amdgpu_vcn_bo enc_buf; 1680ed5401bSmrgstatic struct amdgpu_vcn_bo cpb_buf; 1690ed5401bSmrgstatic uint32_t enc_task_id; 170d8807b2fSmrg 171d8807b2fSmrgstatic amdgpu_context_handle context_handle; 172d8807b2fSmrgstatic amdgpu_bo_handle ib_handle; 173d8807b2fSmrgstatic amdgpu_va_handle ib_va_handle; 174d8807b2fSmrgstatic uint64_t ib_mc_address; 175d8807b2fSmrgstatic uint32_t *ib_cpu; 176b0ab5608Smrgstatic uint32_t *ib_checksum; 177b0ab5608Smrgstatic uint32_t *ib_size_in_dw; 178b0ab5608Smrg 179b0ab5608Smrgstatic rvcn_decode_buffer_t *decode_buffer; 180d8807b2fSmrg 181d8807b2fSmrgstatic amdgpu_bo_handle resources[MAX_RESOURCES]; 182d8807b2fSmrgstatic unsigned num_resources; 1830ed5401bSmrg 1840ed5401bSmrgstatic uint8_t vcn_reg_index; 1850ed5401bSmrgstatic struct amdgpu_vcn_reg reg[] = { 1860ed5401bSmrg {0x81c4, 0x81c5, 0x81c3, 0x81ff, 0x81c6}, 1870ed5401bSmrg {0x504, 0x505, 0x503, 0x53f, 0x506}, 1880ed5401bSmrg {0x10, 0x11, 0xf, 0x29, 0x26d}, 1890ed5401bSmrg}; 1900ed5401bSmrg 1910ed5401bSmrguint32_t gWidth, gHeight, gSliceType; 192b0ab5608Smrgstatic uint32_t vcn_ip_version_major; 193b0ab5608Smrgstatic uint32_t vcn_ip_version_minor; 194d8807b2fSmrgstatic void amdgpu_cs_vcn_dec_create(void); 195d8807b2fSmrgstatic void amdgpu_cs_vcn_dec_decode(void); 196d8807b2fSmrgstatic void amdgpu_cs_vcn_dec_destroy(void); 197d8807b2fSmrg 198d8807b2fSmrgstatic void amdgpu_cs_vcn_enc_create(void); 199d8807b2fSmrgstatic void amdgpu_cs_vcn_enc_encode(void); 200d8807b2fSmrgstatic void amdgpu_cs_vcn_enc_destroy(void); 201d8807b2fSmrg 202b0ab5608Smrgstatic void amdgpu_cs_sq_head(uint32_t *base, int *offset, bool enc); 203b0ab5608Smrgstatic void amdgpu_cs_sq_ib_tail(uint32_t *end); 2040ed5401bSmrgstatic void h264_check_0s (bufferInfo * bufInfo, int count); 2050ed5401bSmrgstatic int32_t h264_se (bufferInfo * bufInfo); 2060ed5401bSmrgstatic inline uint32_t bs_read_u1(bufferInfo *bufinfo); 2070ed5401bSmrgstatic inline int bs_eof(bufferInfo *bufinfo); 2080ed5401bSmrgstatic inline uint32_t bs_read_u(bufferInfo* bufinfo, int n); 2090ed5401bSmrgstatic inline uint32_t bs_read_ue(bufferInfo* bufinfo); 2100ed5401bSmrgstatic uint32_t remove_03 (uint8_t *bptr, uint32_t len); 2110ed5401bSmrgstatic void scaling_list (uint32_t ix, uint32_t sizeOfScalingList, bufferInfo *bufInfo); 2120ed5401bSmrgstatic void h264_parse_sequence_parameter_set (h264_decode * dec, bufferInfo *bufInfo); 2130ed5401bSmrgstatic void h264_slice_header (h264_decode *dec, bufferInfo *bufInfo); 2140ed5401bSmrgstatic uint8_t h264_parse_nal (h264_decode *dec, bufferInfo *bufInfo); 2150ed5401bSmrgstatic uint32_t h264_find_next_start_code (uint8_t *pBuf, uint32_t bufLen); 2160ed5401bSmrgstatic int verify_checksum(uint8_t *buffer, uint32_t buffer_size); 2170ed5401bSmrg 218d8807b2fSmrgCU_TestInfo vcn_tests[] = { 219d8807b2fSmrg 220d8807b2fSmrg { "VCN DEC create", amdgpu_cs_vcn_dec_create }, 221d8807b2fSmrg { "VCN DEC decode", amdgpu_cs_vcn_dec_decode }, 222d8807b2fSmrg { "VCN DEC destroy", amdgpu_cs_vcn_dec_destroy }, 223d8807b2fSmrg 224d8807b2fSmrg { "VCN ENC create", amdgpu_cs_vcn_enc_create }, 2250ed5401bSmrg { "VCN ENC encode", amdgpu_cs_vcn_enc_encode }, 226d8807b2fSmrg { "VCN ENC destroy", amdgpu_cs_vcn_enc_destroy }, 227d8807b2fSmrg CU_TEST_INFO_NULL, 228d8807b2fSmrg}; 229d8807b2fSmrg 23000a23bdaSmrgCU_BOOL suite_vcn_tests_enable(void) 23100a23bdaSmrg{ 23241687f09Smrg struct drm_amdgpu_info_hw_ip info; 233b0ab5608Smrg bool enc_ring, dec_ring; 234b0ab5608Smrg int r; 23500a23bdaSmrg 23600a23bdaSmrg if (amdgpu_device_initialize(drm_amdgpu[0], &major_version, 23700a23bdaSmrg &minor_version, &device_handle)) 23800a23bdaSmrg return CU_FALSE; 23900a23bdaSmrg 24000a23bdaSmrg family_id = device_handle->info.family_id; 2419bd392adSmrg asic_id = device_handle->info.asic_id; 24241687f09Smrg chip_rev = device_handle->info.chip_rev; 24341687f09Smrg chip_id = device_handle->info.chip_external_rev; 24441687f09Smrg 245b0ab5608Smrg r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_VCN_ENC, 0, &info); 246b0ab5608Smrg if (!r) { 247b0ab5608Smrg vcn_ip_version_major = info.hw_ip_version_major; 248b0ab5608Smrg vcn_ip_version_minor = info.hw_ip_version_minor; 249b0ab5608Smrg enc_ring = !!info.available_rings; 250b0ab5608Smrg /* in vcn 4.0 it re-uses encoding queue as unified queue */ 251b0ab5608Smrg if (vcn_ip_version_major >= 4) { 252b0ab5608Smrg vcn_unified_ring = true; 253b0ab5608Smrg vcn_dec_sw_ring = true; 254b0ab5608Smrg dec_ring = enc_ring; 255b0ab5608Smrg } else { 256b0ab5608Smrg r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_VCN_DEC, 0, &info); 257b0ab5608Smrg dec_ring = !!info.available_rings; 258b0ab5608Smrg } 259b0ab5608Smrg } 26000a23bdaSmrg 26100a23bdaSmrg if (amdgpu_device_deinitialize(device_handle)) 2620ed5401bSmrg return CU_FALSE; 26300a23bdaSmrg 264b0ab5608Smrg if (r) { 265b0ab5608Smrg printf("\n\nASIC query hw info failed\n"); 266b0ab5608Smrg return CU_FALSE; 267b0ab5608Smrg } 268b0ab5608Smrg 269b0ab5608Smrg if (!(dec_ring || enc_ring) || 27041687f09Smrg (family_id < AMDGPU_FAMILY_RV && 27141687f09Smrg (family_id == AMDGPU_FAMILY_AI && 2724babd585Smrg (chip_id - chip_rev) < 0x32))) { /* Arcturus */ 27300a23bdaSmrg printf("\n\nThe ASIC NOT support VCN, suite disabled\n"); 27400a23bdaSmrg return CU_FALSE; 27500a23bdaSmrg } 27600a23bdaSmrg 277b0ab5608Smrg if (!dec_ring) { 278b0ab5608Smrg amdgpu_set_test_active("VCN Tests", "VCN DEC create", CU_FALSE); 279b0ab5608Smrg amdgpu_set_test_active("VCN Tests", "VCN DEC decode", CU_FALSE); 280b0ab5608Smrg amdgpu_set_test_active("VCN Tests", "VCN DEC destroy", CU_FALSE); 281b0ab5608Smrg } 282b0ab5608Smrg 283b0ab5608Smrg if (family_id == AMDGPU_FAMILY_AI || !enc_ring) { 28441687f09Smrg amdgpu_set_test_active("VCN Tests", "VCN ENC create", CU_FALSE); 2850ed5401bSmrg amdgpu_set_test_active("VCN Tests", "VCN ENC encode", CU_FALSE); 28641687f09Smrg amdgpu_set_test_active("VCN Tests", "VCN ENC destroy", CU_FALSE); 28741687f09Smrg } 28841687f09Smrg 289b0ab5608Smrg if (vcn_ip_version_major == 1) 2900ed5401bSmrg vcn_reg_index = 0; 291b0ab5608Smrg else if (vcn_ip_version_major == 2 && vcn_ip_version_minor == 0) 2920ed5401bSmrg vcn_reg_index = 1; 293b0ab5608Smrg else if ((vcn_ip_version_major == 2 && vcn_ip_version_minor >= 5) || 294b0ab5608Smrg vcn_ip_version_major == 3) 2950ed5401bSmrg vcn_reg_index = 2; 2965324fb0dSmrg 29700a23bdaSmrg return CU_TRUE; 29800a23bdaSmrg} 29900a23bdaSmrg 300d8807b2fSmrgint suite_vcn_tests_init(void) 301d8807b2fSmrg{ 302d8807b2fSmrg int r; 303d8807b2fSmrg 304d8807b2fSmrg r = amdgpu_device_initialize(drm_amdgpu[0], &major_version, 305d8807b2fSmrg &minor_version, &device_handle); 306d8807b2fSmrg if (r) 307d8807b2fSmrg return CUE_SINIT_FAILED; 308d8807b2fSmrg 309d8807b2fSmrg family_id = device_handle->info.family_id; 310d8807b2fSmrg 311d8807b2fSmrg r = amdgpu_cs_ctx_create(device_handle, &context_handle); 312d8807b2fSmrg if (r) 313d8807b2fSmrg return CUE_SINIT_FAILED; 314d8807b2fSmrg 315d8807b2fSmrg r = amdgpu_bo_alloc_and_map(device_handle, IB_SIZE, 4096, 316d8807b2fSmrg AMDGPU_GEM_DOMAIN_GTT, 0, 317d8807b2fSmrg &ib_handle, (void**)&ib_cpu, 318d8807b2fSmrg &ib_mc_address, &ib_va_handle); 319d8807b2fSmrg if (r) 320d8807b2fSmrg return CUE_SINIT_FAILED; 321d8807b2fSmrg 322d8807b2fSmrg return CUE_SUCCESS; 323d8807b2fSmrg} 324d8807b2fSmrg 325d8807b2fSmrgint suite_vcn_tests_clean(void) 326d8807b2fSmrg{ 327d8807b2fSmrg int r; 328d8807b2fSmrg 32900a23bdaSmrg r = amdgpu_bo_unmap_and_free(ib_handle, ib_va_handle, 33000a23bdaSmrg ib_mc_address, IB_SIZE); 33100a23bdaSmrg if (r) 33200a23bdaSmrg return CUE_SCLEAN_FAILED; 33300a23bdaSmrg 33400a23bdaSmrg r = amdgpu_cs_ctx_free(context_handle); 33500a23bdaSmrg if (r) 33600a23bdaSmrg return CUE_SCLEAN_FAILED; 33700a23bdaSmrg 33800a23bdaSmrg r = amdgpu_device_deinitialize(device_handle); 33900a23bdaSmrg if (r) 34000a23bdaSmrg return CUE_SCLEAN_FAILED; 341d8807b2fSmrg 342d8807b2fSmrg return CUE_SUCCESS; 343d8807b2fSmrg} 344d8807b2fSmrg 345b0ab5608Smrgstatic void amdgpu_cs_sq_head(uint32_t *base, int *offset, bool enc) 346b0ab5608Smrg{ 347b0ab5608Smrg /* signature */ 348b0ab5608Smrg *(base + (*offset)++) = 0x00000010; 349b0ab5608Smrg *(base + (*offset)++) = 0x30000002; 350b0ab5608Smrg ib_checksum = base + (*offset)++; 351b0ab5608Smrg ib_size_in_dw = base + (*offset)++; 352b0ab5608Smrg 353b0ab5608Smrg /* engine info */ 354b0ab5608Smrg *(base + (*offset)++) = 0x00000010; 355b0ab5608Smrg *(base + (*offset)++) = 0x30000001; 356b0ab5608Smrg *(base + (*offset)++) = enc ? 2 : 3; 357b0ab5608Smrg *(base + (*offset)++) = 0x00000000; 358b0ab5608Smrg} 359b0ab5608Smrg 360b0ab5608Smrgstatic void amdgpu_cs_sq_ib_tail(uint32_t *end) 361b0ab5608Smrg{ 362b0ab5608Smrg uint32_t size_in_dw; 363b0ab5608Smrg uint32_t checksum = 0; 364b0ab5608Smrg 365b0ab5608Smrg /* if the pointers are invalid, no need to process */ 366b0ab5608Smrg if (ib_checksum == NULL || ib_size_in_dw == NULL) 367b0ab5608Smrg return; 368b0ab5608Smrg 369b0ab5608Smrg size_in_dw = end - ib_size_in_dw - 1; 370b0ab5608Smrg *ib_size_in_dw = size_in_dw; 371b0ab5608Smrg *(ib_size_in_dw + 4) = size_in_dw * sizeof(uint32_t); 372b0ab5608Smrg 373b0ab5608Smrg for (int i = 0; i < size_in_dw; i++) 374b0ab5608Smrg checksum += *(ib_checksum + 2 + i); 375b0ab5608Smrg 376b0ab5608Smrg *ib_checksum = checksum; 377b0ab5608Smrg 378b0ab5608Smrg ib_checksum = NULL; 379b0ab5608Smrg ib_size_in_dw = NULL; 380b0ab5608Smrg} 381b0ab5608Smrg 382d8807b2fSmrgstatic int submit(unsigned ndw, unsigned ip) 383d8807b2fSmrg{ 384d8807b2fSmrg struct amdgpu_cs_request ibs_request = {0}; 385d8807b2fSmrg struct amdgpu_cs_ib_info ib_info = {0}; 386d8807b2fSmrg struct amdgpu_cs_fence fence_status = {0}; 387d8807b2fSmrg uint32_t expired; 388d8807b2fSmrg int r; 389d8807b2fSmrg 390d8807b2fSmrg ib_info.ib_mc_address = ib_mc_address; 391d8807b2fSmrg ib_info.size = ndw; 392d8807b2fSmrg 393d8807b2fSmrg ibs_request.ip_type = ip; 394d8807b2fSmrg 395d8807b2fSmrg r = amdgpu_bo_list_create(device_handle, num_resources, resources, 396d8807b2fSmrg NULL, &ibs_request.resources); 397d8807b2fSmrg if (r) 398d8807b2fSmrg return r; 399d8807b2fSmrg 400d8807b2fSmrg ibs_request.number_of_ibs = 1; 401d8807b2fSmrg ibs_request.ibs = &ib_info; 402d8807b2fSmrg ibs_request.fence_info.handle = NULL; 403d8807b2fSmrg 404d8807b2fSmrg r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1); 405d8807b2fSmrg if (r) 406d8807b2fSmrg return r; 407d8807b2fSmrg 408d8807b2fSmrg r = amdgpu_bo_list_destroy(ibs_request.resources); 409d8807b2fSmrg if (r) 410d8807b2fSmrg return r; 411d8807b2fSmrg 412d8807b2fSmrg fence_status.context = context_handle; 413d8807b2fSmrg fence_status.ip_type = ip; 414d8807b2fSmrg fence_status.fence = ibs_request.seq_no; 415d8807b2fSmrg 416d8807b2fSmrg r = amdgpu_cs_query_fence_status(&fence_status, 417d8807b2fSmrg AMDGPU_TIMEOUT_INFINITE, 418d8807b2fSmrg 0, &expired); 419d8807b2fSmrg if (r) 420d8807b2fSmrg return r; 421d8807b2fSmrg 422d8807b2fSmrg return 0; 423d8807b2fSmrg} 424d8807b2fSmrg 425d8807b2fSmrgstatic void alloc_resource(struct amdgpu_vcn_bo *vcn_bo, 426d8807b2fSmrg unsigned size, unsigned domain) 427d8807b2fSmrg{ 428d8807b2fSmrg struct amdgpu_bo_alloc_request req = {0}; 429d8807b2fSmrg amdgpu_bo_handle buf_handle; 430d8807b2fSmrg amdgpu_va_handle va_handle; 431d8807b2fSmrg uint64_t va = 0; 432d8807b2fSmrg int r; 433d8807b2fSmrg 434d8807b2fSmrg req.alloc_size = ALIGN(size, 4096); 435d8807b2fSmrg req.preferred_heap = domain; 436d8807b2fSmrg r = amdgpu_bo_alloc(device_handle, &req, &buf_handle); 437d8807b2fSmrg CU_ASSERT_EQUAL(r, 0); 438d8807b2fSmrg r = amdgpu_va_range_alloc(device_handle, 439d8807b2fSmrg amdgpu_gpu_va_range_general, 440d8807b2fSmrg req.alloc_size, 1, 0, &va, 441d8807b2fSmrg &va_handle, 0); 442d8807b2fSmrg CU_ASSERT_EQUAL(r, 0); 443d8807b2fSmrg r = amdgpu_bo_va_op(buf_handle, 0, req.alloc_size, va, 0, 444d8807b2fSmrg AMDGPU_VA_OP_MAP); 445d8807b2fSmrg CU_ASSERT_EQUAL(r, 0); 446d8807b2fSmrg vcn_bo->addr = va; 447d8807b2fSmrg vcn_bo->handle = buf_handle; 448d8807b2fSmrg vcn_bo->size = req.alloc_size; 449d8807b2fSmrg vcn_bo->va_handle = va_handle; 450d8807b2fSmrg r = amdgpu_bo_cpu_map(vcn_bo->handle, (void **)&vcn_bo->ptr); 451d8807b2fSmrg CU_ASSERT_EQUAL(r, 0); 452d8807b2fSmrg memset(vcn_bo->ptr, 0, size); 453d8807b2fSmrg r = amdgpu_bo_cpu_unmap(vcn_bo->handle); 454d8807b2fSmrg CU_ASSERT_EQUAL(r, 0); 455d8807b2fSmrg} 456d8807b2fSmrg 457d8807b2fSmrgstatic void free_resource(struct amdgpu_vcn_bo *vcn_bo) 458d8807b2fSmrg{ 459d8807b2fSmrg int r; 460d8807b2fSmrg 461d8807b2fSmrg r = amdgpu_bo_va_op(vcn_bo->handle, 0, vcn_bo->size, 462d8807b2fSmrg vcn_bo->addr, 0, AMDGPU_VA_OP_UNMAP); 463d8807b2fSmrg CU_ASSERT_EQUAL(r, 0); 464d8807b2fSmrg 465d8807b2fSmrg r = amdgpu_va_range_free(vcn_bo->va_handle); 466d8807b2fSmrg CU_ASSERT_EQUAL(r, 0); 467d8807b2fSmrg 468d8807b2fSmrg r = amdgpu_bo_free(vcn_bo->handle); 469d8807b2fSmrg CU_ASSERT_EQUAL(r, 0); 470d8807b2fSmrg memset(vcn_bo, 0, sizeof(*vcn_bo)); 471d8807b2fSmrg} 472d8807b2fSmrg 473d8807b2fSmrgstatic void vcn_dec_cmd(uint64_t addr, unsigned cmd, int *idx) 474d8807b2fSmrg{ 475b0ab5608Smrg if (vcn_dec_sw_ring == false) { 476b0ab5608Smrg ib_cpu[(*idx)++] = reg[vcn_reg_index].data0; 477b0ab5608Smrg ib_cpu[(*idx)++] = addr; 478b0ab5608Smrg ib_cpu[(*idx)++] = reg[vcn_reg_index].data1; 479b0ab5608Smrg ib_cpu[(*idx)++] = addr >> 32; 480b0ab5608Smrg ib_cpu[(*idx)++] = reg[vcn_reg_index].cmd; 481b0ab5608Smrg ib_cpu[(*idx)++] = cmd << 1; 482b0ab5608Smrg return; 483b0ab5608Smrg } 484b0ab5608Smrg 485b0ab5608Smrg /* Support decode software ring message */ 486b0ab5608Smrg if (!(*idx)) { 487b0ab5608Smrg rvcn_decode_ib_package_t *ib_header; 488b0ab5608Smrg 489b0ab5608Smrg if (vcn_unified_ring) 490b0ab5608Smrg amdgpu_cs_sq_head(ib_cpu, idx, false); 491b0ab5608Smrg 492b0ab5608Smrg ib_header = (rvcn_decode_ib_package_t *)&ib_cpu[*idx]; 493b0ab5608Smrg ib_header->package_size = sizeof(struct rvcn_decode_buffer_s) + 494b0ab5608Smrg sizeof(struct rvcn_decode_ib_package_s); 495b0ab5608Smrg 496b0ab5608Smrg (*idx)++; 497b0ab5608Smrg ib_header->package_type = (DECODE_IB_PARAM_DECODE_BUFFER); 498b0ab5608Smrg (*idx)++; 499b0ab5608Smrg 500b0ab5608Smrg decode_buffer = (rvcn_decode_buffer_t *)&(ib_cpu[*idx]); 501b0ab5608Smrg *idx += sizeof(struct rvcn_decode_buffer_s) / 4; 502b0ab5608Smrg memset(decode_buffer, 0, sizeof(struct rvcn_decode_buffer_s)); 503b0ab5608Smrg } 504b0ab5608Smrg 505b0ab5608Smrg switch(cmd) { 506b0ab5608Smrg case DECODE_CMD_MSG_BUFFER: 507b0ab5608Smrg decode_buffer->valid_buf_flag |= DECODE_CMDBUF_FLAGS_MSG_BUFFER; 508b0ab5608Smrg decode_buffer->msg_buffer_address_hi = (addr >> 32); 509b0ab5608Smrg decode_buffer->msg_buffer_address_lo = (addr); 510b0ab5608Smrg break; 511b0ab5608Smrg case DECODE_CMD_DPB_BUFFER: 512b0ab5608Smrg decode_buffer->valid_buf_flag |= (DECODE_CMDBUF_FLAGS_DPB_BUFFER); 513b0ab5608Smrg decode_buffer->dpb_buffer_address_hi = (addr >> 32); 514b0ab5608Smrg decode_buffer->dpb_buffer_address_lo = (addr); 515b0ab5608Smrg break; 516b0ab5608Smrg case DECODE_CMD_DECODING_TARGET_BUFFER: 517b0ab5608Smrg decode_buffer->valid_buf_flag |= (DECODE_CMDBUF_FLAGS_DECODING_TARGET_BUFFER); 518b0ab5608Smrg decode_buffer->target_buffer_address_hi = (addr >> 32); 519b0ab5608Smrg decode_buffer->target_buffer_address_lo = (addr); 520b0ab5608Smrg break; 521b0ab5608Smrg case DECODE_CMD_FEEDBACK_BUFFER: 522b0ab5608Smrg decode_buffer->valid_buf_flag |= (DECODE_CMDBUF_FLAGS_FEEDBACK_BUFFER); 523b0ab5608Smrg decode_buffer->feedback_buffer_address_hi = (addr >> 32); 524b0ab5608Smrg decode_buffer->feedback_buffer_address_lo = (addr); 525b0ab5608Smrg break; 526b0ab5608Smrg case DECODE_CMD_PROB_TBL_BUFFER: 527b0ab5608Smrg decode_buffer->valid_buf_flag |= (DECODE_CMDBUF_FLAGS_PROB_TBL_BUFFER); 528b0ab5608Smrg decode_buffer->prob_tbl_buffer_address_hi = (addr >> 32); 529b0ab5608Smrg decode_buffer->prob_tbl_buffer_address_lo = (addr); 530b0ab5608Smrg break; 531b0ab5608Smrg case DECODE_CMD_SESSION_CONTEXT_BUFFER: 532b0ab5608Smrg decode_buffer->valid_buf_flag |= (DECODE_CMDBUF_FLAGS_SESSION_CONTEXT_BUFFER); 533b0ab5608Smrg decode_buffer->session_contex_buffer_address_hi = (addr >> 32); 534b0ab5608Smrg decode_buffer->session_contex_buffer_address_lo = (addr); 535b0ab5608Smrg break; 536b0ab5608Smrg case DECODE_CMD_BITSTREAM_BUFFER: 537b0ab5608Smrg decode_buffer->valid_buf_flag |= (DECODE_CMDBUF_FLAGS_BITSTREAM_BUFFER); 538b0ab5608Smrg decode_buffer->bitstream_buffer_address_hi = (addr >> 32); 539b0ab5608Smrg decode_buffer->bitstream_buffer_address_lo = (addr); 540b0ab5608Smrg break; 541b0ab5608Smrg case DECODE_CMD_IT_SCALING_TABLE_BUFFER: 542b0ab5608Smrg decode_buffer->valid_buf_flag |= (DECODE_CMDBUF_FLAGS_IT_SCALING_BUFFER); 543b0ab5608Smrg decode_buffer->it_sclr_table_buffer_address_hi = (addr >> 32); 544b0ab5608Smrg decode_buffer->it_sclr_table_buffer_address_lo = (addr); 545b0ab5608Smrg break; 546b0ab5608Smrg case DECODE_CMD_CONTEXT_BUFFER: 547b0ab5608Smrg decode_buffer->valid_buf_flag |= (DECODE_CMDBUF_FLAGS_CONTEXT_BUFFER); 548b0ab5608Smrg decode_buffer->context_buffer_address_hi = (addr >> 32); 549b0ab5608Smrg decode_buffer->context_buffer_address_lo = (addr); 550b0ab5608Smrg break; 551b0ab5608Smrg default: 552b0ab5608Smrg printf("Not Support!\n"); 553b0ab5608Smrg } 554d8807b2fSmrg} 555d8807b2fSmrg 556d8807b2fSmrgstatic void amdgpu_cs_vcn_dec_create(void) 557d8807b2fSmrg{ 558d8807b2fSmrg struct amdgpu_vcn_bo msg_buf; 559b0ab5608Smrg unsigned ip; 560d8807b2fSmrg int len, r; 561d8807b2fSmrg 562d8807b2fSmrg num_resources = 0; 563d8807b2fSmrg alloc_resource(&msg_buf, 4096, AMDGPU_GEM_DOMAIN_GTT); 564d8807b2fSmrg resources[num_resources++] = msg_buf.handle; 565d8807b2fSmrg resources[num_resources++] = ib_handle; 566d8807b2fSmrg 567d8807b2fSmrg r = amdgpu_bo_cpu_map(msg_buf.handle, (void **)&msg_buf.ptr); 568d8807b2fSmrg CU_ASSERT_EQUAL(r, 0); 569d8807b2fSmrg 570d8807b2fSmrg memset(msg_buf.ptr, 0, 4096); 571d8807b2fSmrg memcpy(msg_buf.ptr, vcn_dec_create_msg, sizeof(vcn_dec_create_msg)); 572d8807b2fSmrg 573d8807b2fSmrg len = 0; 574b0ab5608Smrg if (vcn_dec_sw_ring == true) 575b0ab5608Smrg vcn_dec_cmd(msg_buf.addr, 0, &len); 576b0ab5608Smrg else { 577b0ab5608Smrg ib_cpu[len++] = reg[vcn_reg_index].data0; 578b0ab5608Smrg ib_cpu[len++] = msg_buf.addr; 579b0ab5608Smrg ib_cpu[len++] = reg[vcn_reg_index].data1; 580b0ab5608Smrg ib_cpu[len++] = msg_buf.addr >> 32; 581b0ab5608Smrg ib_cpu[len++] = reg[vcn_reg_index].cmd; 5826532f28eSmrg ib_cpu[len++] = 0; 583b0ab5608Smrg for (; len % 16; ) { 584b0ab5608Smrg ib_cpu[len++] = reg[vcn_reg_index].nop; 585b0ab5608Smrg ib_cpu[len++] = 0; 586b0ab5608Smrg } 5876532f28eSmrg } 588d8807b2fSmrg 589b0ab5608Smrg if (vcn_unified_ring) { 590b0ab5608Smrg amdgpu_cs_sq_ib_tail(ib_cpu + len); 591b0ab5608Smrg ip = AMDGPU_HW_IP_VCN_ENC; 592b0ab5608Smrg } else 593b0ab5608Smrg ip = AMDGPU_HW_IP_VCN_DEC; 594b0ab5608Smrg 595b0ab5608Smrg r = submit(len, ip); 596b0ab5608Smrg 597d8807b2fSmrg CU_ASSERT_EQUAL(r, 0); 598d8807b2fSmrg 599d8807b2fSmrg free_resource(&msg_buf); 600d8807b2fSmrg} 601d8807b2fSmrg 602d8807b2fSmrgstatic void amdgpu_cs_vcn_dec_decode(void) 603d8807b2fSmrg{ 60400a23bdaSmrg const unsigned dpb_size = 15923584, dt_size = 737280; 605d8807b2fSmrg uint64_t msg_addr, fb_addr, bs_addr, dpb_addr, ctx_addr, dt_addr, it_addr, sum; 606d8807b2fSmrg struct amdgpu_vcn_bo dec_buf; 607d8807b2fSmrg int size, len, i, r; 608b0ab5608Smrg unsigned ip; 609d8807b2fSmrg uint8_t *dec; 610d8807b2fSmrg 611d8807b2fSmrg size = 4*1024; /* msg */ 612d8807b2fSmrg size += 4*1024; /* fb */ 613d8807b2fSmrg size += 4096; /*it_scaling_table*/ 614d8807b2fSmrg size += ALIGN(sizeof(uvd_bitstream), 4*1024); 615d8807b2fSmrg size += ALIGN(dpb_size, 4*1024); 616d8807b2fSmrg size += ALIGN(dt_size, 4*1024); 617d8807b2fSmrg 6180ed5401bSmrg num_resources = 0; 619d8807b2fSmrg alloc_resource(&dec_buf, size, AMDGPU_GEM_DOMAIN_GTT); 620d8807b2fSmrg resources[num_resources++] = dec_buf.handle; 621d8807b2fSmrg resources[num_resources++] = ib_handle; 622d8807b2fSmrg 623d8807b2fSmrg r = amdgpu_bo_cpu_map(dec_buf.handle, (void **)&dec_buf.ptr); 624d8807b2fSmrg dec = dec_buf.ptr; 625d8807b2fSmrg 626d8807b2fSmrg CU_ASSERT_EQUAL(r, 0); 627d8807b2fSmrg memset(dec_buf.ptr, 0, size); 628d8807b2fSmrg memcpy(dec_buf.ptr, vcn_dec_decode_msg, sizeof(vcn_dec_decode_msg)); 629d8807b2fSmrg memcpy(dec_buf.ptr + sizeof(vcn_dec_decode_msg), 630d8807b2fSmrg avc_decode_msg, sizeof(avc_decode_msg)); 631d8807b2fSmrg 632d8807b2fSmrg dec += 4*1024; 6339bd392adSmrg memcpy(dec, feedback_msg, sizeof(feedback_msg)); 634d8807b2fSmrg dec += 4*1024; 635d8807b2fSmrg memcpy(dec, uvd_it_scaling_table, sizeof(uvd_it_scaling_table)); 636d8807b2fSmrg 637d8807b2fSmrg dec += 4*1024; 638d8807b2fSmrg memcpy(dec, uvd_bitstream, sizeof(uvd_bitstream)); 639d8807b2fSmrg 640d8807b2fSmrg dec += ALIGN(sizeof(uvd_bitstream), 4*1024); 641d8807b2fSmrg 642d8807b2fSmrg dec += ALIGN(dpb_size, 4*1024); 643d8807b2fSmrg 644d8807b2fSmrg msg_addr = dec_buf.addr; 645d8807b2fSmrg fb_addr = msg_addr + 4*1024; 646d8807b2fSmrg it_addr = fb_addr + 4*1024; 647d8807b2fSmrg bs_addr = it_addr + 4*1024; 648d8807b2fSmrg dpb_addr = ALIGN(bs_addr + sizeof(uvd_bitstream), 4*1024); 649d8807b2fSmrg ctx_addr = ALIGN(dpb_addr + 0x006B9400, 4*1024); 650d8807b2fSmrg dt_addr = ALIGN(dpb_addr + dpb_size, 4*1024); 651d8807b2fSmrg 652d8807b2fSmrg len = 0; 653d8807b2fSmrg vcn_dec_cmd(msg_addr, 0x0, &len); 654d8807b2fSmrg vcn_dec_cmd(dpb_addr, 0x1, &len); 655d8807b2fSmrg vcn_dec_cmd(dt_addr, 0x2, &len); 656d8807b2fSmrg vcn_dec_cmd(fb_addr, 0x3, &len); 657d8807b2fSmrg vcn_dec_cmd(bs_addr, 0x100, &len); 658d8807b2fSmrg vcn_dec_cmd(it_addr, 0x204, &len); 659d8807b2fSmrg vcn_dec_cmd(ctx_addr, 0x206, &len); 660d8807b2fSmrg 661b0ab5608Smrg if (vcn_dec_sw_ring == false) { 662b0ab5608Smrg ib_cpu[len++] = reg[vcn_reg_index].cntl; 663b0ab5608Smrg ib_cpu[len++] = 0x1; 664b0ab5608Smrg for (; len % 16; ) { 665b0ab5608Smrg ib_cpu[len++] = reg[vcn_reg_index].nop; 666b0ab5608Smrg ib_cpu[len++] = 0; 667b0ab5608Smrg } 6686532f28eSmrg } 669d8807b2fSmrg 670b0ab5608Smrg if (vcn_unified_ring) { 671b0ab5608Smrg amdgpu_cs_sq_ib_tail(ib_cpu + len); 672b0ab5608Smrg ip = AMDGPU_HW_IP_VCN_ENC; 673b0ab5608Smrg } else 674b0ab5608Smrg ip = AMDGPU_HW_IP_VCN_DEC; 675b0ab5608Smrg 676b0ab5608Smrg r = submit(len, ip); 677d8807b2fSmrg CU_ASSERT_EQUAL(r, 0); 678d8807b2fSmrg 679d8807b2fSmrg for (i = 0, sum = 0; i < dt_size; ++i) 680d8807b2fSmrg sum += dec[i]; 681d8807b2fSmrg 682d8807b2fSmrg CU_ASSERT_EQUAL(sum, SUM_DECODE); 683d8807b2fSmrg 684d8807b2fSmrg free_resource(&dec_buf); 685d8807b2fSmrg} 686d8807b2fSmrg 687d8807b2fSmrgstatic void amdgpu_cs_vcn_dec_destroy(void) 688d8807b2fSmrg{ 689d8807b2fSmrg struct amdgpu_vcn_bo msg_buf; 690b0ab5608Smrg unsigned ip; 691d8807b2fSmrg int len, r; 692d8807b2fSmrg 6930ed5401bSmrg num_resources = 0; 694d8807b2fSmrg alloc_resource(&msg_buf, 1024, AMDGPU_GEM_DOMAIN_GTT); 695d8807b2fSmrg resources[num_resources++] = msg_buf.handle; 696d8807b2fSmrg resources[num_resources++] = ib_handle; 697d8807b2fSmrg 698d8807b2fSmrg r = amdgpu_bo_cpu_map(msg_buf.handle, (void **)&msg_buf.ptr); 699d8807b2fSmrg CU_ASSERT_EQUAL(r, 0); 700d8807b2fSmrg 701d8807b2fSmrg memset(msg_buf.ptr, 0, 1024); 702d8807b2fSmrg memcpy(msg_buf.ptr, vcn_dec_destroy_msg, sizeof(vcn_dec_destroy_msg)); 703d8807b2fSmrg 704d8807b2fSmrg len = 0; 705b0ab5608Smrg if (vcn_dec_sw_ring == true) 706b0ab5608Smrg vcn_dec_cmd(msg_buf.addr, 0, &len); 707b0ab5608Smrg else { 708b0ab5608Smrg ib_cpu[len++] = reg[vcn_reg_index].data0; 709b0ab5608Smrg ib_cpu[len++] = msg_buf.addr; 710b0ab5608Smrg ib_cpu[len++] = reg[vcn_reg_index].data1; 711b0ab5608Smrg ib_cpu[len++] = msg_buf.addr >> 32; 712b0ab5608Smrg ib_cpu[len++] = reg[vcn_reg_index].cmd; 7136532f28eSmrg ib_cpu[len++] = 0; 714b0ab5608Smrg for (; len % 16; ) { 715b0ab5608Smrg ib_cpu[len++] = reg[vcn_reg_index].nop; 716b0ab5608Smrg ib_cpu[len++] = 0; 717b0ab5608Smrg } 7186532f28eSmrg } 719d8807b2fSmrg 720b0ab5608Smrg if (vcn_unified_ring) { 721b0ab5608Smrg amdgpu_cs_sq_ib_tail(ib_cpu + len); 722b0ab5608Smrg ip = AMDGPU_HW_IP_VCN_ENC; 723b0ab5608Smrg } else 724b0ab5608Smrg ip = AMDGPU_HW_IP_VCN_DEC; 725b0ab5608Smrg 726b0ab5608Smrg r = submit(len, ip); 727d8807b2fSmrg CU_ASSERT_EQUAL(r, 0); 728d8807b2fSmrg 729d8807b2fSmrg free_resource(&msg_buf); 730d8807b2fSmrg} 731d8807b2fSmrg 732d8807b2fSmrgstatic void amdgpu_cs_vcn_enc_create(void) 733d8807b2fSmrg{ 7340ed5401bSmrg int len, r; 7350ed5401bSmrg uint32_t *p_task_size = NULL; 7360ed5401bSmrg uint32_t task_offset = 0, st_offset; 7370ed5401bSmrg uint32_t *st_size = NULL; 7380ed5401bSmrg unsigned width = 160, height = 128, buf_size; 7390ed5401bSmrg uint32_t fw_maj = 1, fw_min = 9; 7400ed5401bSmrg 741b0ab5608Smrg if (vcn_ip_version_major == 2) { 7420ed5401bSmrg fw_maj = 1; 7430ed5401bSmrg fw_min = 1; 744b0ab5608Smrg } else if (vcn_ip_version_major == 3) { 7450ed5401bSmrg fw_maj = 1; 7460ed5401bSmrg fw_min = 0; 7470ed5401bSmrg } 7480ed5401bSmrg 7490ed5401bSmrg gWidth = width; 7500ed5401bSmrg gHeight = height; 7510ed5401bSmrg buf_size = ALIGN(width, 256) * ALIGN(height, 32) * 3 / 2; 7520ed5401bSmrg enc_task_id = 1; 7530ed5401bSmrg 7540ed5401bSmrg num_resources = 0; 7550ed5401bSmrg alloc_resource(&enc_buf, 128 * 1024, AMDGPU_GEM_DOMAIN_GTT); 7560ed5401bSmrg alloc_resource(&cpb_buf, buf_size * 2, AMDGPU_GEM_DOMAIN_GTT); 7570ed5401bSmrg resources[num_resources++] = enc_buf.handle; 7580ed5401bSmrg resources[num_resources++] = cpb_buf.handle; 7590ed5401bSmrg resources[num_resources++] = ib_handle; 7600ed5401bSmrg 7610ed5401bSmrg r = amdgpu_bo_cpu_map(enc_buf.handle, (void**)&enc_buf.ptr); 7620ed5401bSmrg memset(enc_buf.ptr, 0, 128 * 1024); 7630ed5401bSmrg r = amdgpu_bo_cpu_unmap(enc_buf.handle); 7640ed5401bSmrg 7650ed5401bSmrg r = amdgpu_bo_cpu_map(cpb_buf.handle, (void**)&enc_buf.ptr); 7660ed5401bSmrg memset(enc_buf.ptr, 0, buf_size * 2); 7670ed5401bSmrg r = amdgpu_bo_cpu_unmap(cpb_buf.handle); 7680ed5401bSmrg 7690ed5401bSmrg len = 0; 770b0ab5608Smrg 771b0ab5608Smrg if (vcn_unified_ring) 772b0ab5608Smrg amdgpu_cs_sq_head(ib_cpu, &len, true); 773b0ab5608Smrg 7740ed5401bSmrg /* session info */ 7750ed5401bSmrg st_offset = len; 7760ed5401bSmrg st_size = &ib_cpu[len++]; /* size */ 7770ed5401bSmrg ib_cpu[len++] = 0x00000001; /* RENCODE_IB_PARAM_SESSION_INFO */ 7780ed5401bSmrg ib_cpu[len++] = ((fw_maj << 16) | (fw_min << 0)); 7790ed5401bSmrg ib_cpu[len++] = enc_buf.addr >> 32; 7800ed5401bSmrg ib_cpu[len++] = enc_buf.addr; 7810ed5401bSmrg ib_cpu[len++] = 1; /* RENCODE_ENGINE_TYPE_ENCODE; */ 7820ed5401bSmrg *st_size = (len - st_offset) * 4; 7830ed5401bSmrg 7840ed5401bSmrg /* task info */ 7850ed5401bSmrg task_offset = len; 7860ed5401bSmrg st_offset = len; 7870ed5401bSmrg st_size = &ib_cpu[len++]; /* size */ 7880ed5401bSmrg ib_cpu[len++] = 0x00000002; /* RENCODE_IB_PARAM_TASK_INFO */ 7890ed5401bSmrg p_task_size = &ib_cpu[len++]; 7900ed5401bSmrg ib_cpu[len++] = enc_task_id++; /* task_id */ 7910ed5401bSmrg ib_cpu[len++] = 0; /* feedback */ 7920ed5401bSmrg *st_size = (len - st_offset) * 4; 7930ed5401bSmrg 7940ed5401bSmrg /* op init */ 7950ed5401bSmrg st_offset = len; 7960ed5401bSmrg st_size = &ib_cpu[len++]; /* size */ 7970ed5401bSmrg ib_cpu[len++] = 0x01000001; /* RENCODE_IB_OP_INITIALIZE */ 7980ed5401bSmrg *st_size = (len - st_offset) * 4; 7990ed5401bSmrg 8000ed5401bSmrg /* session_init */ 8010ed5401bSmrg st_offset = len; 8020ed5401bSmrg st_size = &ib_cpu[len++]; /* size */ 8030ed5401bSmrg ib_cpu[len++] = 0x00000003; /* RENCODE_IB_PARAM_SESSION_INIT */ 8040ed5401bSmrg ib_cpu[len++] = 1; /* RENCODE_ENCODE_STANDARD_H264 */ 8050ed5401bSmrg ib_cpu[len++] = width; 8060ed5401bSmrg ib_cpu[len++] = height; 8070ed5401bSmrg ib_cpu[len++] = 0; 8080ed5401bSmrg ib_cpu[len++] = 0; 8090ed5401bSmrg ib_cpu[len++] = 0; /* pre encode mode */ 8100ed5401bSmrg ib_cpu[len++] = 0; /* chroma enabled : false */ 8110ed5401bSmrg *st_size = (len - st_offset) * 4; 8120ed5401bSmrg 8130ed5401bSmrg /* slice control */ 8140ed5401bSmrg st_offset = len; 8150ed5401bSmrg st_size = &ib_cpu[len++]; /* size */ 8160ed5401bSmrg ib_cpu[len++] = 0x00200001; /* RENCODE_H264_IB_PARAM_SLICE_CONTROL */ 8170ed5401bSmrg ib_cpu[len++] = 0; /* RENCODE_H264_SLICE_CONTROL_MODE_FIXED_MBS */ 8180ed5401bSmrg ib_cpu[len++] = ALIGN(width, 16) / 16 * ALIGN(height, 16) / 16; 8190ed5401bSmrg *st_size = (len - st_offset) * 4; 8200ed5401bSmrg 8210ed5401bSmrg /* enc spec misc */ 8220ed5401bSmrg st_offset = len; 8230ed5401bSmrg st_size = &ib_cpu[len++]; /* size */ 8240ed5401bSmrg ib_cpu[len++] = 0x00200002; /* RENCODE_H264_IB_PARAM_SPEC_MISC */ 8250ed5401bSmrg ib_cpu[len++] = 0; /* constrained intra pred flag */ 8260ed5401bSmrg ib_cpu[len++] = 0; /* cabac enable */ 8270ed5401bSmrg ib_cpu[len++] = 0; /* cabac init idc */ 8280ed5401bSmrg ib_cpu[len++] = 1; /* half pel enabled */ 8290ed5401bSmrg ib_cpu[len++] = 1; /* quarter pel enabled */ 8300ed5401bSmrg ib_cpu[len++] = 100; /* BASELINE profile */ 8310ed5401bSmrg ib_cpu[len++] = 11; /* level */ 832b0ab5608Smrg if (vcn_ip_version_major == 3) { 8330ed5401bSmrg ib_cpu[len++] = 0; /* b_picture_enabled */ 8340ed5401bSmrg ib_cpu[len++] = 0; /* weighted_bipred_idc */ 8350ed5401bSmrg } 8360ed5401bSmrg *st_size = (len - st_offset) * 4; 8370ed5401bSmrg 8380ed5401bSmrg /* deblocking filter */ 8390ed5401bSmrg st_offset = len; 8400ed5401bSmrg st_size = &ib_cpu[len++]; /* size */ 8410ed5401bSmrg ib_cpu[len++] = 0x00200004; /* RENCODE_H264_IB_PARAM_DEBLOCKING_FILTER */ 8420ed5401bSmrg ib_cpu[len++] = 0; /* disable deblocking filter idc */ 8430ed5401bSmrg ib_cpu[len++] = 0; /* alpha c0 offset */ 8440ed5401bSmrg ib_cpu[len++] = 0; /* tc offset */ 8450ed5401bSmrg ib_cpu[len++] = 0; /* cb offset */ 8460ed5401bSmrg ib_cpu[len++] = 0; /* cr offset */ 8470ed5401bSmrg *st_size = (len - st_offset) * 4; 8480ed5401bSmrg 8490ed5401bSmrg /* layer control */ 8500ed5401bSmrg st_offset = len; 8510ed5401bSmrg st_size = &ib_cpu[len++]; /* size */ 8520ed5401bSmrg ib_cpu[len++] = 0x00000004; /* RENCODE_IB_PARAM_LAYER_CONTROL */ 8530ed5401bSmrg ib_cpu[len++] = 1; /* max temporal layer */ 8540ed5401bSmrg ib_cpu[len++] = 1; /* no of temporal layer */ 8550ed5401bSmrg *st_size = (len - st_offset) * 4; 8560ed5401bSmrg 8570ed5401bSmrg /* rc_session init */ 8580ed5401bSmrg st_offset = len; 8590ed5401bSmrg st_size = &ib_cpu[len++]; /* size */ 8600ed5401bSmrg ib_cpu[len++] = 0x00000006; /* RENCODE_IB_PARAM_RATE_CONTROL_SESSION_INIT */ 8610ed5401bSmrg ib_cpu[len++] = 0; /* rate control */ 8620ed5401bSmrg ib_cpu[len++] = 48; /* vbv buffer level */ 8630ed5401bSmrg *st_size = (len - st_offset) * 4; 8640ed5401bSmrg 8650ed5401bSmrg /* quality params */ 8660ed5401bSmrg st_offset = len; 8670ed5401bSmrg st_size = &ib_cpu[len++]; /* size */ 8680ed5401bSmrg ib_cpu[len++] = 0x00000009; /* RENCODE_IB_PARAM_QUALITY_PARAMS */ 8690ed5401bSmrg ib_cpu[len++] = 0; /* vbaq mode */ 8700ed5401bSmrg ib_cpu[len++] = 0; /* scene change sensitivity */ 8710ed5401bSmrg ib_cpu[len++] = 0; /* scene change min idr interval */ 8720ed5401bSmrg ib_cpu[len++] = 0; 873b0ab5608Smrg if (vcn_ip_version_major == 3) 8740ed5401bSmrg ib_cpu[len++] = 0; 8750ed5401bSmrg *st_size = (len - st_offset) * 4; 8760ed5401bSmrg 8770ed5401bSmrg /* layer select */ 8780ed5401bSmrg st_offset = len; 8790ed5401bSmrg st_size = &ib_cpu[len++]; /* size */ 8800ed5401bSmrg ib_cpu[len++] = 0x00000005; /* RENCODE_IB_PARAM_LAYER_SELECT */ 8810ed5401bSmrg ib_cpu[len++] = 0; /* temporal layer */ 8820ed5401bSmrg *st_size = (len - st_offset) * 4; 8830ed5401bSmrg 8840ed5401bSmrg /* rc layer init */ 8850ed5401bSmrg st_offset = len; 8860ed5401bSmrg st_size = &ib_cpu[len++]; /* size */ 8870ed5401bSmrg ib_cpu[len++] = 0x00000007; /* RENCODE_IB_PARAM_RATE_CONTROL_LAYER_INIT */ 8880ed5401bSmrg ib_cpu[len++] = 0; 8890ed5401bSmrg ib_cpu[len++] = 0; 8900ed5401bSmrg ib_cpu[len++] = 25; 8910ed5401bSmrg ib_cpu[len++] = 1; 8920ed5401bSmrg ib_cpu[len++] = 0x01312d00; 8930ed5401bSmrg ib_cpu[len++] = 0; 8940ed5401bSmrg ib_cpu[len++] = 0; 8950ed5401bSmrg ib_cpu[len++] = 0; 8960ed5401bSmrg *st_size = (len - st_offset) * 4; 8970ed5401bSmrg 8980ed5401bSmrg /* layer select */ 8990ed5401bSmrg st_offset = len; 9000ed5401bSmrg st_size = &ib_cpu[len++]; /* size */ 9010ed5401bSmrg ib_cpu[len++] = 0x00000005; /* RENCODE_IB_PARAM_LAYER_SELECT */ 9020ed5401bSmrg ib_cpu[len++] = 0; /* temporal layer */ 9030ed5401bSmrg *st_size = (len - st_offset) * 4; 9040ed5401bSmrg 9050ed5401bSmrg /* rc per pic */ 9060ed5401bSmrg st_offset = len; 9070ed5401bSmrg st_size = &ib_cpu[len++]; /* size */ 9080ed5401bSmrg ib_cpu[len++] = 0x00000008; /* RENCODE_IB_PARAM_RATE_CONTROL_PER_PICTURE */ 9090ed5401bSmrg ib_cpu[len++] = 20; 9100ed5401bSmrg ib_cpu[len++] = 0; 9110ed5401bSmrg ib_cpu[len++] = 51; 9120ed5401bSmrg ib_cpu[len++] = 0; 9130ed5401bSmrg ib_cpu[len++] = 1; 9140ed5401bSmrg ib_cpu[len++] = 0; 9150ed5401bSmrg ib_cpu[len++] = 1; 9160ed5401bSmrg *st_size = (len - st_offset) * 4; 9170ed5401bSmrg 9180ed5401bSmrg /* op init rc */ 9190ed5401bSmrg st_offset = len; 9200ed5401bSmrg st_size = &ib_cpu[len++]; /* size */ 9210ed5401bSmrg ib_cpu[len++] = 0x01000004; /* RENCODE_IB_OP_INIT_RC */ 9220ed5401bSmrg *st_size = (len - st_offset) * 4; 9230ed5401bSmrg 9240ed5401bSmrg /* op init rc vbv */ 9250ed5401bSmrg st_offset = len; 9260ed5401bSmrg st_size = &ib_cpu[len++]; /* size */ 9270ed5401bSmrg ib_cpu[len++] = 0x01000005; /* RENCODE_IB_OP_INIT_RC_VBV_BUFFER_LEVEL */ 9280ed5401bSmrg *st_size = (len - st_offset) * 4; 9290ed5401bSmrg 9300ed5401bSmrg *p_task_size = (len - task_offset) * 4; 9310ed5401bSmrg 932b0ab5608Smrg if (vcn_unified_ring) 933b0ab5608Smrg amdgpu_cs_sq_ib_tail(ib_cpu + len); 934b0ab5608Smrg 9350ed5401bSmrg r = submit(len, AMDGPU_HW_IP_VCN_ENC); 9360ed5401bSmrg CU_ASSERT_EQUAL(r, 0); 9370ed5401bSmrg} 9380ed5401bSmrg 9390ed5401bSmrgstatic int32_t h264_se (bufferInfo * bufInfo) 9400ed5401bSmrg{ 9410ed5401bSmrg uint32_t ret; 9420ed5401bSmrg 9430ed5401bSmrg ret = bs_read_ue (bufInfo); 9440ed5401bSmrg if ((ret & 0x1) == 0) { 9450ed5401bSmrg ret >>= 1; 9460ed5401bSmrg int32_t temp = 0 - ret; 9470ed5401bSmrg return temp; 9480ed5401bSmrg } 9490ed5401bSmrg 9500ed5401bSmrg return (ret + 1) >> 1; 9510ed5401bSmrg} 9520ed5401bSmrg 9530ed5401bSmrgstatic void h264_check_0s (bufferInfo * bufInfo, int count) 9540ed5401bSmrg{ 9550ed5401bSmrg uint32_t val; 9560ed5401bSmrg 9570ed5401bSmrg val = bs_read_u (bufInfo, count); 9580ed5401bSmrg if (val != 0) { 9590ed5401bSmrg printf ("field error - %d bits should be 0 is %x\n", count, val); 9600ed5401bSmrg } 9610ed5401bSmrg} 9620ed5401bSmrg 9630ed5401bSmrgstatic inline int bs_eof(bufferInfo * bufinfo) 9640ed5401bSmrg{ 9650ed5401bSmrg if (bufinfo->decBuffer >= bufinfo->end) 9660ed5401bSmrg return 1; 9670ed5401bSmrg else 9680ed5401bSmrg return 0; 9690ed5401bSmrg} 9700ed5401bSmrg 9710ed5401bSmrgstatic inline uint32_t bs_read_u1(bufferInfo *bufinfo) 9720ed5401bSmrg{ 9730ed5401bSmrg uint32_t r = 0; 9740ed5401bSmrg uint32_t temp = 0; 9750ed5401bSmrg 9760ed5401bSmrg bufinfo->numOfBitsInBuffer--; 9770ed5401bSmrg if (! bs_eof(bufinfo)) { 9780ed5401bSmrg temp = (((bufinfo->decData)) >> bufinfo->numOfBitsInBuffer); 9790ed5401bSmrg r = temp & 0x01; 9800ed5401bSmrg } 9810ed5401bSmrg 9820ed5401bSmrg if (bufinfo->numOfBitsInBuffer == 0) { 9830ed5401bSmrg bufinfo->decBuffer++; 9840ed5401bSmrg bufinfo->decData = *bufinfo->decBuffer; 9850ed5401bSmrg bufinfo->numOfBitsInBuffer = 8; 9860ed5401bSmrg } 9870ed5401bSmrg 9880ed5401bSmrg return r; 9890ed5401bSmrg} 9900ed5401bSmrg 9910ed5401bSmrgstatic inline uint32_t bs_read_u(bufferInfo* bufinfo, int n) 9920ed5401bSmrg{ 9930ed5401bSmrg uint32_t r = 0; 9940ed5401bSmrg int i; 9950ed5401bSmrg 9960ed5401bSmrg for (i = 0; i < n; i++) { 9970ed5401bSmrg r |= ( bs_read_u1(bufinfo) << ( n - i - 1 ) ); 9980ed5401bSmrg } 9990ed5401bSmrg 10000ed5401bSmrg return r; 10010ed5401bSmrg} 10020ed5401bSmrg 10030ed5401bSmrgstatic inline uint32_t bs_read_ue(bufferInfo* bufinfo) 10040ed5401bSmrg{ 10050ed5401bSmrg int32_t r = 0; 10060ed5401bSmrg int i = 0; 10070ed5401bSmrg 10080ed5401bSmrg while( (bs_read_u1(bufinfo) == 0) && (i < 32) && (!bs_eof(bufinfo))) { 10090ed5401bSmrg i++; 10100ed5401bSmrg } 10110ed5401bSmrg r = bs_read_u(bufinfo, i); 10120ed5401bSmrg r += (1 << i) - 1; 10130ed5401bSmrg return r; 10140ed5401bSmrg} 10150ed5401bSmrg 10160ed5401bSmrgstatic uint32_t remove_03 (uint8_t * bptr, uint32_t len) 10170ed5401bSmrg{ 10180ed5401bSmrg uint32_t nal_len = 0; 10190ed5401bSmrg while (nal_len + 2 < len) { 10200ed5401bSmrg if (bptr[0] == 0 && bptr[1] == 0 && bptr[2] == 3) { 10210ed5401bSmrg bptr += 2; 10220ed5401bSmrg nal_len += 2; 10230ed5401bSmrg len--; 10240ed5401bSmrg memmove (bptr, bptr + 1, len - nal_len); 10250ed5401bSmrg } else { 10260ed5401bSmrg bptr++; 10270ed5401bSmrg nal_len++; 10280ed5401bSmrg } 10290ed5401bSmrg } 10300ed5401bSmrg return len; 10310ed5401bSmrg} 10320ed5401bSmrg 10330ed5401bSmrgstatic void scaling_list (uint32_t ix, uint32_t sizeOfScalingList, bufferInfo * bufInfo) 10340ed5401bSmrg{ 10350ed5401bSmrg uint32_t lastScale = 8, nextScale = 8; 10360ed5401bSmrg uint32_t jx; 10370ed5401bSmrg int deltaScale; 10380ed5401bSmrg 10390ed5401bSmrg for (jx = 0; jx < sizeOfScalingList; jx++) { 10400ed5401bSmrg if (nextScale != 0) { 10410ed5401bSmrg deltaScale = h264_se (bufInfo); 10420ed5401bSmrg nextScale = (lastScale + deltaScale + 256) % 256; 10430ed5401bSmrg } 10440ed5401bSmrg if (nextScale == 0) { 10450ed5401bSmrg lastScale = lastScale; 10460ed5401bSmrg } else { 10470ed5401bSmrg lastScale = nextScale; 10480ed5401bSmrg } 10490ed5401bSmrg } 10500ed5401bSmrg} 10510ed5401bSmrg 10520ed5401bSmrgstatic void h264_parse_sequence_parameter_set (h264_decode * dec, bufferInfo * bufInfo) 10530ed5401bSmrg{ 10540ed5401bSmrg uint32_t temp; 10550ed5401bSmrg 10560ed5401bSmrg dec->profile = bs_read_u (bufInfo, 8); 10570ed5401bSmrg bs_read_u (bufInfo, 1); /* constaint_set0_flag */ 10580ed5401bSmrg bs_read_u (bufInfo, 1); /* constaint_set1_flag */ 10590ed5401bSmrg bs_read_u (bufInfo, 1); /* constaint_set2_flag */ 10600ed5401bSmrg bs_read_u (bufInfo, 1); /* constaint_set3_flag */ 10610ed5401bSmrg bs_read_u (bufInfo, 1); /* constaint_set4_flag */ 10620ed5401bSmrg bs_read_u (bufInfo, 1); /* constaint_set5_flag */ 10630ed5401bSmrg 10640ed5401bSmrg 10650ed5401bSmrg h264_check_0s (bufInfo, 2); 10660ed5401bSmrg dec->level_idc = bs_read_u (bufInfo, 8); 10670ed5401bSmrg bs_read_ue (bufInfo); /* SPS id*/ 10680ed5401bSmrg 10690ed5401bSmrg if (dec->profile == 100 || dec->profile == 110 || 10700ed5401bSmrg dec->profile == 122 || dec->profile == 144) { 10710ed5401bSmrg uint32_t chroma_format_idc = bs_read_ue (bufInfo); 10720ed5401bSmrg if (chroma_format_idc == 3) { 10730ed5401bSmrg bs_read_u (bufInfo, 1); /* residual_colour_transform_flag */ 10740ed5401bSmrg } 10750ed5401bSmrg bs_read_ue (bufInfo); /* bit_depth_luma_minus8 */ 10760ed5401bSmrg bs_read_ue (bufInfo); /* bit_depth_chroma_minus8 */ 10770ed5401bSmrg bs_read_u (bufInfo, 1); /* qpprime_y_zero_transform_bypass_flag */ 10780ed5401bSmrg uint32_t seq_scaling_matrix_present_flag = bs_read_u (bufInfo, 1); 10790ed5401bSmrg 10800ed5401bSmrg if (seq_scaling_matrix_present_flag) { 10810ed5401bSmrg for (uint32_t ix = 0; ix < 8; ix++) { 10820ed5401bSmrg temp = bs_read_u (bufInfo, 1); 10830ed5401bSmrg if (temp) { 10840ed5401bSmrg scaling_list (ix, ix < 6 ? 16 : 64, bufInfo); 10850ed5401bSmrg } 10860ed5401bSmrg } 10870ed5401bSmrg } 10880ed5401bSmrg } 10890ed5401bSmrg 10900ed5401bSmrg bs_read_ue (bufInfo); /* log2_max_frame_num_minus4 */ 10910ed5401bSmrg uint32_t pic_order_cnt_type = bs_read_ue (bufInfo); 10920ed5401bSmrg 10930ed5401bSmrg if (pic_order_cnt_type == 0) { 10940ed5401bSmrg bs_read_ue (bufInfo); /* log2_max_pic_order_cnt_lsb_minus4 */ 10950ed5401bSmrg } else if (pic_order_cnt_type == 1) { 10960ed5401bSmrg bs_read_u (bufInfo, 1); /* delta_pic_order_always_zero_flag */ 10970ed5401bSmrg h264_se (bufInfo); /* offset_for_non_ref_pic */ 10980ed5401bSmrg h264_se (bufInfo); /* offset_for_top_to_bottom_field */ 10990ed5401bSmrg temp = bs_read_ue (bufInfo); 11000ed5401bSmrg for (uint32_t ix = 0; ix < temp; ix++) { 11010ed5401bSmrg h264_se (bufInfo); /* offset_for_ref_frame[index] */ 11020ed5401bSmrg } 11030ed5401bSmrg } 11040ed5401bSmrg bs_read_ue (bufInfo); /* num_ref_frames */ 11050ed5401bSmrg bs_read_u (bufInfo, 1); /* gaps_in_frame_num_flag */ 11060ed5401bSmrg uint32_t PicWidthInMbs = bs_read_ue (bufInfo) + 1; 11070ed5401bSmrg 11080ed5401bSmrg dec->pic_width = PicWidthInMbs * 16; 11090ed5401bSmrg uint32_t PicHeightInMapUnits = bs_read_ue (bufInfo) + 1; 11100ed5401bSmrg 11110ed5401bSmrg dec->pic_height = PicHeightInMapUnits * 16; 11120ed5401bSmrg uint32_t frame_mbs_only_flag = bs_read_u (bufInfo, 1); 11130ed5401bSmrg if (!frame_mbs_only_flag) { 11140ed5401bSmrg bs_read_u (bufInfo, 1); /* mb_adaptive_frame_field_flag */ 11150ed5401bSmrg } 11160ed5401bSmrg bs_read_u (bufInfo, 1); /* direct_8x8_inference_flag */ 11170ed5401bSmrg temp = bs_read_u (bufInfo, 1); 11180ed5401bSmrg if (temp) { 11190ed5401bSmrg bs_read_ue (bufInfo); /* frame_crop_left_offset */ 11200ed5401bSmrg bs_read_ue (bufInfo); /* frame_crop_right_offset */ 11210ed5401bSmrg bs_read_ue (bufInfo); /* frame_crop_top_offset */ 11220ed5401bSmrg bs_read_ue (bufInfo); /* frame_crop_bottom_offset */ 11230ed5401bSmrg } 11240ed5401bSmrg temp = bs_read_u (bufInfo, 1); /* VUI Parameters */ 11250ed5401bSmrg} 11260ed5401bSmrg 11270ed5401bSmrgstatic void h264_slice_header (h264_decode * dec, bufferInfo * bufInfo) 11280ed5401bSmrg{ 11290ed5401bSmrg uint32_t temp; 11300ed5401bSmrg 11310ed5401bSmrg bs_read_ue (bufInfo); /* first_mb_in_slice */ 11320ed5401bSmrg temp = bs_read_ue (bufInfo); 11330ed5401bSmrg dec->slice_type = ((temp > 5) ? (temp - 5) : temp); 11340ed5401bSmrg} 11350ed5401bSmrg 11360ed5401bSmrgstatic uint8_t h264_parse_nal (h264_decode * dec, bufferInfo * bufInfo) 11370ed5401bSmrg{ 11380ed5401bSmrg uint8_t type = 0; 11390ed5401bSmrg 11400ed5401bSmrg h264_check_0s (bufInfo, 1); 11410ed5401bSmrg dec->nal_ref_idc = bs_read_u (bufInfo, 2); 11420ed5401bSmrg dec->nal_unit_type = type = bs_read_u (bufInfo, 5); 11430ed5401bSmrg switch (type) 11440ed5401bSmrg { 11450ed5401bSmrg case H264_NAL_TYPE_NON_IDR_SLICE: 11460ed5401bSmrg case H264_NAL_TYPE_IDR_SLICE: 11470ed5401bSmrg h264_slice_header (dec, bufInfo); 11480ed5401bSmrg break; 11490ed5401bSmrg case H264_NAL_TYPE_SEQ_PARAM: 11500ed5401bSmrg h264_parse_sequence_parameter_set (dec, bufInfo); 11510ed5401bSmrg break; 11520ed5401bSmrg case H264_NAL_TYPE_PIC_PARAM: 11530ed5401bSmrg case H264_NAL_TYPE_SEI: 11540ed5401bSmrg case H264_NAL_TYPE_ACCESS_UNIT: 11550ed5401bSmrg case H264_NAL_TYPE_SEQ_EXTENSION: 11560ed5401bSmrg /* NOP */ 11570ed5401bSmrg break; 11580ed5401bSmrg default: 11590ed5401bSmrg printf ("Nal type unknown %d \n ", type); 11600ed5401bSmrg break; 11610ed5401bSmrg } 11620ed5401bSmrg return type; 11630ed5401bSmrg} 11640ed5401bSmrg 11650ed5401bSmrgstatic uint32_t h264_find_next_start_code (uint8_t * pBuf, uint32_t bufLen) 11660ed5401bSmrg{ 11670ed5401bSmrg uint32_t val; 11680ed5401bSmrg uint32_t offset, startBytes; 11690ed5401bSmrg 11700ed5401bSmrg offset = startBytes = 0; 11710ed5401bSmrg if (pBuf[0] == 0 && pBuf[1] == 0 && pBuf[2] == 0 && pBuf[3] == 1) { 11720ed5401bSmrg pBuf += 4; 11730ed5401bSmrg offset = 4; 11740ed5401bSmrg startBytes = 1; 11750ed5401bSmrg } else if (pBuf[0] == 0 && pBuf[1] == 0 && pBuf[2] == 1) { 11760ed5401bSmrg pBuf += 3; 11770ed5401bSmrg offset = 3; 11780ed5401bSmrg startBytes = 1; 11790ed5401bSmrg } 11800ed5401bSmrg val = 0xffffffff; 11810ed5401bSmrg while (offset < bufLen - 3) { 11820ed5401bSmrg val <<= 8; 11830ed5401bSmrg val |= *pBuf++; 11840ed5401bSmrg offset++; 11850ed5401bSmrg if (val == H264_START_CODE) 11860ed5401bSmrg return offset - 4; 11870ed5401bSmrg 11880ed5401bSmrg if ((val & 0x00ffffff) == H264_START_CODE) 11890ed5401bSmrg return offset - 3; 11900ed5401bSmrg } 11910ed5401bSmrg if (bufLen - offset <= 3 && startBytes == 0) { 11920ed5401bSmrg startBytes = 0; 11930ed5401bSmrg return 0; 11940ed5401bSmrg } 11950ed5401bSmrg 11960ed5401bSmrg return offset; 11970ed5401bSmrg} 11980ed5401bSmrg 11990ed5401bSmrgstatic int verify_checksum(uint8_t *buffer, uint32_t buffer_size) 12000ed5401bSmrg{ 12010ed5401bSmrg uint32_t buffer_pos = 0; 12020ed5401bSmrg int done = 0; 12030ed5401bSmrg h264_decode dec; 12040ed5401bSmrg 12050ed5401bSmrg memset(&dec, 0, sizeof(h264_decode)); 12060ed5401bSmrg do { 12070ed5401bSmrg uint32_t ret; 12080ed5401bSmrg 12090ed5401bSmrg ret = h264_find_next_start_code (buffer + buffer_pos, 12100ed5401bSmrg buffer_size - buffer_pos); 12110ed5401bSmrg if (ret == 0) { 12120ed5401bSmrg done = 1; 12130ed5401bSmrg if (buffer_pos == 0) { 12140ed5401bSmrg fprintf (stderr, 12150ed5401bSmrg "couldn't find start code in buffer from 0\n"); 12160ed5401bSmrg } 12170ed5401bSmrg } else { 12180ed5401bSmrg /* have a complete NAL from buffer_pos to end */ 12190ed5401bSmrg if (ret > 3) { 12200ed5401bSmrg uint32_t nal_len; 12210ed5401bSmrg bufferInfo bufinfo; 12220ed5401bSmrg 12230ed5401bSmrg nal_len = remove_03 (buffer + buffer_pos, ret); 12240ed5401bSmrg bufinfo.decBuffer = buffer + buffer_pos + (buffer[buffer_pos + 2] == 1 ? 3 : 4); 12250ed5401bSmrg bufinfo.decBufferSize = (nal_len - (buffer[buffer_pos + 2] == 1 ? 3 : 4)) * 8; 12260ed5401bSmrg bufinfo.end = buffer + buffer_pos + nal_len; 12270ed5401bSmrg bufinfo.numOfBitsInBuffer = 8; 12280ed5401bSmrg bufinfo.decData = *bufinfo.decBuffer; 12290ed5401bSmrg h264_parse_nal (&dec, &bufinfo); 12300ed5401bSmrg } 12310ed5401bSmrg buffer_pos += ret; /* buffer_pos points to next code */ 12320ed5401bSmrg } 12330ed5401bSmrg } while (done == 0); 12340ed5401bSmrg 12350ed5401bSmrg if ((dec.pic_width == gWidth) && 12360ed5401bSmrg (dec.pic_height == gHeight) && 12370ed5401bSmrg (dec.slice_type == gSliceType)) 12380ed5401bSmrg return 0; 12390ed5401bSmrg else 12400ed5401bSmrg return -1; 12410ed5401bSmrg} 12420ed5401bSmrg 12430ed5401bSmrgstatic void check_result(struct amdgpu_vcn_bo fb_buf, struct amdgpu_vcn_bo bs_buf, int frame_type) 12440ed5401bSmrg{ 12450ed5401bSmrg uint32_t *fb_ptr; 12460ed5401bSmrg uint8_t *bs_ptr; 12470ed5401bSmrg uint32_t size; 12480ed5401bSmrg int r; 12490ed5401bSmrg/* uint64_t s[3] = {0, 1121279001727, 1059312481445}; */ 12500ed5401bSmrg 12510ed5401bSmrg r = amdgpu_bo_cpu_map(fb_buf.handle, (void **)&fb_buf.ptr); 12520ed5401bSmrg CU_ASSERT_EQUAL(r, 0); 12530ed5401bSmrg fb_ptr = (uint32_t*)fb_buf.ptr; 12540ed5401bSmrg size = fb_ptr[6]; 12550ed5401bSmrg r = amdgpu_bo_cpu_unmap(fb_buf.handle); 12560ed5401bSmrg CU_ASSERT_EQUAL(r, 0); 12570ed5401bSmrg r = amdgpu_bo_cpu_map(bs_buf.handle, (void **)&bs_buf.ptr); 12580ed5401bSmrg CU_ASSERT_EQUAL(r, 0); 12590ed5401bSmrg 12600ed5401bSmrg bs_ptr = (uint8_t*)bs_buf.ptr; 12610ed5401bSmrg r = verify_checksum(bs_ptr, size); 12620ed5401bSmrg CU_ASSERT_EQUAL(r, 0); 12630ed5401bSmrg r = amdgpu_bo_cpu_unmap(bs_buf.handle); 12640ed5401bSmrg 12650ed5401bSmrg CU_ASSERT_EQUAL(r, 0); 12660ed5401bSmrg} 12670ed5401bSmrg 12680ed5401bSmrgstatic void amdgpu_cs_vcn_enc_encode_frame(int frame_type) 12690ed5401bSmrg{ 12700ed5401bSmrg struct amdgpu_vcn_bo bs_buf, fb_buf, vbv_buf; 12710ed5401bSmrg int len, r, i; 12720ed5401bSmrg unsigned width = 160, height = 128, buf_size; 12730ed5401bSmrg uint32_t *p_task_size = NULL; 12740ed5401bSmrg uint32_t task_offset = 0, st_offset; 12750ed5401bSmrg uint32_t *st_size = NULL; 12760ed5401bSmrg uint32_t fw_maj = 1, fw_min = 9; 12770ed5401bSmrg 1278b0ab5608Smrg if (vcn_ip_version_major == 2) { 12790ed5401bSmrg fw_maj = 1; 12800ed5401bSmrg fw_min = 1; 1281b0ab5608Smrg } else if (vcn_ip_version_major == 3) { 12820ed5401bSmrg fw_maj = 1; 12830ed5401bSmrg fw_min = 0; 12840ed5401bSmrg } 12850ed5401bSmrg gSliceType = frame_type; 12860ed5401bSmrg buf_size = ALIGN(width, 256) * ALIGN(height, 32) * 3 / 2; 12870ed5401bSmrg 12880ed5401bSmrg num_resources = 0; 12890ed5401bSmrg alloc_resource(&bs_buf, 4096, AMDGPU_GEM_DOMAIN_GTT); 12900ed5401bSmrg alloc_resource(&fb_buf, 4096, AMDGPU_GEM_DOMAIN_GTT); 12910ed5401bSmrg alloc_resource(&vbv_buf, buf_size, AMDGPU_GEM_DOMAIN_GTT); 12920ed5401bSmrg resources[num_resources++] = enc_buf.handle; 12930ed5401bSmrg resources[num_resources++] = cpb_buf.handle; 12940ed5401bSmrg resources[num_resources++] = bs_buf.handle; 12950ed5401bSmrg resources[num_resources++] = fb_buf.handle; 12960ed5401bSmrg resources[num_resources++] = vbv_buf.handle; 12970ed5401bSmrg resources[num_resources++] = ib_handle; 12980ed5401bSmrg 12990ed5401bSmrg 13000ed5401bSmrg r = amdgpu_bo_cpu_map(bs_buf.handle, (void**)&bs_buf.ptr); 13010ed5401bSmrg memset(bs_buf.ptr, 0, 4096); 13020ed5401bSmrg r = amdgpu_bo_cpu_unmap(bs_buf.handle); 13030ed5401bSmrg 13040ed5401bSmrg r = amdgpu_bo_cpu_map(fb_buf.handle, (void**)&fb_buf.ptr); 13050ed5401bSmrg memset(fb_buf.ptr, 0, 4096); 13060ed5401bSmrg r = amdgpu_bo_cpu_unmap(fb_buf.handle); 13070ed5401bSmrg 13080ed5401bSmrg r = amdgpu_bo_cpu_map(vbv_buf.handle, (void **)&vbv_buf.ptr); 13090ed5401bSmrg CU_ASSERT_EQUAL(r, 0); 13100ed5401bSmrg 13110ed5401bSmrg for (int i = 0; i < ALIGN(height, 32) * 3 / 2; i++) 13120ed5401bSmrg memcpy(vbv_buf.ptr + i * ALIGN(width, 256), frame + i * width, width); 13130ed5401bSmrg 13140ed5401bSmrg r = amdgpu_bo_cpu_unmap(vbv_buf.handle); 13150ed5401bSmrg CU_ASSERT_EQUAL(r, 0); 13160ed5401bSmrg 13170ed5401bSmrg len = 0; 1318b0ab5608Smrg 1319b0ab5608Smrg if (vcn_unified_ring) 1320b0ab5608Smrg amdgpu_cs_sq_head(ib_cpu, &len, true); 1321b0ab5608Smrg 13220ed5401bSmrg /* session info */ 13230ed5401bSmrg st_offset = len; 13240ed5401bSmrg st_size = &ib_cpu[len++]; /* size */ 13250ed5401bSmrg ib_cpu[len++] = 0x00000001; /* RENCODE_IB_PARAM_SESSION_INFO */ 13260ed5401bSmrg ib_cpu[len++] = ((fw_maj << 16) | (fw_min << 0)); 13270ed5401bSmrg ib_cpu[len++] = enc_buf.addr >> 32; 13280ed5401bSmrg ib_cpu[len++] = enc_buf.addr; 13290ed5401bSmrg ib_cpu[len++] = 1; /* RENCODE_ENGINE_TYPE_ENCODE */; 13300ed5401bSmrg *st_size = (len - st_offset) * 4; 13310ed5401bSmrg 13320ed5401bSmrg /* task info */ 13330ed5401bSmrg task_offset = len; 13340ed5401bSmrg st_offset = len; 13350ed5401bSmrg st_size = &ib_cpu[len++]; /* size */ 13360ed5401bSmrg ib_cpu[len++] = 0x00000002; /* RENCODE_IB_PARAM_TASK_INFO */ 13370ed5401bSmrg p_task_size = &ib_cpu[len++]; 13380ed5401bSmrg ib_cpu[len++] = enc_task_id++; /* task_id */ 13390ed5401bSmrg ib_cpu[len++] = 1; /* feedback */ 13400ed5401bSmrg *st_size = (len - st_offset) * 4; 13410ed5401bSmrg 13420ed5401bSmrg if (frame_type == 2) { 13430ed5401bSmrg /* sps */ 13440ed5401bSmrg st_offset = len; 13450ed5401bSmrg st_size = &ib_cpu[len++]; /* size */ 1346b0ab5608Smrg if(vcn_ip_version_major == 1) 13470ed5401bSmrg ib_cpu[len++] = 0x00000020; /* RENCODE_IB_PARAM_DIRECT_OUTPUT_NALU vcn 1 */ 13480ed5401bSmrg else 13490ed5401bSmrg ib_cpu[len++] = 0x0000000a; /* RENCODE_IB_PARAM_DIRECT_OUTPUT_NALU vcn 2,3 */ 13500ed5401bSmrg ib_cpu[len++] = 0x00000002; /* RENCODE_DIRECT_OUTPUT_NALU_TYPE_SPS */ 13510ed5401bSmrg ib_cpu[len++] = 0x00000011; /* sps len */ 13520ed5401bSmrg ib_cpu[len++] = 0x00000001; /* start code */ 13530ed5401bSmrg ib_cpu[len++] = 0x6764440b; 13540ed5401bSmrg ib_cpu[len++] = 0xac54c284; 13550ed5401bSmrg ib_cpu[len++] = 0x68078442; 13560ed5401bSmrg ib_cpu[len++] = 0x37000000; 13570ed5401bSmrg *st_size = (len - st_offset) * 4; 13580ed5401bSmrg 13590ed5401bSmrg /* pps */ 13600ed5401bSmrg st_offset = len; 13610ed5401bSmrg st_size = &ib_cpu[len++]; /* size */ 1362b0ab5608Smrg if(vcn_ip_version_major == 1) 13630ed5401bSmrg ib_cpu[len++] = 0x00000020; /* RENCODE_IB_PARAM_DIRECT_OUTPUT_NALU vcn 1*/ 13640ed5401bSmrg else 13650ed5401bSmrg ib_cpu[len++] = 0x0000000a; /* RENCODE_IB_PARAM_DIRECT_OUTPUT_NALU vcn 2,3*/ 13660ed5401bSmrg ib_cpu[len++] = 0x00000003; /* RENCODE_DIRECT_OUTPUT_NALU_TYPE_PPS */ 13670ed5401bSmrg ib_cpu[len++] = 0x00000008; /* pps len */ 13680ed5401bSmrg ib_cpu[len++] = 0x00000001; /* start code */ 13690ed5401bSmrg ib_cpu[len++] = 0x68ce3c80; 13700ed5401bSmrg *st_size = (len - st_offset) * 4; 13710ed5401bSmrg } 13720ed5401bSmrg 13730ed5401bSmrg /* slice header */ 13740ed5401bSmrg st_offset = len; 13750ed5401bSmrg st_size = &ib_cpu[len++]; /* size */ 1376b0ab5608Smrg if(vcn_ip_version_major == 1) 13770ed5401bSmrg ib_cpu[len++] = 0x0000000a; /* RENCODE_IB_PARAM_SLICE_HEADER vcn 1 */ 13780ed5401bSmrg else 13790ed5401bSmrg ib_cpu[len++] = 0x0000000b; /* RENCODE_IB_PARAM_SLICE_HEADER vcn 2,3 */ 13800ed5401bSmrg if (frame_type == 2) { 13810ed5401bSmrg ib_cpu[len++] = 0x65000000; 13820ed5401bSmrg ib_cpu[len++] = 0x11040000; 13830ed5401bSmrg } else { 13840ed5401bSmrg ib_cpu[len++] = 0x41000000; 13850ed5401bSmrg ib_cpu[len++] = 0x34210000; 13860ed5401bSmrg } 13870ed5401bSmrg ib_cpu[len++] = 0xe0000000; 13880ed5401bSmrg for(i = 0; i < 13; i++) 13890ed5401bSmrg ib_cpu[len++] = 0x00000000; 13900ed5401bSmrg 13910ed5401bSmrg ib_cpu[len++] = 0x00000001; 13920ed5401bSmrg ib_cpu[len++] = 0x00000008; 13930ed5401bSmrg ib_cpu[len++] = 0x00020000; 13940ed5401bSmrg ib_cpu[len++] = 0x00000000; 13950ed5401bSmrg ib_cpu[len++] = 0x00000001; 13960ed5401bSmrg ib_cpu[len++] = 0x00000015; 13970ed5401bSmrg ib_cpu[len++] = 0x00020001; 13980ed5401bSmrg ib_cpu[len++] = 0x00000000; 13990ed5401bSmrg ib_cpu[len++] = 0x00000001; 14000ed5401bSmrg ib_cpu[len++] = 0x00000003; 14010ed5401bSmrg for(i = 0; i < 22; i++) 14020ed5401bSmrg ib_cpu[len++] = 0x00000000; 14030ed5401bSmrg 14040ed5401bSmrg *st_size = (len - st_offset) * 4; 14050ed5401bSmrg 14060ed5401bSmrg /* encode params */ 14070ed5401bSmrg st_offset = len; 14080ed5401bSmrg st_size = &ib_cpu[len++]; /* size */ 1409b0ab5608Smrg if(vcn_ip_version_major == 1) 14100ed5401bSmrg ib_cpu[len++] = 0x0000000b; /* RENCODE_IB_PARAM_ENCODE_PARAMS vcn 1*/ 14110ed5401bSmrg else 14120ed5401bSmrg ib_cpu[len++] = 0x0000000f; /* RENCODE_IB_PARAM_ENCODE_PARAMS vcn 2,3*/ 14130ed5401bSmrg ib_cpu[len++] = frame_type; 14140ed5401bSmrg ib_cpu[len++] = 0x0001f000; 14150ed5401bSmrg ib_cpu[len++] = vbv_buf.addr >> 32; 14160ed5401bSmrg ib_cpu[len++] = vbv_buf.addr; 14170ed5401bSmrg ib_cpu[len++] = (vbv_buf.addr + ALIGN(width, 256) * ALIGN(height, 32)) >> 32; 14180ed5401bSmrg ib_cpu[len++] = vbv_buf.addr + ALIGN(width, 256) * ALIGN(height, 32); 14190ed5401bSmrg ib_cpu[len++] = 0x00000100; 14200ed5401bSmrg ib_cpu[len++] = 0x00000080; 14210ed5401bSmrg ib_cpu[len++] = 0x00000000; 14220ed5401bSmrg ib_cpu[len++] = 0xffffffff; 14230ed5401bSmrg ib_cpu[len++] = 0x00000000; 14240ed5401bSmrg *st_size = (len - st_offset) * 4; 14250ed5401bSmrg 14260ed5401bSmrg /* encode params h264 */ 14270ed5401bSmrg st_offset = len; 14280ed5401bSmrg st_size = &ib_cpu[len++]; /* size */ 14290ed5401bSmrg ib_cpu[len++] = 0x00200003; /* RENCODE_H264_IB_PARAM_ENCODE_PARAMS */ 1430b0ab5608Smrg if (vcn_ip_version_major != 3) { 14310ed5401bSmrg ib_cpu[len++] = 0x00000000; 14320ed5401bSmrg ib_cpu[len++] = 0x00000000; 14330ed5401bSmrg ib_cpu[len++] = 0x00000000; 14340ed5401bSmrg ib_cpu[len++] = 0xffffffff; 14350ed5401bSmrg } else { 14360ed5401bSmrg ib_cpu[len++] = 0x00000000; 14370ed5401bSmrg ib_cpu[len++] = 0x00000000; 14380ed5401bSmrg ib_cpu[len++] = 0x00000000; 14390ed5401bSmrg ib_cpu[len++] = 0x00000000; 14400ed5401bSmrg ib_cpu[len++] = 0x00000000; 14410ed5401bSmrg ib_cpu[len++] = 0x00000000; 14420ed5401bSmrg ib_cpu[len++] = 0x00000000; 14430ed5401bSmrg ib_cpu[len++] = 0xffffffff; 14440ed5401bSmrg ib_cpu[len++] = 0x00000000; 14450ed5401bSmrg ib_cpu[len++] = 0x00000000; 14460ed5401bSmrg ib_cpu[len++] = 0x00000000; 14470ed5401bSmrg ib_cpu[len++] = 0x00000000; 14480ed5401bSmrg ib_cpu[len++] = 0xffffffff; 14490ed5401bSmrg ib_cpu[len++] = 0x00000000; 14500ed5401bSmrg ib_cpu[len++] = 0x00000000; 14510ed5401bSmrg ib_cpu[len++] = 0x00000000; 14520ed5401bSmrg ib_cpu[len++] = 0x00000000; 14530ed5401bSmrg } 14540ed5401bSmrg *st_size = (len - st_offset) * 4; 14550ed5401bSmrg 14560ed5401bSmrg /* encode context */ 14570ed5401bSmrg st_offset = len; 14580ed5401bSmrg st_size = &ib_cpu[len++]; /* size */ 1459b0ab5608Smrg if(vcn_ip_version_major == 1) 14600ed5401bSmrg ib_cpu[len++] = 0x0000000d; /* ENCODE_CONTEXT_BUFFER vcn 1 */ 14610ed5401bSmrg else 14620ed5401bSmrg ib_cpu[len++] = 0x00000011; /* ENCODE_CONTEXT_BUFFER vcn 2,3 */ 14630ed5401bSmrg ib_cpu[len++] = cpb_buf.addr >> 32; 14640ed5401bSmrg ib_cpu[len++] = cpb_buf.addr; 14650ed5401bSmrg ib_cpu[len++] = 0x00000000; /* swizzle mode */ 14660ed5401bSmrg ib_cpu[len++] = 0x00000100; /* luma pitch */ 14670ed5401bSmrg ib_cpu[len++] = 0x00000100; /* chroma pitch */ 14680ed5401bSmrg ib_cpu[len++] = 0x00000003; /* no reconstructed picture */ 14690ed5401bSmrg ib_cpu[len++] = 0x00000000; /* reconstructed pic 1 luma offset */ 14700ed5401bSmrg ib_cpu[len++] = ALIGN(width, 256) * ALIGN(height, 32); /* pic1 chroma offset */ 14710ed5401bSmrg ib_cpu[len++] = ALIGN(width, 256) * ALIGN(height, 32) * 3 / 2; /* pic2 luma offset */ 14720ed5401bSmrg ib_cpu[len++] = ALIGN(width, 256) * ALIGN(height, 32) * 5 / 2; /* pic2 chroma offset */ 14730ed5401bSmrg 14740ed5401bSmrg for (int i = 0; i < 136; i++) 14750ed5401bSmrg ib_cpu[len++] = 0x00000000; 14760ed5401bSmrg *st_size = (len - st_offset) * 4; 14770ed5401bSmrg 14780ed5401bSmrg /* bitstream buffer */ 14790ed5401bSmrg st_offset = len; 14800ed5401bSmrg st_size = &ib_cpu[len++]; /* size */ 1481b0ab5608Smrg if(vcn_ip_version_major == 1) 14820ed5401bSmrg ib_cpu[len++] = 0x0000000e; /* VIDEO_BITSTREAM_BUFFER vcn 1 */ 14830ed5401bSmrg else 14840ed5401bSmrg ib_cpu[len++] = 0x00000012; /* VIDEO_BITSTREAM_BUFFER vcn 2,3 */ 14850ed5401bSmrg ib_cpu[len++] = 0x00000000; /* mode */ 14860ed5401bSmrg ib_cpu[len++] = bs_buf.addr >> 32; 14870ed5401bSmrg ib_cpu[len++] = bs_buf.addr; 14880ed5401bSmrg ib_cpu[len++] = 0x0001f000; 14890ed5401bSmrg ib_cpu[len++] = 0x00000000; 14900ed5401bSmrg *st_size = (len - st_offset) * 4; 14910ed5401bSmrg 14920ed5401bSmrg /* feedback */ 14930ed5401bSmrg st_offset = len; 14940ed5401bSmrg st_size = &ib_cpu[len++]; /* size */ 1495b0ab5608Smrg if(vcn_ip_version_major == 1) 14960ed5401bSmrg ib_cpu[len++] = 0x00000010; /* FEEDBACK_BUFFER vcn 1 */ 14970ed5401bSmrg else 14980ed5401bSmrg ib_cpu[len++] = 0x00000015; /* FEEDBACK_BUFFER vcn 2,3 */ 14990ed5401bSmrg ib_cpu[len++] = 0x00000000; 15000ed5401bSmrg ib_cpu[len++] = fb_buf.addr >> 32; 15010ed5401bSmrg ib_cpu[len++] = fb_buf.addr; 15020ed5401bSmrg ib_cpu[len++] = 0x00000010; 15030ed5401bSmrg ib_cpu[len++] = 0x00000028; 15040ed5401bSmrg *st_size = (len - st_offset) * 4; 15050ed5401bSmrg 15060ed5401bSmrg /* intra refresh */ 15070ed5401bSmrg st_offset = len; 15080ed5401bSmrg st_size = &ib_cpu[len++]; 1509b0ab5608Smrg if(vcn_ip_version_major == 1) 15100ed5401bSmrg ib_cpu[len++] = 0x0000000c; /* INTRA_REFRESH vcn 1 */ 15110ed5401bSmrg else 15120ed5401bSmrg ib_cpu[len++] = 0x00000010; /* INTRA_REFRESH vcn 2,3 */ 15130ed5401bSmrg ib_cpu[len++] = 0x00000000; 15140ed5401bSmrg ib_cpu[len++] = 0x00000000; 15150ed5401bSmrg ib_cpu[len++] = 0x00000000; 15160ed5401bSmrg *st_size = (len - st_offset) * 4; 15170ed5401bSmrg 1518b0ab5608Smrg if(vcn_ip_version_major != 1) { 15190ed5401bSmrg /* Input Format */ 15200ed5401bSmrg st_offset = len; 15210ed5401bSmrg st_size = &ib_cpu[len++]; 15220ed5401bSmrg ib_cpu[len++] = 0x0000000c; 15230ed5401bSmrg ib_cpu[len++] = 0x00000000; /* RENCODE_COLOR_VOLUME_G22_BT709 */ 15240ed5401bSmrg ib_cpu[len++] = 0x00000000; 15250ed5401bSmrg ib_cpu[len++] = 0x00000000; 15260ed5401bSmrg ib_cpu[len++] = 0x00000000; 15270ed5401bSmrg ib_cpu[len++] = 0x00000000; 15280ed5401bSmrg ib_cpu[len++] = 0x00000000; /* RENCODE_COLOR_BIT_DEPTH_8_BIT */ 15290ed5401bSmrg ib_cpu[len++] = 0x00000000; /* RENCODE_COLOR_PACKING_FORMAT_NV12 */ 15300ed5401bSmrg *st_size = (len - st_offset) * 4; 15310ed5401bSmrg 15320ed5401bSmrg /* Output Format */ 15330ed5401bSmrg st_offset = len; 15340ed5401bSmrg st_size = &ib_cpu[len++]; 15350ed5401bSmrg ib_cpu[len++] = 0x0000000d; 15360ed5401bSmrg ib_cpu[len++] = 0x00000000; /* RENCODE_COLOR_VOLUME_G22_BT709 */ 15370ed5401bSmrg ib_cpu[len++] = 0x00000000; 15380ed5401bSmrg ib_cpu[len++] = 0x00000000; 15390ed5401bSmrg ib_cpu[len++] = 0x00000000; /* RENCODE_COLOR_BIT_DEPTH_8_BIT */ 15400ed5401bSmrg *st_size = (len - st_offset) * 4; 15410ed5401bSmrg } 15420ed5401bSmrg /* op_speed */ 15430ed5401bSmrg st_offset = len; 15440ed5401bSmrg st_size = &ib_cpu[len++]; 15450ed5401bSmrg ib_cpu[len++] = 0x01000006; /* SPEED_ENCODING_MODE */ 15460ed5401bSmrg *st_size = (len - st_offset) * 4; 15470ed5401bSmrg 15480ed5401bSmrg /* op_enc */ 15490ed5401bSmrg st_offset = len; 15500ed5401bSmrg st_size = &ib_cpu[len++]; 15510ed5401bSmrg ib_cpu[len++] = 0x01000003; 15520ed5401bSmrg *st_size = (len - st_offset) * 4; 15530ed5401bSmrg 15540ed5401bSmrg *p_task_size = (len - task_offset) * 4; 1555b0ab5608Smrg 1556b0ab5608Smrg if (vcn_unified_ring) 1557b0ab5608Smrg amdgpu_cs_sq_ib_tail(ib_cpu + len); 1558b0ab5608Smrg 15590ed5401bSmrg r = submit(len, AMDGPU_HW_IP_VCN_ENC); 15600ed5401bSmrg CU_ASSERT_EQUAL(r, 0); 15610ed5401bSmrg 15620ed5401bSmrg /* check result */ 15630ed5401bSmrg check_result(fb_buf, bs_buf, frame_type); 15640ed5401bSmrg 15650ed5401bSmrg free_resource(&fb_buf); 15660ed5401bSmrg free_resource(&bs_buf); 15670ed5401bSmrg free_resource(&vbv_buf); 1568d8807b2fSmrg} 1569d8807b2fSmrg 1570d8807b2fSmrgstatic void amdgpu_cs_vcn_enc_encode(void) 1571d8807b2fSmrg{ 15720ed5401bSmrg amdgpu_cs_vcn_enc_encode_frame(2); /* IDR frame */ 1573d8807b2fSmrg} 1574d8807b2fSmrg 1575d8807b2fSmrgstatic void amdgpu_cs_vcn_enc_destroy(void) 1576d8807b2fSmrg{ 15770ed5401bSmrg int len = 0, r; 15780ed5401bSmrg uint32_t *p_task_size = NULL; 15790ed5401bSmrg uint32_t task_offset = 0, st_offset; 15800ed5401bSmrg uint32_t *st_size = NULL; 15810ed5401bSmrg uint32_t fw_maj = 1, fw_min = 9; 15820ed5401bSmrg 1583b0ab5608Smrg if (vcn_ip_version_major == 2) { 15840ed5401bSmrg fw_maj = 1; 15850ed5401bSmrg fw_min = 1; 1586b0ab5608Smrg } else if (vcn_ip_version_major == 3) { 15870ed5401bSmrg fw_maj = 1; 15880ed5401bSmrg fw_min = 0; 15890ed5401bSmrg } 15900ed5401bSmrg 15910ed5401bSmrg num_resources = 0; 15920ed5401bSmrg/* alloc_resource(&enc_buf, 128 * 1024, AMDGPU_GEM_DOMAIN_GTT); */ 15930ed5401bSmrg resources[num_resources++] = enc_buf.handle; 15940ed5401bSmrg resources[num_resources++] = ib_handle; 15950ed5401bSmrg 1596b0ab5608Smrg if (vcn_unified_ring) 1597b0ab5608Smrg amdgpu_cs_sq_head(ib_cpu, &len, true); 1598b0ab5608Smrg 15990ed5401bSmrg /* session info */ 16000ed5401bSmrg st_offset = len; 16010ed5401bSmrg st_size = &ib_cpu[len++]; /* size */ 16020ed5401bSmrg ib_cpu[len++] = 0x00000001; /* RENCODE_IB_PARAM_SESSION_INFO */ 16030ed5401bSmrg ib_cpu[len++] = ((fw_maj << 16) | (fw_min << 0)); 16040ed5401bSmrg ib_cpu[len++] = enc_buf.addr >> 32; 16050ed5401bSmrg ib_cpu[len++] = enc_buf.addr; 16060ed5401bSmrg ib_cpu[len++] = 1; /* RENCODE_ENGINE_TYPE_ENCODE; */ 16070ed5401bSmrg *st_size = (len - st_offset) * 4; 16080ed5401bSmrg 16090ed5401bSmrg /* task info */ 16100ed5401bSmrg task_offset = len; 16110ed5401bSmrg st_offset = len; 16120ed5401bSmrg st_size = &ib_cpu[len++]; /* size */ 16130ed5401bSmrg ib_cpu[len++] = 0x00000002; /* RENCODE_IB_PARAM_TASK_INFO */ 16140ed5401bSmrg p_task_size = &ib_cpu[len++]; 16150ed5401bSmrg ib_cpu[len++] = enc_task_id++; /* task_id */ 16160ed5401bSmrg ib_cpu[len++] = 0; /* feedback */ 16170ed5401bSmrg *st_size = (len - st_offset) * 4; 16180ed5401bSmrg 16190ed5401bSmrg /* op close */ 16200ed5401bSmrg st_offset = len; 16210ed5401bSmrg st_size = &ib_cpu[len++]; 16220ed5401bSmrg ib_cpu[len++] = 0x01000002; /* RENCODE_IB_OP_CLOSE_SESSION */ 16230ed5401bSmrg *st_size = (len - st_offset) * 4; 16240ed5401bSmrg 16250ed5401bSmrg *p_task_size = (len - task_offset) * 4; 16260ed5401bSmrg 1627b0ab5608Smrg if (vcn_unified_ring) 1628b0ab5608Smrg amdgpu_cs_sq_ib_tail(ib_cpu + len); 1629b0ab5608Smrg 16300ed5401bSmrg r = submit(len, AMDGPU_HW_IP_VCN_ENC); 16310ed5401bSmrg CU_ASSERT_EQUAL(r, 0); 16320ed5401bSmrg 16330ed5401bSmrg free_resource(&cpb_buf); 16340ed5401bSmrg free_resource(&enc_buf); 1635d8807b2fSmrg} 1636