1b8e80941Smrg/************************************************************************** 2b8e80941Smrg * 3b8e80941Smrg * Copyright 2011 Advanced Micro Devices, Inc. 4b8e80941Smrg * All Rights Reserved. 5b8e80941Smrg * 6b8e80941Smrg * Permission is hereby granted, free of charge, to any person obtaining a 7b8e80941Smrg * copy of this software and associated documentation files (the 8b8e80941Smrg * "Software"), to deal in the Software without restriction, including 9b8e80941Smrg * without limitation the rights to use, copy, modify, merge, publish, 10b8e80941Smrg * distribute, sub license, and/or sell copies of the Software, and to 11b8e80941Smrg * permit persons to whom the Software is furnished to do so, subject to 12b8e80941Smrg * the following conditions: 13b8e80941Smrg * 14b8e80941Smrg * The above copyright notice and this permission notice (including the 15b8e80941Smrg * next paragraph) shall be included in all copies or substantial portions 16b8e80941Smrg * of the Software. 17b8e80941Smrg * 18b8e80941Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19b8e80941Smrg * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20b8e80941Smrg * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21b8e80941Smrg * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR 22b8e80941Smrg * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23b8e80941Smrg * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24b8e80941Smrg * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25b8e80941Smrg * 26b8e80941Smrg **************************************************************************/ 27b8e80941Smrg 28b8e80941Smrg/* 29b8e80941Smrg * Authors: 30b8e80941Smrg * Christian König <christian.koenig@amd.com> 31b8e80941Smrg * 32b8e80941Smrg */ 33b8e80941Smrg 34b8e80941Smrg#include <sys/types.h> 35b8e80941Smrg#include <assert.h> 36b8e80941Smrg#include <errno.h> 37b8e80941Smrg#include <unistd.h> 38b8e80941Smrg#include <stdio.h> 39b8e80941Smrg 40b8e80941Smrg#include "pipe/p_video_codec.h" 41b8e80941Smrg 42b8e80941Smrg#include "util/u_memory.h" 43b8e80941Smrg#include "util/u_video.h" 44b8e80941Smrg 45b8e80941Smrg#include "vl/vl_defines.h" 46b8e80941Smrg#include "vl/vl_mpeg12_decoder.h" 47b8e80941Smrg 48b8e80941Smrg#include "r600_pipe_common.h" 49b8e80941Smrg#include "radeon_video.h" 50b8e80941Smrg#include "radeon_uvd.h" 51b8e80941Smrg 52b8e80941Smrg#define NUM_BUFFERS 4 53b8e80941Smrg 54b8e80941Smrg#define NUM_MPEG2_REFS 6 55b8e80941Smrg#define NUM_H264_REFS 17 56b8e80941Smrg#define NUM_VC1_REFS 5 57b8e80941Smrg 58b8e80941Smrg#define FB_BUFFER_OFFSET 0x1000 59b8e80941Smrg#define FB_BUFFER_SIZE 2048 60b8e80941Smrg#define FB_BUFFER_SIZE_TONGA (2048 * 64) 61b8e80941Smrg#define IT_SCALING_TABLE_SIZE 992 62b8e80941Smrg#define UVD_SESSION_CONTEXT_SIZE (128 * 1024) 63b8e80941Smrg 64b8e80941Smrg/* UVD decoder representation */ 65b8e80941Smrgstruct ruvd_decoder { 66b8e80941Smrg struct pipe_video_codec base; 67b8e80941Smrg 68b8e80941Smrg ruvd_set_dtb set_dtb; 69b8e80941Smrg 70b8e80941Smrg unsigned stream_handle; 71b8e80941Smrg unsigned stream_type; 72b8e80941Smrg unsigned frame_number; 73b8e80941Smrg 74b8e80941Smrg struct pipe_screen *screen; 75b8e80941Smrg struct radeon_winsys* ws; 76b8e80941Smrg struct radeon_cmdbuf* cs; 77b8e80941Smrg 78b8e80941Smrg unsigned cur_buffer; 79b8e80941Smrg 80b8e80941Smrg struct rvid_buffer msg_fb_it_buffers[NUM_BUFFERS]; 81b8e80941Smrg struct ruvd_msg *msg; 82b8e80941Smrg uint32_t *fb; 83b8e80941Smrg unsigned fb_size; 84b8e80941Smrg uint8_t *it; 85b8e80941Smrg 86b8e80941Smrg struct rvid_buffer bs_buffers[NUM_BUFFERS]; 87b8e80941Smrg void* bs_ptr; 88b8e80941Smrg unsigned bs_size; 89b8e80941Smrg 90b8e80941Smrg struct rvid_buffer dpb; 91b8e80941Smrg bool use_legacy; 92b8e80941Smrg struct rvid_buffer ctx; 93b8e80941Smrg struct rvid_buffer sessionctx; 94b8e80941Smrg struct { 95b8e80941Smrg unsigned data0; 96b8e80941Smrg unsigned data1; 97b8e80941Smrg unsigned cmd; 98b8e80941Smrg unsigned cntl; 99b8e80941Smrg } reg; 100b8e80941Smrg}; 101b8e80941Smrg 102b8e80941Smrg/* flush IB to the hardware */ 103b8e80941Smrgstatic int flush(struct ruvd_decoder *dec, unsigned flags) 104b8e80941Smrg{ 105b8e80941Smrg return dec->ws->cs_flush(dec->cs, flags, NULL); 106b8e80941Smrg} 107b8e80941Smrg 108b8e80941Smrg/* add a new set register command to the IB */ 109b8e80941Smrgstatic void set_reg(struct ruvd_decoder *dec, unsigned reg, uint32_t val) 110b8e80941Smrg{ 111b8e80941Smrg radeon_emit(dec->cs, RUVD_PKT0(reg >> 2, 0)); 112b8e80941Smrg radeon_emit(dec->cs, val); 113b8e80941Smrg} 114b8e80941Smrg 115b8e80941Smrg/* send a command to the VCPU through the GPCOM registers */ 116b8e80941Smrgstatic void send_cmd(struct ruvd_decoder *dec, unsigned cmd, 117b8e80941Smrg struct pb_buffer* buf, uint32_t off, 118b8e80941Smrg enum radeon_bo_usage usage, enum radeon_bo_domain domain) 119b8e80941Smrg{ 120b8e80941Smrg int reloc_idx; 121b8e80941Smrg 122b8e80941Smrg reloc_idx = dec->ws->cs_add_buffer(dec->cs, buf, usage | RADEON_USAGE_SYNCHRONIZED, 123b8e80941Smrg domain, 0); 124b8e80941Smrg if (!dec->use_legacy) { 125b8e80941Smrg uint64_t addr; 126b8e80941Smrg addr = dec->ws->buffer_get_virtual_address(buf); 127b8e80941Smrg addr = addr + off; 128b8e80941Smrg set_reg(dec, dec->reg.data0, addr); 129b8e80941Smrg set_reg(dec, dec->reg.data1, addr >> 32); 130b8e80941Smrg } else { 131b8e80941Smrg off += dec->ws->buffer_get_reloc_offset(buf); 132b8e80941Smrg set_reg(dec, RUVD_GPCOM_VCPU_DATA0, off); 133b8e80941Smrg set_reg(dec, RUVD_GPCOM_VCPU_DATA1, reloc_idx * 4); 134b8e80941Smrg } 135b8e80941Smrg set_reg(dec, dec->reg.cmd, cmd << 1); 136b8e80941Smrg} 137b8e80941Smrg 138b8e80941Smrg/* do the codec needs an IT buffer ?*/ 139b8e80941Smrgstatic bool have_it(struct ruvd_decoder *dec) 140b8e80941Smrg{ 141b8e80941Smrg return dec->stream_type == RUVD_CODEC_H264_PERF || 142b8e80941Smrg dec->stream_type == RUVD_CODEC_H265; 143b8e80941Smrg} 144b8e80941Smrg 145b8e80941Smrg/* map the next available message/feedback/itscaling buffer */ 146b8e80941Smrgstatic void map_msg_fb_it_buf(struct ruvd_decoder *dec) 147b8e80941Smrg{ 148b8e80941Smrg struct rvid_buffer* buf; 149b8e80941Smrg uint8_t *ptr; 150b8e80941Smrg 151b8e80941Smrg /* grab the current message/feedback buffer */ 152b8e80941Smrg buf = &dec->msg_fb_it_buffers[dec->cur_buffer]; 153b8e80941Smrg 154b8e80941Smrg /* and map it for CPU access */ 155b8e80941Smrg ptr = dec->ws->buffer_map(buf->res->buf, dec->cs, 156b8e80941Smrg PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY); 157b8e80941Smrg 158b8e80941Smrg /* calc buffer offsets */ 159b8e80941Smrg dec->msg = (struct ruvd_msg *)ptr; 160b8e80941Smrg memset(dec->msg, 0, sizeof(*dec->msg)); 161b8e80941Smrg 162b8e80941Smrg dec->fb = (uint32_t *)(ptr + FB_BUFFER_OFFSET); 163b8e80941Smrg if (have_it(dec)) 164b8e80941Smrg dec->it = (uint8_t *)(ptr + FB_BUFFER_OFFSET + dec->fb_size); 165b8e80941Smrg} 166b8e80941Smrg 167b8e80941Smrg/* unmap and send a message command to the VCPU */ 168b8e80941Smrgstatic void send_msg_buf(struct ruvd_decoder *dec) 169b8e80941Smrg{ 170b8e80941Smrg struct rvid_buffer* buf; 171b8e80941Smrg 172b8e80941Smrg /* ignore the request if message/feedback buffer isn't mapped */ 173b8e80941Smrg if (!dec->msg || !dec->fb) 174b8e80941Smrg return; 175b8e80941Smrg 176b8e80941Smrg /* grab the current message buffer */ 177b8e80941Smrg buf = &dec->msg_fb_it_buffers[dec->cur_buffer]; 178b8e80941Smrg 179b8e80941Smrg /* unmap the buffer */ 180b8e80941Smrg dec->ws->buffer_unmap(buf->res->buf); 181b8e80941Smrg dec->msg = NULL; 182b8e80941Smrg dec->fb = NULL; 183b8e80941Smrg dec->it = NULL; 184b8e80941Smrg 185b8e80941Smrg 186b8e80941Smrg if (dec->sessionctx.res) 187b8e80941Smrg send_cmd(dec, RUVD_CMD_SESSION_CONTEXT_BUFFER, 188b8e80941Smrg dec->sessionctx.res->buf, 0, RADEON_USAGE_READWRITE, 189b8e80941Smrg RADEON_DOMAIN_VRAM); 190b8e80941Smrg 191b8e80941Smrg /* and send it to the hardware */ 192b8e80941Smrg send_cmd(dec, RUVD_CMD_MSG_BUFFER, buf->res->buf, 0, 193b8e80941Smrg RADEON_USAGE_READ, RADEON_DOMAIN_GTT); 194b8e80941Smrg} 195b8e80941Smrg 196b8e80941Smrg/* cycle to the next set of buffers */ 197b8e80941Smrgstatic void next_buffer(struct ruvd_decoder *dec) 198b8e80941Smrg{ 199b8e80941Smrg ++dec->cur_buffer; 200b8e80941Smrg dec->cur_buffer %= NUM_BUFFERS; 201b8e80941Smrg} 202b8e80941Smrg 203b8e80941Smrg/* convert the profile into something UVD understands */ 204b8e80941Smrgstatic uint32_t profile2stream_type(struct ruvd_decoder *dec, unsigned family) 205b8e80941Smrg{ 206b8e80941Smrg switch (u_reduce_video_profile(dec->base.profile)) { 207b8e80941Smrg case PIPE_VIDEO_FORMAT_MPEG4_AVC: 208b8e80941Smrg return RUVD_CODEC_H264; 209b8e80941Smrg 210b8e80941Smrg case PIPE_VIDEO_FORMAT_VC1: 211b8e80941Smrg return RUVD_CODEC_VC1; 212b8e80941Smrg 213b8e80941Smrg case PIPE_VIDEO_FORMAT_MPEG12: 214b8e80941Smrg return RUVD_CODEC_MPEG2; 215b8e80941Smrg 216b8e80941Smrg case PIPE_VIDEO_FORMAT_MPEG4: 217b8e80941Smrg return RUVD_CODEC_MPEG4; 218b8e80941Smrg 219b8e80941Smrg case PIPE_VIDEO_FORMAT_HEVC: 220b8e80941Smrg return RUVD_CODEC_H265; 221b8e80941Smrg 222b8e80941Smrg case PIPE_VIDEO_FORMAT_JPEG: 223b8e80941Smrg return RUVD_CODEC_MJPEG; 224b8e80941Smrg 225b8e80941Smrg default: 226b8e80941Smrg assert(0); 227b8e80941Smrg return 0; 228b8e80941Smrg } 229b8e80941Smrg} 230b8e80941Smrg 231b8e80941Smrgstatic unsigned calc_ctx_size_h265_main(struct ruvd_decoder *dec) 232b8e80941Smrg{ 233b8e80941Smrg unsigned width = align(dec->base.width, VL_MACROBLOCK_WIDTH); 234b8e80941Smrg unsigned height = align(dec->base.height, VL_MACROBLOCK_HEIGHT); 235b8e80941Smrg 236b8e80941Smrg unsigned max_references = dec->base.max_references + 1; 237b8e80941Smrg 238b8e80941Smrg if (dec->base.width * dec->base.height >= 4096*2000) 239b8e80941Smrg max_references = MAX2(max_references, 8); 240b8e80941Smrg else 241b8e80941Smrg max_references = MAX2(max_references, 17); 242b8e80941Smrg 243b8e80941Smrg width = align (width, 16); 244b8e80941Smrg height = align (height, 16); 245b8e80941Smrg return ((width + 255) / 16) * ((height + 255) / 16) * 16 * max_references + 52 * 1024; 246b8e80941Smrg} 247b8e80941Smrg 248b8e80941Smrgstatic unsigned calc_ctx_size_h265_main10(struct ruvd_decoder *dec, struct pipe_h265_picture_desc *pic) 249b8e80941Smrg{ 250b8e80941Smrg unsigned log2_ctb_size, width_in_ctb, height_in_ctb, num_16x16_block_per_ctb; 251b8e80941Smrg unsigned context_buffer_size_per_ctb_row, cm_buffer_size, max_mb_address, db_left_tile_pxl_size; 252b8e80941Smrg unsigned db_left_tile_ctx_size = 4096 / 16 * (32 + 16 * 4); 253b8e80941Smrg 254b8e80941Smrg unsigned width = align(dec->base.width, VL_MACROBLOCK_WIDTH); 255b8e80941Smrg unsigned height = align(dec->base.height, VL_MACROBLOCK_HEIGHT); 256b8e80941Smrg unsigned coeff_10bit = (pic->pps->sps->bit_depth_luma_minus8 || pic->pps->sps->bit_depth_chroma_minus8) ? 2 : 1; 257b8e80941Smrg 258b8e80941Smrg unsigned max_references = dec->base.max_references + 1; 259b8e80941Smrg 260b8e80941Smrg if (dec->base.width * dec->base.height >= 4096*2000) 261b8e80941Smrg max_references = MAX2(max_references, 8); 262b8e80941Smrg else 263b8e80941Smrg max_references = MAX2(max_references, 17); 264b8e80941Smrg 265b8e80941Smrg log2_ctb_size = pic->pps->sps->log2_min_luma_coding_block_size_minus3 + 3 + 266b8e80941Smrg pic->pps->sps->log2_diff_max_min_luma_coding_block_size; 267b8e80941Smrg 268b8e80941Smrg width_in_ctb = (width + ((1 << log2_ctb_size) - 1)) >> log2_ctb_size; 269b8e80941Smrg height_in_ctb = (height + ((1 << log2_ctb_size) - 1)) >> log2_ctb_size; 270b8e80941Smrg 271b8e80941Smrg num_16x16_block_per_ctb = ((1 << log2_ctb_size) >> 4) * ((1 << log2_ctb_size) >> 4); 272b8e80941Smrg context_buffer_size_per_ctb_row = align(width_in_ctb * num_16x16_block_per_ctb * 16, 256); 273b8e80941Smrg max_mb_address = (unsigned) ceil(height * 8 / 2048.0); 274b8e80941Smrg 275b8e80941Smrg cm_buffer_size = max_references * context_buffer_size_per_ctb_row * height_in_ctb; 276b8e80941Smrg db_left_tile_pxl_size = coeff_10bit * (max_mb_address * 2 * 2048 + 1024); 277b8e80941Smrg 278b8e80941Smrg return cm_buffer_size + db_left_tile_ctx_size + db_left_tile_pxl_size; 279b8e80941Smrg} 280b8e80941Smrg 281b8e80941Smrgstatic unsigned get_db_pitch_alignment(struct ruvd_decoder *dec) 282b8e80941Smrg{ 283b8e80941Smrg return 16; 284b8e80941Smrg} 285b8e80941Smrg 286b8e80941Smrg/* calculate size of reference picture buffer */ 287b8e80941Smrgstatic unsigned calc_dpb_size(struct ruvd_decoder *dec) 288b8e80941Smrg{ 289b8e80941Smrg unsigned width_in_mb, height_in_mb, image_size, dpb_size; 290b8e80941Smrg 291b8e80941Smrg // always align them to MB size for dpb calculation 292b8e80941Smrg unsigned width = align(dec->base.width, VL_MACROBLOCK_WIDTH); 293b8e80941Smrg unsigned height = align(dec->base.height, VL_MACROBLOCK_HEIGHT); 294b8e80941Smrg 295b8e80941Smrg // always one more for currently decoded picture 296b8e80941Smrg unsigned max_references = dec->base.max_references + 1; 297b8e80941Smrg 298b8e80941Smrg // aligned size of a single frame 299b8e80941Smrg image_size = align(width, get_db_pitch_alignment(dec)) * height; 300b8e80941Smrg image_size += image_size / 2; 301b8e80941Smrg image_size = align(image_size, 1024); 302b8e80941Smrg 303b8e80941Smrg // picture width & height in 16 pixel units 304b8e80941Smrg width_in_mb = width / VL_MACROBLOCK_WIDTH; 305b8e80941Smrg height_in_mb = align(height / VL_MACROBLOCK_HEIGHT, 2); 306b8e80941Smrg 307b8e80941Smrg switch (u_reduce_video_profile(dec->base.profile)) { 308b8e80941Smrg case PIPE_VIDEO_FORMAT_MPEG4_AVC: { 309b8e80941Smrg if (!dec->use_legacy) { 310b8e80941Smrg unsigned fs_in_mb = width_in_mb * height_in_mb; 311b8e80941Smrg unsigned alignment = 64, num_dpb_buffer; 312b8e80941Smrg 313b8e80941Smrg if (dec->stream_type == RUVD_CODEC_H264_PERF) 314b8e80941Smrg alignment = 256; 315b8e80941Smrg switch(dec->base.level) { 316b8e80941Smrg case 30: 317b8e80941Smrg num_dpb_buffer = 8100 / fs_in_mb; 318b8e80941Smrg break; 319b8e80941Smrg case 31: 320b8e80941Smrg num_dpb_buffer = 18000 / fs_in_mb; 321b8e80941Smrg break; 322b8e80941Smrg case 32: 323b8e80941Smrg num_dpb_buffer = 20480 / fs_in_mb; 324b8e80941Smrg break; 325b8e80941Smrg case 41: 326b8e80941Smrg num_dpb_buffer = 32768 / fs_in_mb; 327b8e80941Smrg break; 328b8e80941Smrg case 42: 329b8e80941Smrg num_dpb_buffer = 34816 / fs_in_mb; 330b8e80941Smrg break; 331b8e80941Smrg case 50: 332b8e80941Smrg num_dpb_buffer = 110400 / fs_in_mb; 333b8e80941Smrg break; 334b8e80941Smrg case 51: 335b8e80941Smrg num_dpb_buffer = 184320 / fs_in_mb; 336b8e80941Smrg break; 337b8e80941Smrg default: 338b8e80941Smrg num_dpb_buffer = 184320 / fs_in_mb; 339b8e80941Smrg break; 340b8e80941Smrg } 341b8e80941Smrg num_dpb_buffer++; 342b8e80941Smrg max_references = MAX2(MIN2(NUM_H264_REFS, num_dpb_buffer), max_references); 343b8e80941Smrg dpb_size = image_size * max_references; 344b8e80941Smrg if ((dec->stream_type != RUVD_CODEC_H264_PERF)) { 345b8e80941Smrg dpb_size += max_references * align(width_in_mb * height_in_mb * 192, alignment); 346b8e80941Smrg dpb_size += align(width_in_mb * height_in_mb * 32, alignment); 347b8e80941Smrg } 348b8e80941Smrg } else { 349b8e80941Smrg // the firmware seems to allways assume a minimum of ref frames 350b8e80941Smrg max_references = MAX2(NUM_H264_REFS, max_references); 351b8e80941Smrg // reference picture buffer 352b8e80941Smrg dpb_size = image_size * max_references; 353b8e80941Smrg if ((dec->stream_type != RUVD_CODEC_H264_PERF)) { 354b8e80941Smrg // macroblock context buffer 355b8e80941Smrg dpb_size += width_in_mb * height_in_mb * max_references * 192; 356b8e80941Smrg // IT surface buffer 357b8e80941Smrg dpb_size += width_in_mb * height_in_mb * 32; 358b8e80941Smrg } 359b8e80941Smrg } 360b8e80941Smrg break; 361b8e80941Smrg } 362b8e80941Smrg 363b8e80941Smrg case PIPE_VIDEO_FORMAT_HEVC: 364b8e80941Smrg if (dec->base.width * dec->base.height >= 4096*2000) 365b8e80941Smrg max_references = MAX2(max_references, 8); 366b8e80941Smrg else 367b8e80941Smrg max_references = MAX2(max_references, 17); 368b8e80941Smrg 369b8e80941Smrg width = align (width, 16); 370b8e80941Smrg height = align (height, 16); 371b8e80941Smrg if (dec->base.profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10) 372b8e80941Smrg dpb_size = align((align(width, get_db_pitch_alignment(dec)) * height * 9) / 4, 256) * max_references; 373b8e80941Smrg else 374b8e80941Smrg dpb_size = align((align(width, get_db_pitch_alignment(dec)) * height * 3) / 2, 256) * max_references; 375b8e80941Smrg break; 376b8e80941Smrg 377b8e80941Smrg case PIPE_VIDEO_FORMAT_VC1: 378b8e80941Smrg // the firmware seems to allways assume a minimum of ref frames 379b8e80941Smrg max_references = MAX2(NUM_VC1_REFS, max_references); 380b8e80941Smrg 381b8e80941Smrg // reference picture buffer 382b8e80941Smrg dpb_size = image_size * max_references; 383b8e80941Smrg 384b8e80941Smrg // CONTEXT_BUFFER 385b8e80941Smrg dpb_size += width_in_mb * height_in_mb * 128; 386b8e80941Smrg 387b8e80941Smrg // IT surface buffer 388b8e80941Smrg dpb_size += width_in_mb * 64; 389b8e80941Smrg 390b8e80941Smrg // DB surface buffer 391b8e80941Smrg dpb_size += width_in_mb * 128; 392b8e80941Smrg 393b8e80941Smrg // BP 394b8e80941Smrg dpb_size += align(MAX2(width_in_mb, height_in_mb) * 7 * 16, 64); 395b8e80941Smrg break; 396b8e80941Smrg 397b8e80941Smrg case PIPE_VIDEO_FORMAT_MPEG12: 398b8e80941Smrg // reference picture buffer, must be big enough for all frames 399b8e80941Smrg dpb_size = image_size * NUM_MPEG2_REFS; 400b8e80941Smrg break; 401b8e80941Smrg 402b8e80941Smrg case PIPE_VIDEO_FORMAT_MPEG4: 403b8e80941Smrg // reference picture buffer 404b8e80941Smrg dpb_size = image_size * max_references; 405b8e80941Smrg 406b8e80941Smrg // CM 407b8e80941Smrg dpb_size += width_in_mb * height_in_mb * 64; 408b8e80941Smrg 409b8e80941Smrg // IT surface buffer 410b8e80941Smrg dpb_size += align(width_in_mb * height_in_mb * 32, 64); 411b8e80941Smrg 412b8e80941Smrg dpb_size = MAX2(dpb_size, 30 * 1024 * 1024); 413b8e80941Smrg break; 414b8e80941Smrg 415b8e80941Smrg case PIPE_VIDEO_FORMAT_JPEG: 416b8e80941Smrg dpb_size = 0; 417b8e80941Smrg break; 418b8e80941Smrg 419b8e80941Smrg default: 420b8e80941Smrg // something is missing here 421b8e80941Smrg assert(0); 422b8e80941Smrg 423b8e80941Smrg // at least use a sane default value 424b8e80941Smrg dpb_size = 32 * 1024 * 1024; 425b8e80941Smrg break; 426b8e80941Smrg } 427b8e80941Smrg return dpb_size; 428b8e80941Smrg} 429b8e80941Smrg 430b8e80941Smrg/* free associated data in the video buffer callback */ 431b8e80941Smrgstatic void ruvd_destroy_associated_data(void *data) 432b8e80941Smrg{ 433b8e80941Smrg /* NOOP, since we only use an intptr */ 434b8e80941Smrg} 435b8e80941Smrg 436b8e80941Smrg/* get h264 specific message bits */ 437b8e80941Smrgstatic struct ruvd_h264 get_h264_msg(struct ruvd_decoder *dec, struct pipe_h264_picture_desc *pic) 438b8e80941Smrg{ 439b8e80941Smrg struct ruvd_h264 result; 440b8e80941Smrg 441b8e80941Smrg memset(&result, 0, sizeof(result)); 442b8e80941Smrg switch (pic->base.profile) { 443b8e80941Smrg case PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE: 444b8e80941Smrg case PIPE_VIDEO_PROFILE_MPEG4_AVC_CONSTRAINED_BASELINE: 445b8e80941Smrg result.profile = RUVD_H264_PROFILE_BASELINE; 446b8e80941Smrg break; 447b8e80941Smrg 448b8e80941Smrg case PIPE_VIDEO_PROFILE_MPEG4_AVC_MAIN: 449b8e80941Smrg result.profile = RUVD_H264_PROFILE_MAIN; 450b8e80941Smrg break; 451b8e80941Smrg 452b8e80941Smrg case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH: 453b8e80941Smrg result.profile = RUVD_H264_PROFILE_HIGH; 454b8e80941Smrg break; 455b8e80941Smrg 456b8e80941Smrg default: 457b8e80941Smrg assert(0); 458b8e80941Smrg break; 459b8e80941Smrg } 460b8e80941Smrg 461b8e80941Smrg result.level = dec->base.level; 462b8e80941Smrg 463b8e80941Smrg result.sps_info_flags = 0; 464b8e80941Smrg result.sps_info_flags |= pic->pps->sps->direct_8x8_inference_flag << 0; 465b8e80941Smrg result.sps_info_flags |= pic->pps->sps->mb_adaptive_frame_field_flag << 1; 466b8e80941Smrg result.sps_info_flags |= pic->pps->sps->frame_mbs_only_flag << 2; 467b8e80941Smrg result.sps_info_flags |= pic->pps->sps->delta_pic_order_always_zero_flag << 3; 468b8e80941Smrg 469b8e80941Smrg result.bit_depth_luma_minus8 = pic->pps->sps->bit_depth_luma_minus8; 470b8e80941Smrg result.bit_depth_chroma_minus8 = pic->pps->sps->bit_depth_chroma_minus8; 471b8e80941Smrg result.log2_max_frame_num_minus4 = pic->pps->sps->log2_max_frame_num_minus4; 472b8e80941Smrg result.pic_order_cnt_type = pic->pps->sps->pic_order_cnt_type; 473b8e80941Smrg result.log2_max_pic_order_cnt_lsb_minus4 = pic->pps->sps->log2_max_pic_order_cnt_lsb_minus4; 474b8e80941Smrg 475b8e80941Smrg switch (dec->base.chroma_format) { 476b8e80941Smrg case PIPE_VIDEO_CHROMA_FORMAT_NONE: 477b8e80941Smrg /* TODO: assert? */ 478b8e80941Smrg break; 479b8e80941Smrg case PIPE_VIDEO_CHROMA_FORMAT_400: 480b8e80941Smrg result.chroma_format = 0; 481b8e80941Smrg break; 482b8e80941Smrg case PIPE_VIDEO_CHROMA_FORMAT_420: 483b8e80941Smrg result.chroma_format = 1; 484b8e80941Smrg break; 485b8e80941Smrg case PIPE_VIDEO_CHROMA_FORMAT_422: 486b8e80941Smrg result.chroma_format = 2; 487b8e80941Smrg break; 488b8e80941Smrg case PIPE_VIDEO_CHROMA_FORMAT_444: 489b8e80941Smrg result.chroma_format = 3; 490b8e80941Smrg break; 491b8e80941Smrg } 492b8e80941Smrg 493b8e80941Smrg result.pps_info_flags = 0; 494b8e80941Smrg result.pps_info_flags |= pic->pps->transform_8x8_mode_flag << 0; 495b8e80941Smrg result.pps_info_flags |= pic->pps->redundant_pic_cnt_present_flag << 1; 496b8e80941Smrg result.pps_info_flags |= pic->pps->constrained_intra_pred_flag << 2; 497b8e80941Smrg result.pps_info_flags |= pic->pps->deblocking_filter_control_present_flag << 3; 498b8e80941Smrg result.pps_info_flags |= pic->pps->weighted_bipred_idc << 4; 499b8e80941Smrg result.pps_info_flags |= pic->pps->weighted_pred_flag << 6; 500b8e80941Smrg result.pps_info_flags |= pic->pps->bottom_field_pic_order_in_frame_present_flag << 7; 501b8e80941Smrg result.pps_info_flags |= pic->pps->entropy_coding_mode_flag << 8; 502b8e80941Smrg 503b8e80941Smrg result.num_slice_groups_minus1 = pic->pps->num_slice_groups_minus1; 504b8e80941Smrg result.slice_group_map_type = pic->pps->slice_group_map_type; 505b8e80941Smrg result.slice_group_change_rate_minus1 = pic->pps->slice_group_change_rate_minus1; 506b8e80941Smrg result.pic_init_qp_minus26 = pic->pps->pic_init_qp_minus26; 507b8e80941Smrg result.chroma_qp_index_offset = pic->pps->chroma_qp_index_offset; 508b8e80941Smrg result.second_chroma_qp_index_offset = pic->pps->second_chroma_qp_index_offset; 509b8e80941Smrg 510b8e80941Smrg memcpy(result.scaling_list_4x4, pic->pps->ScalingList4x4, 6*16); 511b8e80941Smrg memcpy(result.scaling_list_8x8, pic->pps->ScalingList8x8, 2*64); 512b8e80941Smrg 513b8e80941Smrg if (dec->stream_type == RUVD_CODEC_H264_PERF) { 514b8e80941Smrg memcpy(dec->it, result.scaling_list_4x4, 6*16); 515b8e80941Smrg memcpy((dec->it + 96), result.scaling_list_8x8, 2*64); 516b8e80941Smrg } 517b8e80941Smrg 518b8e80941Smrg result.num_ref_frames = pic->num_ref_frames; 519b8e80941Smrg 520b8e80941Smrg result.num_ref_idx_l0_active_minus1 = pic->num_ref_idx_l0_active_minus1; 521b8e80941Smrg result.num_ref_idx_l1_active_minus1 = pic->num_ref_idx_l1_active_minus1; 522b8e80941Smrg 523b8e80941Smrg result.frame_num = pic->frame_num; 524b8e80941Smrg memcpy(result.frame_num_list, pic->frame_num_list, 4*16); 525b8e80941Smrg result.curr_field_order_cnt_list[0] = pic->field_order_cnt[0]; 526b8e80941Smrg result.curr_field_order_cnt_list[1] = pic->field_order_cnt[1]; 527b8e80941Smrg memcpy(result.field_order_cnt_list, pic->field_order_cnt_list, 4*16*2); 528b8e80941Smrg 529b8e80941Smrg result.decoded_pic_idx = pic->frame_num; 530b8e80941Smrg 531b8e80941Smrg return result; 532b8e80941Smrg} 533b8e80941Smrg 534b8e80941Smrg/* get h265 specific message bits */ 535b8e80941Smrgstatic struct ruvd_h265 get_h265_msg(struct ruvd_decoder *dec, struct pipe_video_buffer *target, 536b8e80941Smrg struct pipe_h265_picture_desc *pic) 537b8e80941Smrg{ 538b8e80941Smrg struct ruvd_h265 result; 539b8e80941Smrg unsigned i; 540b8e80941Smrg 541b8e80941Smrg memset(&result, 0, sizeof(result)); 542b8e80941Smrg 543b8e80941Smrg result.sps_info_flags = 0; 544b8e80941Smrg result.sps_info_flags |= pic->pps->sps->scaling_list_enabled_flag << 0; 545b8e80941Smrg result.sps_info_flags |= pic->pps->sps->amp_enabled_flag << 1; 546b8e80941Smrg result.sps_info_flags |= pic->pps->sps->sample_adaptive_offset_enabled_flag << 2; 547b8e80941Smrg result.sps_info_flags |= pic->pps->sps->pcm_enabled_flag << 3; 548b8e80941Smrg result.sps_info_flags |= pic->pps->sps->pcm_loop_filter_disabled_flag << 4; 549b8e80941Smrg result.sps_info_flags |= pic->pps->sps->long_term_ref_pics_present_flag << 5; 550b8e80941Smrg result.sps_info_flags |= pic->pps->sps->sps_temporal_mvp_enabled_flag << 6; 551b8e80941Smrg result.sps_info_flags |= pic->pps->sps->strong_intra_smoothing_enabled_flag << 7; 552b8e80941Smrg result.sps_info_flags |= pic->pps->sps->separate_colour_plane_flag << 8; 553b8e80941Smrg if (pic->UseRefPicList == true) 554b8e80941Smrg result.sps_info_flags |= 1 << 10; 555b8e80941Smrg 556b8e80941Smrg result.chroma_format = pic->pps->sps->chroma_format_idc; 557b8e80941Smrg result.bit_depth_luma_minus8 = pic->pps->sps->bit_depth_luma_minus8; 558b8e80941Smrg result.bit_depth_chroma_minus8 = pic->pps->sps->bit_depth_chroma_minus8; 559b8e80941Smrg result.log2_max_pic_order_cnt_lsb_minus4 = pic->pps->sps->log2_max_pic_order_cnt_lsb_minus4; 560b8e80941Smrg result.sps_max_dec_pic_buffering_minus1 = pic->pps->sps->sps_max_dec_pic_buffering_minus1; 561b8e80941Smrg result.log2_min_luma_coding_block_size_minus3 = pic->pps->sps->log2_min_luma_coding_block_size_minus3; 562b8e80941Smrg result.log2_diff_max_min_luma_coding_block_size = pic->pps->sps->log2_diff_max_min_luma_coding_block_size; 563b8e80941Smrg result.log2_min_transform_block_size_minus2 = pic->pps->sps->log2_min_transform_block_size_minus2; 564b8e80941Smrg result.log2_diff_max_min_transform_block_size = pic->pps->sps->log2_diff_max_min_transform_block_size; 565b8e80941Smrg result.max_transform_hierarchy_depth_inter = pic->pps->sps->max_transform_hierarchy_depth_inter; 566b8e80941Smrg result.max_transform_hierarchy_depth_intra = pic->pps->sps->max_transform_hierarchy_depth_intra; 567b8e80941Smrg result.pcm_sample_bit_depth_luma_minus1 = pic->pps->sps->pcm_sample_bit_depth_luma_minus1; 568b8e80941Smrg result.pcm_sample_bit_depth_chroma_minus1 = pic->pps->sps->pcm_sample_bit_depth_chroma_minus1; 569b8e80941Smrg result.log2_min_pcm_luma_coding_block_size_minus3 = pic->pps->sps->log2_min_pcm_luma_coding_block_size_minus3; 570b8e80941Smrg result.log2_diff_max_min_pcm_luma_coding_block_size = pic->pps->sps->log2_diff_max_min_pcm_luma_coding_block_size; 571b8e80941Smrg result.num_short_term_ref_pic_sets = pic->pps->sps->num_short_term_ref_pic_sets; 572b8e80941Smrg 573b8e80941Smrg result.pps_info_flags = 0; 574b8e80941Smrg result.pps_info_flags |= pic->pps->dependent_slice_segments_enabled_flag << 0; 575b8e80941Smrg result.pps_info_flags |= pic->pps->output_flag_present_flag << 1; 576b8e80941Smrg result.pps_info_flags |= pic->pps->sign_data_hiding_enabled_flag << 2; 577b8e80941Smrg result.pps_info_flags |= pic->pps->cabac_init_present_flag << 3; 578b8e80941Smrg result.pps_info_flags |= pic->pps->constrained_intra_pred_flag << 4; 579b8e80941Smrg result.pps_info_flags |= pic->pps->transform_skip_enabled_flag << 5; 580b8e80941Smrg result.pps_info_flags |= pic->pps->cu_qp_delta_enabled_flag << 6; 581b8e80941Smrg result.pps_info_flags |= pic->pps->pps_slice_chroma_qp_offsets_present_flag << 7; 582b8e80941Smrg result.pps_info_flags |= pic->pps->weighted_pred_flag << 8; 583b8e80941Smrg result.pps_info_flags |= pic->pps->weighted_bipred_flag << 9; 584b8e80941Smrg result.pps_info_flags |= pic->pps->transquant_bypass_enabled_flag << 10; 585b8e80941Smrg result.pps_info_flags |= pic->pps->tiles_enabled_flag << 11; 586b8e80941Smrg result.pps_info_flags |= pic->pps->entropy_coding_sync_enabled_flag << 12; 587b8e80941Smrg result.pps_info_flags |= pic->pps->uniform_spacing_flag << 13; 588b8e80941Smrg result.pps_info_flags |= pic->pps->loop_filter_across_tiles_enabled_flag << 14; 589b8e80941Smrg result.pps_info_flags |= pic->pps->pps_loop_filter_across_slices_enabled_flag << 15; 590b8e80941Smrg result.pps_info_flags |= pic->pps->deblocking_filter_override_enabled_flag << 16; 591b8e80941Smrg result.pps_info_flags |= pic->pps->pps_deblocking_filter_disabled_flag << 17; 592b8e80941Smrg result.pps_info_flags |= pic->pps->lists_modification_present_flag << 18; 593b8e80941Smrg result.pps_info_flags |= pic->pps->slice_segment_header_extension_present_flag << 19; 594b8e80941Smrg //result.pps_info_flags |= pic->pps->deblocking_filter_control_present_flag; ??? 595b8e80941Smrg 596b8e80941Smrg result.num_extra_slice_header_bits = pic->pps->num_extra_slice_header_bits; 597b8e80941Smrg result.num_long_term_ref_pic_sps = pic->pps->sps->num_long_term_ref_pics_sps; 598b8e80941Smrg result.num_ref_idx_l0_default_active_minus1 = pic->pps->num_ref_idx_l0_default_active_minus1; 599b8e80941Smrg result.num_ref_idx_l1_default_active_minus1 = pic->pps->num_ref_idx_l1_default_active_minus1; 600b8e80941Smrg result.pps_cb_qp_offset = pic->pps->pps_cb_qp_offset; 601b8e80941Smrg result.pps_cr_qp_offset = pic->pps->pps_cr_qp_offset; 602b8e80941Smrg result.pps_beta_offset_div2 = pic->pps->pps_beta_offset_div2; 603b8e80941Smrg result.pps_tc_offset_div2 = pic->pps->pps_tc_offset_div2; 604b8e80941Smrg result.diff_cu_qp_delta_depth = pic->pps->diff_cu_qp_delta_depth; 605b8e80941Smrg result.num_tile_columns_minus1 = pic->pps->num_tile_columns_minus1; 606b8e80941Smrg result.num_tile_rows_minus1 = pic->pps->num_tile_rows_minus1; 607b8e80941Smrg result.log2_parallel_merge_level_minus2 = pic->pps->log2_parallel_merge_level_minus2; 608b8e80941Smrg result.init_qp_minus26 = pic->pps->init_qp_minus26; 609b8e80941Smrg 610b8e80941Smrg for (i = 0; i < 19; ++i) 611b8e80941Smrg result.column_width_minus1[i] = pic->pps->column_width_minus1[i]; 612b8e80941Smrg 613b8e80941Smrg for (i = 0; i < 21; ++i) 614b8e80941Smrg result.row_height_minus1[i] = pic->pps->row_height_minus1[i]; 615b8e80941Smrg 616b8e80941Smrg result.num_delta_pocs_ref_rps_idx = pic->NumDeltaPocsOfRefRpsIdx; 617b8e80941Smrg result.curr_idx = pic->CurrPicOrderCntVal; 618b8e80941Smrg result.curr_poc = pic->CurrPicOrderCntVal; 619b8e80941Smrg 620b8e80941Smrg vl_video_buffer_set_associated_data(target, &dec->base, 621b8e80941Smrg (void *)(uintptr_t)pic->CurrPicOrderCntVal, 622b8e80941Smrg &ruvd_destroy_associated_data); 623b8e80941Smrg 624b8e80941Smrg for (i = 0; i < 16; ++i) { 625b8e80941Smrg struct pipe_video_buffer *ref = pic->ref[i]; 626b8e80941Smrg uintptr_t ref_pic = 0; 627b8e80941Smrg 628b8e80941Smrg result.poc_list[i] = pic->PicOrderCntVal[i]; 629b8e80941Smrg 630b8e80941Smrg if (ref) 631b8e80941Smrg ref_pic = (uintptr_t)vl_video_buffer_get_associated_data(ref, &dec->base); 632b8e80941Smrg else 633b8e80941Smrg ref_pic = 0x7F; 634b8e80941Smrg result.ref_pic_list[i] = ref_pic; 635b8e80941Smrg } 636b8e80941Smrg 637b8e80941Smrg for (i = 0; i < 8; ++i) { 638b8e80941Smrg result.ref_pic_set_st_curr_before[i] = 0xFF; 639b8e80941Smrg result.ref_pic_set_st_curr_after[i] = 0xFF; 640b8e80941Smrg result.ref_pic_set_lt_curr[i] = 0xFF; 641b8e80941Smrg } 642b8e80941Smrg 643b8e80941Smrg for (i = 0; i < pic->NumPocStCurrBefore; ++i) 644b8e80941Smrg result.ref_pic_set_st_curr_before[i] = pic->RefPicSetStCurrBefore[i]; 645b8e80941Smrg 646b8e80941Smrg for (i = 0; i < pic->NumPocStCurrAfter; ++i) 647b8e80941Smrg result.ref_pic_set_st_curr_after[i] = pic->RefPicSetStCurrAfter[i]; 648b8e80941Smrg 649b8e80941Smrg for (i = 0; i < pic->NumPocLtCurr; ++i) 650b8e80941Smrg result.ref_pic_set_lt_curr[i] = pic->RefPicSetLtCurr[i]; 651b8e80941Smrg 652b8e80941Smrg for (i = 0; i < 6; ++i) 653b8e80941Smrg result.ucScalingListDCCoefSizeID2[i] = pic->pps->sps->ScalingListDCCoeff16x16[i]; 654b8e80941Smrg 655b8e80941Smrg for (i = 0; i < 2; ++i) 656b8e80941Smrg result.ucScalingListDCCoefSizeID3[i] = pic->pps->sps->ScalingListDCCoeff32x32[i]; 657b8e80941Smrg 658b8e80941Smrg memcpy(dec->it, pic->pps->sps->ScalingList4x4, 6 * 16); 659b8e80941Smrg memcpy(dec->it + 96, pic->pps->sps->ScalingList8x8, 6 * 64); 660b8e80941Smrg memcpy(dec->it + 480, pic->pps->sps->ScalingList16x16, 6 * 64); 661b8e80941Smrg memcpy(dec->it + 864, pic->pps->sps->ScalingList32x32, 2 * 64); 662b8e80941Smrg 663b8e80941Smrg for (i = 0 ; i < 2 ; i++) { 664b8e80941Smrg for (int j = 0 ; j < 15 ; j++) 665b8e80941Smrg result.direct_reflist[i][j] = pic->RefPicList[i][j]; 666b8e80941Smrg } 667b8e80941Smrg 668b8e80941Smrg if (pic->base.profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10) { 669b8e80941Smrg if (target->buffer_format == PIPE_FORMAT_P016) { 670b8e80941Smrg result.p010_mode = 1; 671b8e80941Smrg result.msb_mode = 1; 672b8e80941Smrg } else { 673b8e80941Smrg result.luma_10to8 = 5; 674b8e80941Smrg result.chroma_10to8 = 5; 675b8e80941Smrg result.sclr_luma10to8 = 4; 676b8e80941Smrg result.sclr_chroma10to8 = 4; 677b8e80941Smrg } 678b8e80941Smrg } 679b8e80941Smrg 680b8e80941Smrg /* TODO 681b8e80941Smrg result.highestTid; 682b8e80941Smrg result.isNonRef; 683b8e80941Smrg 684b8e80941Smrg IDRPicFlag; 685b8e80941Smrg RAPPicFlag; 686b8e80941Smrg NumPocTotalCurr; 687b8e80941Smrg NumShortTermPictureSliceHeaderBits; 688b8e80941Smrg NumLongTermPictureSliceHeaderBits; 689b8e80941Smrg 690b8e80941Smrg IsLongTerm[16]; 691b8e80941Smrg */ 692b8e80941Smrg 693b8e80941Smrg return result; 694b8e80941Smrg} 695b8e80941Smrg 696b8e80941Smrg/* get vc1 specific message bits */ 697b8e80941Smrgstatic struct ruvd_vc1 get_vc1_msg(struct pipe_vc1_picture_desc *pic) 698b8e80941Smrg{ 699b8e80941Smrg struct ruvd_vc1 result; 700b8e80941Smrg 701b8e80941Smrg memset(&result, 0, sizeof(result)); 702b8e80941Smrg 703b8e80941Smrg switch(pic->base.profile) { 704b8e80941Smrg case PIPE_VIDEO_PROFILE_VC1_SIMPLE: 705b8e80941Smrg result.profile = RUVD_VC1_PROFILE_SIMPLE; 706b8e80941Smrg result.level = 1; 707b8e80941Smrg break; 708b8e80941Smrg 709b8e80941Smrg case PIPE_VIDEO_PROFILE_VC1_MAIN: 710b8e80941Smrg result.profile = RUVD_VC1_PROFILE_MAIN; 711b8e80941Smrg result.level = 2; 712b8e80941Smrg break; 713b8e80941Smrg 714b8e80941Smrg case PIPE_VIDEO_PROFILE_VC1_ADVANCED: 715b8e80941Smrg result.profile = RUVD_VC1_PROFILE_ADVANCED; 716b8e80941Smrg result.level = 4; 717b8e80941Smrg break; 718b8e80941Smrg 719b8e80941Smrg default: 720b8e80941Smrg assert(0); 721b8e80941Smrg } 722b8e80941Smrg 723b8e80941Smrg /* fields common for all profiles */ 724b8e80941Smrg result.sps_info_flags |= pic->postprocflag << 7; 725b8e80941Smrg result.sps_info_flags |= pic->pulldown << 6; 726b8e80941Smrg result.sps_info_flags |= pic->interlace << 5; 727b8e80941Smrg result.sps_info_flags |= pic->tfcntrflag << 4; 728b8e80941Smrg result.sps_info_flags |= pic->finterpflag << 3; 729b8e80941Smrg result.sps_info_flags |= pic->psf << 1; 730b8e80941Smrg 731b8e80941Smrg result.pps_info_flags |= pic->range_mapy_flag << 31; 732b8e80941Smrg result.pps_info_flags |= pic->range_mapy << 28; 733b8e80941Smrg result.pps_info_flags |= pic->range_mapuv_flag << 27; 734b8e80941Smrg result.pps_info_flags |= pic->range_mapuv << 24; 735b8e80941Smrg result.pps_info_flags |= pic->multires << 21; 736b8e80941Smrg result.pps_info_flags |= pic->maxbframes << 16; 737b8e80941Smrg result.pps_info_flags |= pic->overlap << 11; 738b8e80941Smrg result.pps_info_flags |= pic->quantizer << 9; 739b8e80941Smrg result.pps_info_flags |= pic->panscan_flag << 7; 740b8e80941Smrg result.pps_info_flags |= pic->refdist_flag << 6; 741b8e80941Smrg result.pps_info_flags |= pic->vstransform << 0; 742b8e80941Smrg 743b8e80941Smrg /* some fields only apply to main/advanced profile */ 744b8e80941Smrg if (pic->base.profile != PIPE_VIDEO_PROFILE_VC1_SIMPLE) { 745b8e80941Smrg result.pps_info_flags |= pic->syncmarker << 20; 746b8e80941Smrg result.pps_info_flags |= pic->rangered << 19; 747b8e80941Smrg result.pps_info_flags |= pic->loopfilter << 5; 748b8e80941Smrg result.pps_info_flags |= pic->fastuvmc << 4; 749b8e80941Smrg result.pps_info_flags |= pic->extended_mv << 3; 750b8e80941Smrg result.pps_info_flags |= pic->extended_dmv << 8; 751b8e80941Smrg result.pps_info_flags |= pic->dquant << 1; 752b8e80941Smrg } 753b8e80941Smrg 754b8e80941Smrg result.chroma_format = 1; 755b8e80941Smrg 756b8e80941Smrg#if 0 757b8e80941Smrg//(((unsigned int)(pPicParams->advance.reserved1)) << SPS_INFO_VC1_RESERVED_SHIFT) 758b8e80941Smrguint32_t slice_count 759b8e80941Smrguint8_t picture_type 760b8e80941Smrguint8_t frame_coding_mode 761b8e80941Smrguint8_t deblockEnable 762b8e80941Smrguint8_t pquant 763b8e80941Smrg#endif 764b8e80941Smrg 765b8e80941Smrg return result; 766b8e80941Smrg} 767b8e80941Smrg 768b8e80941Smrg/* extract the frame number from a referenced video buffer */ 769b8e80941Smrgstatic uint32_t get_ref_pic_idx(struct ruvd_decoder *dec, struct pipe_video_buffer *ref) 770b8e80941Smrg{ 771b8e80941Smrg uint32_t min = MAX2(dec->frame_number, NUM_MPEG2_REFS) - NUM_MPEG2_REFS; 772b8e80941Smrg uint32_t max = MAX2(dec->frame_number, 1) - 1; 773b8e80941Smrg uintptr_t frame; 774b8e80941Smrg 775b8e80941Smrg /* seems to be the most sane fallback */ 776b8e80941Smrg if (!ref) 777b8e80941Smrg return max; 778b8e80941Smrg 779b8e80941Smrg /* get the frame number from the associated data */ 780b8e80941Smrg frame = (uintptr_t)vl_video_buffer_get_associated_data(ref, &dec->base); 781b8e80941Smrg 782b8e80941Smrg /* limit the frame number to a valid range */ 783b8e80941Smrg return MAX2(MIN2(frame, max), min); 784b8e80941Smrg} 785b8e80941Smrg 786b8e80941Smrg/* get mpeg2 specific msg bits */ 787b8e80941Smrgstatic struct ruvd_mpeg2 get_mpeg2_msg(struct ruvd_decoder *dec, 788b8e80941Smrg struct pipe_mpeg12_picture_desc *pic) 789b8e80941Smrg{ 790b8e80941Smrg const int *zscan = pic->alternate_scan ? vl_zscan_alternate : vl_zscan_normal; 791b8e80941Smrg struct ruvd_mpeg2 result; 792b8e80941Smrg unsigned i; 793b8e80941Smrg 794b8e80941Smrg memset(&result, 0, sizeof(result)); 795b8e80941Smrg result.decoded_pic_idx = dec->frame_number; 796b8e80941Smrg for (i = 0; i < 2; ++i) 797b8e80941Smrg result.ref_pic_idx[i] = get_ref_pic_idx(dec, pic->ref[i]); 798b8e80941Smrg 799b8e80941Smrg result.load_intra_quantiser_matrix = 1; 800b8e80941Smrg result.load_nonintra_quantiser_matrix = 1; 801b8e80941Smrg 802b8e80941Smrg for (i = 0; i < 64; ++i) { 803b8e80941Smrg result.intra_quantiser_matrix[i] = pic->intra_matrix[zscan[i]]; 804b8e80941Smrg result.nonintra_quantiser_matrix[i] = pic->non_intra_matrix[zscan[i]]; 805b8e80941Smrg } 806b8e80941Smrg 807b8e80941Smrg result.profile_and_level_indication = 0; 808b8e80941Smrg result.chroma_format = 0x1; 809b8e80941Smrg 810b8e80941Smrg result.picture_coding_type = pic->picture_coding_type; 811b8e80941Smrg result.f_code[0][0] = pic->f_code[0][0] + 1; 812b8e80941Smrg result.f_code[0][1] = pic->f_code[0][1] + 1; 813b8e80941Smrg result.f_code[1][0] = pic->f_code[1][0] + 1; 814b8e80941Smrg result.f_code[1][1] = pic->f_code[1][1] + 1; 815b8e80941Smrg result.intra_dc_precision = pic->intra_dc_precision; 816b8e80941Smrg result.pic_structure = pic->picture_structure; 817b8e80941Smrg result.top_field_first = pic->top_field_first; 818b8e80941Smrg result.frame_pred_frame_dct = pic->frame_pred_frame_dct; 819b8e80941Smrg result.concealment_motion_vectors = pic->concealment_motion_vectors; 820b8e80941Smrg result.q_scale_type = pic->q_scale_type; 821b8e80941Smrg result.intra_vlc_format = pic->intra_vlc_format; 822b8e80941Smrg result.alternate_scan = pic->alternate_scan; 823b8e80941Smrg 824b8e80941Smrg return result; 825b8e80941Smrg} 826b8e80941Smrg 827b8e80941Smrg/* get mpeg4 specific msg bits */ 828b8e80941Smrgstatic struct ruvd_mpeg4 get_mpeg4_msg(struct ruvd_decoder *dec, 829b8e80941Smrg struct pipe_mpeg4_picture_desc *pic) 830b8e80941Smrg{ 831b8e80941Smrg struct ruvd_mpeg4 result; 832b8e80941Smrg unsigned i; 833b8e80941Smrg 834b8e80941Smrg memset(&result, 0, sizeof(result)); 835b8e80941Smrg result.decoded_pic_idx = dec->frame_number; 836b8e80941Smrg for (i = 0; i < 2; ++i) 837b8e80941Smrg result.ref_pic_idx[i] = get_ref_pic_idx(dec, pic->ref[i]); 838b8e80941Smrg 839b8e80941Smrg result.variant_type = 0; 840b8e80941Smrg result.profile_and_level_indication = 0xF0; // ASP Level0 841b8e80941Smrg 842b8e80941Smrg result.video_object_layer_verid = 0x5; // advanced simple 843b8e80941Smrg result.video_object_layer_shape = 0x0; // rectangular 844b8e80941Smrg 845b8e80941Smrg result.video_object_layer_width = dec->base.width; 846b8e80941Smrg result.video_object_layer_height = dec->base.height; 847b8e80941Smrg 848b8e80941Smrg result.vop_time_increment_resolution = pic->vop_time_increment_resolution; 849b8e80941Smrg 850b8e80941Smrg result.flags |= pic->short_video_header << 0; 851b8e80941Smrg //result.flags |= obmc_disable << 1; 852b8e80941Smrg result.flags |= pic->interlaced << 2; 853b8e80941Smrg result.flags |= 1 << 3; // load_intra_quant_mat 854b8e80941Smrg result.flags |= 1 << 4; // load_nonintra_quant_mat 855b8e80941Smrg result.flags |= pic->quarter_sample << 5; 856b8e80941Smrg result.flags |= 1 << 6; // complexity_estimation_disable 857b8e80941Smrg result.flags |= pic->resync_marker_disable << 7; 858b8e80941Smrg //result.flags |= data_partitioned << 8; 859b8e80941Smrg //result.flags |= reversible_vlc << 9; 860b8e80941Smrg result.flags |= 0 << 10; // newpred_enable 861b8e80941Smrg result.flags |= 0 << 11; // reduced_resolution_vop_enable 862b8e80941Smrg //result.flags |= scalability << 12; 863b8e80941Smrg //result.flags |= is_object_layer_identifier << 13; 864b8e80941Smrg //result.flags |= fixed_vop_rate << 14; 865b8e80941Smrg //result.flags |= newpred_segment_type << 15; 866b8e80941Smrg 867b8e80941Smrg result.quant_type = pic->quant_type; 868b8e80941Smrg 869b8e80941Smrg for (i = 0; i < 64; ++i) { 870b8e80941Smrg result.intra_quant_mat[i] = pic->intra_matrix[vl_zscan_normal[i]]; 871b8e80941Smrg result.nonintra_quant_mat[i] = pic->non_intra_matrix[vl_zscan_normal[i]]; 872b8e80941Smrg } 873b8e80941Smrg 874b8e80941Smrg /* 875b8e80941Smrg int32_t trd [2] 876b8e80941Smrg int32_t trb [2] 877b8e80941Smrg uint8_t vop_coding_type 878b8e80941Smrg uint8_t vop_fcode_forward 879b8e80941Smrg uint8_t vop_fcode_backward 880b8e80941Smrg uint8_t rounding_control 881b8e80941Smrg uint8_t alternate_vertical_scan_flag 882b8e80941Smrg uint8_t top_field_first 883b8e80941Smrg */ 884b8e80941Smrg 885b8e80941Smrg return result; 886b8e80941Smrg} 887b8e80941Smrg 888b8e80941Smrgstatic void get_mjpeg_slice_header(struct ruvd_decoder *dec, struct pipe_mjpeg_picture_desc *pic) 889b8e80941Smrg{ 890b8e80941Smrg int size = 0, saved_size, len_pos, i; 891b8e80941Smrg uint16_t *bs; 892b8e80941Smrg uint8_t *buf = dec->bs_ptr; 893b8e80941Smrg 894b8e80941Smrg /* SOI */ 895b8e80941Smrg buf[size++] = 0xff; 896b8e80941Smrg buf[size++] = 0xd8; 897b8e80941Smrg 898b8e80941Smrg /* DQT */ 899b8e80941Smrg buf[size++] = 0xff; 900b8e80941Smrg buf[size++] = 0xdb; 901b8e80941Smrg 902b8e80941Smrg len_pos = size++; 903b8e80941Smrg size++; 904b8e80941Smrg 905b8e80941Smrg for (i = 0; i < 4; ++i) { 906b8e80941Smrg if (pic->quantization_table.load_quantiser_table[i] == 0) 907b8e80941Smrg continue; 908b8e80941Smrg 909b8e80941Smrg buf[size++] = i; 910b8e80941Smrg memcpy((buf + size), &pic->quantization_table.quantiser_table[i], 64); 911b8e80941Smrg size += 64; 912b8e80941Smrg } 913b8e80941Smrg 914b8e80941Smrg bs = (uint16_t*)&buf[len_pos]; 915b8e80941Smrg *bs = util_bswap16(size - 4); 916b8e80941Smrg 917b8e80941Smrg saved_size = size; 918b8e80941Smrg 919b8e80941Smrg /* DHT */ 920b8e80941Smrg buf[size++] = 0xff; 921b8e80941Smrg buf[size++] = 0xc4; 922b8e80941Smrg 923b8e80941Smrg len_pos = size++; 924b8e80941Smrg size++; 925b8e80941Smrg 926b8e80941Smrg for (i = 0; i < 2; ++i) { 927b8e80941Smrg if (pic->huffman_table.load_huffman_table[i] == 0) 928b8e80941Smrg continue; 929b8e80941Smrg 930b8e80941Smrg buf[size++] = 0x00 | i; 931b8e80941Smrg memcpy((buf + size), &pic->huffman_table.table[i].num_dc_codes, 16); 932b8e80941Smrg size += 16; 933b8e80941Smrg memcpy((buf + size), &pic->huffman_table.table[i].dc_values, 12); 934b8e80941Smrg size += 12; 935b8e80941Smrg } 936b8e80941Smrg 937b8e80941Smrg for (i = 0; i < 2; ++i) { 938b8e80941Smrg if (pic->huffman_table.load_huffman_table[i] == 0) 939b8e80941Smrg continue; 940b8e80941Smrg 941b8e80941Smrg buf[size++] = 0x10 | i; 942b8e80941Smrg memcpy((buf + size), &pic->huffman_table.table[i].num_ac_codes, 16); 943b8e80941Smrg size += 16; 944b8e80941Smrg memcpy((buf + size), &pic->huffman_table.table[i].ac_values, 162); 945b8e80941Smrg size += 162; 946b8e80941Smrg } 947b8e80941Smrg 948b8e80941Smrg bs = (uint16_t*)&buf[len_pos]; 949b8e80941Smrg *bs = util_bswap16(size - saved_size - 2); 950b8e80941Smrg 951b8e80941Smrg saved_size = size; 952b8e80941Smrg 953b8e80941Smrg /* DRI */ 954b8e80941Smrg if (pic->slice_parameter.restart_interval) { 955b8e80941Smrg buf[size++] = 0xff; 956b8e80941Smrg buf[size++] = 0xdd; 957b8e80941Smrg buf[size++] = 0x00; 958b8e80941Smrg buf[size++] = 0x04; 959b8e80941Smrg bs = (uint16_t*)&buf[size++]; 960b8e80941Smrg *bs = util_bswap16(pic->slice_parameter.restart_interval); 961b8e80941Smrg saved_size = ++size; 962b8e80941Smrg } 963b8e80941Smrg 964b8e80941Smrg /* SOF */ 965b8e80941Smrg buf[size++] = 0xff; 966b8e80941Smrg buf[size++] = 0xc0; 967b8e80941Smrg 968b8e80941Smrg len_pos = size++; 969b8e80941Smrg size++; 970b8e80941Smrg 971b8e80941Smrg buf[size++] = 0x08; 972b8e80941Smrg 973b8e80941Smrg bs = (uint16_t*)&buf[size++]; 974b8e80941Smrg *bs = util_bswap16(pic->picture_parameter.picture_height); 975b8e80941Smrg size++; 976b8e80941Smrg 977b8e80941Smrg bs = (uint16_t*)&buf[size++]; 978b8e80941Smrg *bs = util_bswap16(pic->picture_parameter.picture_width); 979b8e80941Smrg size++; 980b8e80941Smrg 981b8e80941Smrg buf[size++] = pic->picture_parameter.num_components; 982b8e80941Smrg 983b8e80941Smrg for (i = 0; i < pic->picture_parameter.num_components; ++i) { 984b8e80941Smrg buf[size++] = pic->picture_parameter.components[i].component_id; 985b8e80941Smrg buf[size++] = pic->picture_parameter.components[i].h_sampling_factor << 4 | 986b8e80941Smrg pic->picture_parameter.components[i].v_sampling_factor; 987b8e80941Smrg buf[size++] = pic->picture_parameter.components[i].quantiser_table_selector; 988b8e80941Smrg } 989b8e80941Smrg 990b8e80941Smrg bs = (uint16_t*)&buf[len_pos]; 991b8e80941Smrg *bs = util_bswap16(size - saved_size - 2); 992b8e80941Smrg 993b8e80941Smrg saved_size = size; 994b8e80941Smrg 995b8e80941Smrg /* SOS */ 996b8e80941Smrg buf[size++] = 0xff; 997b8e80941Smrg buf[size++] = 0xda; 998b8e80941Smrg 999b8e80941Smrg len_pos = size++; 1000b8e80941Smrg size++; 1001b8e80941Smrg 1002b8e80941Smrg buf[size++] = pic->slice_parameter.num_components; 1003b8e80941Smrg 1004b8e80941Smrg for (i = 0; i < pic->slice_parameter.num_components; ++i) { 1005b8e80941Smrg buf[size++] = pic->slice_parameter.components[i].component_selector; 1006b8e80941Smrg buf[size++] = pic->slice_parameter.components[i].dc_table_selector << 4 | 1007b8e80941Smrg pic->slice_parameter.components[i].ac_table_selector; 1008b8e80941Smrg } 1009b8e80941Smrg 1010b8e80941Smrg buf[size++] = 0x00; 1011b8e80941Smrg buf[size++] = 0x3f; 1012b8e80941Smrg buf[size++] = 0x00; 1013b8e80941Smrg 1014b8e80941Smrg bs = (uint16_t*)&buf[len_pos]; 1015b8e80941Smrg *bs = util_bswap16(size - saved_size - 2); 1016b8e80941Smrg 1017b8e80941Smrg dec->bs_ptr += size; 1018b8e80941Smrg dec->bs_size += size; 1019b8e80941Smrg} 1020b8e80941Smrg 1021b8e80941Smrg/** 1022b8e80941Smrg * destroy this video decoder 1023b8e80941Smrg */ 1024b8e80941Smrgstatic void ruvd_destroy(struct pipe_video_codec *decoder) 1025b8e80941Smrg{ 1026b8e80941Smrg struct ruvd_decoder *dec = (struct ruvd_decoder*)decoder; 1027b8e80941Smrg unsigned i; 1028b8e80941Smrg 1029b8e80941Smrg assert(decoder); 1030b8e80941Smrg 1031b8e80941Smrg map_msg_fb_it_buf(dec); 1032b8e80941Smrg dec->msg->size = sizeof(*dec->msg); 1033b8e80941Smrg dec->msg->msg_type = RUVD_MSG_DESTROY; 1034b8e80941Smrg dec->msg->stream_handle = dec->stream_handle; 1035b8e80941Smrg send_msg_buf(dec); 1036b8e80941Smrg 1037b8e80941Smrg flush(dec, 0); 1038b8e80941Smrg 1039b8e80941Smrg dec->ws->cs_destroy(dec->cs); 1040b8e80941Smrg 1041b8e80941Smrg for (i = 0; i < NUM_BUFFERS; ++i) { 1042b8e80941Smrg rvid_destroy_buffer(&dec->msg_fb_it_buffers[i]); 1043b8e80941Smrg rvid_destroy_buffer(&dec->bs_buffers[i]); 1044b8e80941Smrg } 1045b8e80941Smrg 1046b8e80941Smrg rvid_destroy_buffer(&dec->dpb); 1047b8e80941Smrg rvid_destroy_buffer(&dec->ctx); 1048b8e80941Smrg rvid_destroy_buffer(&dec->sessionctx); 1049b8e80941Smrg 1050b8e80941Smrg FREE(dec); 1051b8e80941Smrg} 1052b8e80941Smrg 1053b8e80941Smrg/** 1054b8e80941Smrg * start decoding of a new frame 1055b8e80941Smrg */ 1056b8e80941Smrgstatic void ruvd_begin_frame(struct pipe_video_codec *decoder, 1057b8e80941Smrg struct pipe_video_buffer *target, 1058b8e80941Smrg struct pipe_picture_desc *picture) 1059b8e80941Smrg{ 1060b8e80941Smrg struct ruvd_decoder *dec = (struct ruvd_decoder*)decoder; 1061b8e80941Smrg uintptr_t frame; 1062b8e80941Smrg 1063b8e80941Smrg assert(decoder); 1064b8e80941Smrg 1065b8e80941Smrg frame = ++dec->frame_number; 1066b8e80941Smrg vl_video_buffer_set_associated_data(target, decoder, (void *)frame, 1067b8e80941Smrg &ruvd_destroy_associated_data); 1068b8e80941Smrg 1069b8e80941Smrg dec->bs_size = 0; 1070b8e80941Smrg dec->bs_ptr = dec->ws->buffer_map( 1071b8e80941Smrg dec->bs_buffers[dec->cur_buffer].res->buf, 1072b8e80941Smrg dec->cs, PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY); 1073b8e80941Smrg} 1074b8e80941Smrg 1075b8e80941Smrg/** 1076b8e80941Smrg * decode a macroblock 1077b8e80941Smrg */ 1078b8e80941Smrgstatic void ruvd_decode_macroblock(struct pipe_video_codec *decoder, 1079b8e80941Smrg struct pipe_video_buffer *target, 1080b8e80941Smrg struct pipe_picture_desc *picture, 1081b8e80941Smrg const struct pipe_macroblock *macroblocks, 1082b8e80941Smrg unsigned num_macroblocks) 1083b8e80941Smrg{ 1084b8e80941Smrg /* not supported (yet) */ 1085b8e80941Smrg assert(0); 1086b8e80941Smrg} 1087b8e80941Smrg 1088b8e80941Smrg/** 1089b8e80941Smrg * decode a bitstream 1090b8e80941Smrg */ 1091b8e80941Smrgstatic void ruvd_decode_bitstream(struct pipe_video_codec *decoder, 1092b8e80941Smrg struct pipe_video_buffer *target, 1093b8e80941Smrg struct pipe_picture_desc *picture, 1094b8e80941Smrg unsigned num_buffers, 1095b8e80941Smrg const void * const *buffers, 1096b8e80941Smrg const unsigned *sizes) 1097b8e80941Smrg{ 1098b8e80941Smrg struct ruvd_decoder *dec = (struct ruvd_decoder*)decoder; 1099b8e80941Smrg enum pipe_video_format format = u_reduce_video_profile(picture->profile); 1100b8e80941Smrg unsigned i; 1101b8e80941Smrg 1102b8e80941Smrg assert(decoder); 1103b8e80941Smrg 1104b8e80941Smrg if (!dec->bs_ptr) 1105b8e80941Smrg return; 1106b8e80941Smrg 1107b8e80941Smrg if (format == PIPE_VIDEO_FORMAT_JPEG) 1108b8e80941Smrg get_mjpeg_slice_header(dec, (struct pipe_mjpeg_picture_desc*)picture); 1109b8e80941Smrg 1110b8e80941Smrg for (i = 0; i < num_buffers; ++i) { 1111b8e80941Smrg struct rvid_buffer *buf = &dec->bs_buffers[dec->cur_buffer]; 1112b8e80941Smrg unsigned new_size = dec->bs_size + sizes[i]; 1113b8e80941Smrg 1114b8e80941Smrg if (format == PIPE_VIDEO_FORMAT_JPEG) 1115b8e80941Smrg new_size += 2; /* save for EOI */ 1116b8e80941Smrg 1117b8e80941Smrg if (new_size > buf->res->buf->size) { 1118b8e80941Smrg dec->ws->buffer_unmap(buf->res->buf); 1119b8e80941Smrg if (!rvid_resize_buffer(dec->screen, dec->cs, buf, new_size)) { 1120b8e80941Smrg RVID_ERR("Can't resize bitstream buffer!"); 1121b8e80941Smrg return; 1122b8e80941Smrg } 1123b8e80941Smrg 1124b8e80941Smrg dec->bs_ptr = dec->ws->buffer_map(buf->res->buf, dec->cs, 1125b8e80941Smrg PIPE_TRANSFER_WRITE | 1126b8e80941Smrg RADEON_TRANSFER_TEMPORARY); 1127b8e80941Smrg if (!dec->bs_ptr) 1128b8e80941Smrg return; 1129b8e80941Smrg 1130b8e80941Smrg dec->bs_ptr += dec->bs_size; 1131b8e80941Smrg } 1132b8e80941Smrg 1133b8e80941Smrg memcpy(dec->bs_ptr, buffers[i], sizes[i]); 1134b8e80941Smrg dec->bs_size += sizes[i]; 1135b8e80941Smrg dec->bs_ptr += sizes[i]; 1136b8e80941Smrg } 1137b8e80941Smrg 1138b8e80941Smrg if (format == PIPE_VIDEO_FORMAT_JPEG) { 1139b8e80941Smrg ((uint8_t *)dec->bs_ptr)[0] = 0xff; /* EOI */ 1140b8e80941Smrg ((uint8_t *)dec->bs_ptr)[1] = 0xd9; 1141b8e80941Smrg dec->bs_size += 2; 1142b8e80941Smrg dec->bs_ptr += 2; 1143b8e80941Smrg } 1144b8e80941Smrg} 1145b8e80941Smrg 1146b8e80941Smrg/** 1147b8e80941Smrg * end decoding of the current frame 1148b8e80941Smrg */ 1149b8e80941Smrgstatic void ruvd_end_frame(struct pipe_video_codec *decoder, 1150b8e80941Smrg struct pipe_video_buffer *target, 1151b8e80941Smrg struct pipe_picture_desc *picture) 1152b8e80941Smrg{ 1153b8e80941Smrg struct ruvd_decoder *dec = (struct ruvd_decoder*)decoder; 1154b8e80941Smrg struct pb_buffer *dt; 1155b8e80941Smrg struct rvid_buffer *msg_fb_it_buf, *bs_buf; 1156b8e80941Smrg unsigned bs_size; 1157b8e80941Smrg 1158b8e80941Smrg assert(decoder); 1159b8e80941Smrg 1160b8e80941Smrg if (!dec->bs_ptr) 1161b8e80941Smrg return; 1162b8e80941Smrg 1163b8e80941Smrg msg_fb_it_buf = &dec->msg_fb_it_buffers[dec->cur_buffer]; 1164b8e80941Smrg bs_buf = &dec->bs_buffers[dec->cur_buffer]; 1165b8e80941Smrg 1166b8e80941Smrg bs_size = align(dec->bs_size, 128); 1167b8e80941Smrg memset(dec->bs_ptr, 0, bs_size - dec->bs_size); 1168b8e80941Smrg dec->ws->buffer_unmap(bs_buf->res->buf); 1169b8e80941Smrg 1170b8e80941Smrg map_msg_fb_it_buf(dec); 1171b8e80941Smrg dec->msg->size = sizeof(*dec->msg); 1172b8e80941Smrg dec->msg->msg_type = RUVD_MSG_DECODE; 1173b8e80941Smrg dec->msg->stream_handle = dec->stream_handle; 1174b8e80941Smrg dec->msg->status_report_feedback_number = dec->frame_number; 1175b8e80941Smrg 1176b8e80941Smrg dec->msg->body.decode.stream_type = dec->stream_type; 1177b8e80941Smrg dec->msg->body.decode.decode_flags = 0x1; 1178b8e80941Smrg dec->msg->body.decode.width_in_samples = dec->base.width; 1179b8e80941Smrg dec->msg->body.decode.height_in_samples = dec->base.height; 1180b8e80941Smrg 1181b8e80941Smrg if ((picture->profile == PIPE_VIDEO_PROFILE_VC1_SIMPLE) || 1182b8e80941Smrg (picture->profile == PIPE_VIDEO_PROFILE_VC1_MAIN)) { 1183b8e80941Smrg dec->msg->body.decode.width_in_samples = align(dec->msg->body.decode.width_in_samples, 16) / 16; 1184b8e80941Smrg dec->msg->body.decode.height_in_samples = align(dec->msg->body.decode.height_in_samples, 16) / 16; 1185b8e80941Smrg } 1186b8e80941Smrg 1187b8e80941Smrg if (dec->dpb.res) 1188b8e80941Smrg dec->msg->body.decode.dpb_size = dec->dpb.res->buf->size; 1189b8e80941Smrg dec->msg->body.decode.bsd_size = bs_size; 1190b8e80941Smrg dec->msg->body.decode.db_pitch = align(dec->base.width, get_db_pitch_alignment(dec)); 1191b8e80941Smrg 1192b8e80941Smrg dt = dec->set_dtb(dec->msg, (struct vl_video_buffer *)target); 1193b8e80941Smrg 1194b8e80941Smrg switch (u_reduce_video_profile(picture->profile)) { 1195b8e80941Smrg case PIPE_VIDEO_FORMAT_MPEG4_AVC: 1196b8e80941Smrg dec->msg->body.decode.codec.h264 = get_h264_msg(dec, (struct pipe_h264_picture_desc*)picture); 1197b8e80941Smrg break; 1198b8e80941Smrg 1199b8e80941Smrg case PIPE_VIDEO_FORMAT_HEVC: 1200b8e80941Smrg dec->msg->body.decode.codec.h265 = get_h265_msg(dec, target, (struct pipe_h265_picture_desc*)picture); 1201b8e80941Smrg if (dec->ctx.res == NULL) { 1202b8e80941Smrg unsigned ctx_size; 1203b8e80941Smrg if (dec->base.profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10) 1204b8e80941Smrg ctx_size = calc_ctx_size_h265_main10(dec, (struct pipe_h265_picture_desc*)picture); 1205b8e80941Smrg else 1206b8e80941Smrg ctx_size = calc_ctx_size_h265_main(dec); 1207b8e80941Smrg if (!rvid_create_buffer(dec->screen, &dec->ctx, ctx_size, PIPE_USAGE_DEFAULT)) { 1208b8e80941Smrg RVID_ERR("Can't allocated context buffer.\n"); 1209b8e80941Smrg } 1210b8e80941Smrg rvid_clear_buffer(decoder->context, &dec->ctx); 1211b8e80941Smrg } 1212b8e80941Smrg 1213b8e80941Smrg if (dec->ctx.res) 1214b8e80941Smrg dec->msg->body.decode.dpb_reserved = dec->ctx.res->buf->size; 1215b8e80941Smrg break; 1216b8e80941Smrg 1217b8e80941Smrg case PIPE_VIDEO_FORMAT_VC1: 1218b8e80941Smrg dec->msg->body.decode.codec.vc1 = get_vc1_msg((struct pipe_vc1_picture_desc*)picture); 1219b8e80941Smrg break; 1220b8e80941Smrg 1221b8e80941Smrg case PIPE_VIDEO_FORMAT_MPEG12: 1222b8e80941Smrg dec->msg->body.decode.codec.mpeg2 = get_mpeg2_msg(dec, (struct pipe_mpeg12_picture_desc*)picture); 1223b8e80941Smrg break; 1224b8e80941Smrg 1225b8e80941Smrg case PIPE_VIDEO_FORMAT_MPEG4: 1226b8e80941Smrg dec->msg->body.decode.codec.mpeg4 = get_mpeg4_msg(dec, (struct pipe_mpeg4_picture_desc*)picture); 1227b8e80941Smrg break; 1228b8e80941Smrg 1229b8e80941Smrg case PIPE_VIDEO_FORMAT_JPEG: 1230b8e80941Smrg break; 1231b8e80941Smrg 1232b8e80941Smrg default: 1233b8e80941Smrg assert(0); 1234b8e80941Smrg return; 1235b8e80941Smrg } 1236b8e80941Smrg 1237b8e80941Smrg dec->msg->body.decode.db_surf_tile_config = dec->msg->body.decode.dt_surf_tile_config; 1238b8e80941Smrg dec->msg->body.decode.extension_support = 0x1; 1239b8e80941Smrg 1240b8e80941Smrg /* set at least the feedback buffer size */ 1241b8e80941Smrg dec->fb[0] = dec->fb_size; 1242b8e80941Smrg 1243b8e80941Smrg send_msg_buf(dec); 1244b8e80941Smrg 1245b8e80941Smrg if (dec->dpb.res) 1246b8e80941Smrg send_cmd(dec, RUVD_CMD_DPB_BUFFER, dec->dpb.res->buf, 0, 1247b8e80941Smrg RADEON_USAGE_READWRITE, RADEON_DOMAIN_VRAM); 1248b8e80941Smrg 1249b8e80941Smrg if (dec->ctx.res) 1250b8e80941Smrg send_cmd(dec, RUVD_CMD_CONTEXT_BUFFER, dec->ctx.res->buf, 0, 1251b8e80941Smrg RADEON_USAGE_READWRITE, RADEON_DOMAIN_VRAM); 1252b8e80941Smrg send_cmd(dec, RUVD_CMD_BITSTREAM_BUFFER, bs_buf->res->buf, 1253b8e80941Smrg 0, RADEON_USAGE_READ, RADEON_DOMAIN_GTT); 1254b8e80941Smrg send_cmd(dec, RUVD_CMD_DECODING_TARGET_BUFFER, dt, 0, 1255b8e80941Smrg RADEON_USAGE_WRITE, RADEON_DOMAIN_VRAM); 1256b8e80941Smrg send_cmd(dec, RUVD_CMD_FEEDBACK_BUFFER, msg_fb_it_buf->res->buf, 1257b8e80941Smrg FB_BUFFER_OFFSET, RADEON_USAGE_WRITE, RADEON_DOMAIN_GTT); 1258b8e80941Smrg if (have_it(dec)) 1259b8e80941Smrg send_cmd(dec, RUVD_CMD_ITSCALING_TABLE_BUFFER, msg_fb_it_buf->res->buf, 1260b8e80941Smrg FB_BUFFER_OFFSET + dec->fb_size, RADEON_USAGE_READ, RADEON_DOMAIN_GTT); 1261b8e80941Smrg set_reg(dec, dec->reg.cntl, 1); 1262b8e80941Smrg 1263b8e80941Smrg flush(dec, PIPE_FLUSH_ASYNC); 1264b8e80941Smrg next_buffer(dec); 1265b8e80941Smrg} 1266b8e80941Smrg 1267b8e80941Smrg/** 1268b8e80941Smrg * flush any outstanding command buffers to the hardware 1269b8e80941Smrg */ 1270b8e80941Smrgstatic void ruvd_flush(struct pipe_video_codec *decoder) 1271b8e80941Smrg{ 1272b8e80941Smrg} 1273b8e80941Smrg 1274b8e80941Smrg/** 1275b8e80941Smrg * create and UVD decoder 1276b8e80941Smrg */ 1277b8e80941Smrgstruct pipe_video_codec *ruvd_create_decoder(struct pipe_context *context, 1278b8e80941Smrg const struct pipe_video_codec *templ, 1279b8e80941Smrg ruvd_set_dtb set_dtb) 1280b8e80941Smrg{ 1281b8e80941Smrg struct radeon_winsys* ws = ((struct r600_common_context *)context)->ws; 1282b8e80941Smrg struct r600_common_context *rctx = (struct r600_common_context*)context; 1283b8e80941Smrg unsigned dpb_size; 1284b8e80941Smrg unsigned width = templ->width, height = templ->height; 1285b8e80941Smrg unsigned bs_buf_size; 1286b8e80941Smrg struct radeon_info info; 1287b8e80941Smrg struct ruvd_decoder *dec; 1288b8e80941Smrg int r, i; 1289b8e80941Smrg 1290b8e80941Smrg ws->query_info(ws, &info); 1291b8e80941Smrg 1292b8e80941Smrg switch(u_reduce_video_profile(templ->profile)) { 1293b8e80941Smrg case PIPE_VIDEO_FORMAT_MPEG12: 1294b8e80941Smrg if (templ->entrypoint > PIPE_VIDEO_ENTRYPOINT_BITSTREAM || info.family < CHIP_PALM) 1295b8e80941Smrg return vl_create_mpeg12_decoder(context, templ); 1296b8e80941Smrg 1297b8e80941Smrg /* fall through */ 1298b8e80941Smrg case PIPE_VIDEO_FORMAT_MPEG4: 1299b8e80941Smrg width = align(width, VL_MACROBLOCK_WIDTH); 1300b8e80941Smrg height = align(height, VL_MACROBLOCK_HEIGHT); 1301b8e80941Smrg break; 1302b8e80941Smrg case PIPE_VIDEO_FORMAT_MPEG4_AVC: 1303b8e80941Smrg width = align(width, VL_MACROBLOCK_WIDTH); 1304b8e80941Smrg height = align(height, VL_MACROBLOCK_HEIGHT); 1305b8e80941Smrg break; 1306b8e80941Smrg 1307b8e80941Smrg default: 1308b8e80941Smrg break; 1309b8e80941Smrg } 1310b8e80941Smrg 1311b8e80941Smrg 1312b8e80941Smrg dec = CALLOC_STRUCT(ruvd_decoder); 1313b8e80941Smrg 1314b8e80941Smrg if (!dec) 1315b8e80941Smrg return NULL; 1316b8e80941Smrg 1317b8e80941Smrg if (info.drm_major < 3) 1318b8e80941Smrg dec->use_legacy = true; 1319b8e80941Smrg 1320b8e80941Smrg dec->base = *templ; 1321b8e80941Smrg dec->base.context = context; 1322b8e80941Smrg dec->base.width = width; 1323b8e80941Smrg dec->base.height = height; 1324b8e80941Smrg 1325b8e80941Smrg dec->base.destroy = ruvd_destroy; 1326b8e80941Smrg dec->base.begin_frame = ruvd_begin_frame; 1327b8e80941Smrg dec->base.decode_macroblock = ruvd_decode_macroblock; 1328b8e80941Smrg dec->base.decode_bitstream = ruvd_decode_bitstream; 1329b8e80941Smrg dec->base.end_frame = ruvd_end_frame; 1330b8e80941Smrg dec->base.flush = ruvd_flush; 1331b8e80941Smrg 1332b8e80941Smrg dec->stream_type = profile2stream_type(dec, info.family); 1333b8e80941Smrg dec->set_dtb = set_dtb; 1334b8e80941Smrg dec->stream_handle = rvid_alloc_stream_handle(); 1335b8e80941Smrg dec->screen = context->screen; 1336b8e80941Smrg dec->ws = ws; 1337b8e80941Smrg dec->cs = ws->cs_create(rctx->ctx, RING_UVD, NULL, NULL, false); 1338b8e80941Smrg if (!dec->cs) { 1339b8e80941Smrg RVID_ERR("Can't get command submission context.\n"); 1340b8e80941Smrg goto error; 1341b8e80941Smrg } 1342b8e80941Smrg 1343b8e80941Smrg dec->fb_size = FB_BUFFER_SIZE; 1344b8e80941Smrg bs_buf_size = width * height * (512 / (16 * 16)); 1345b8e80941Smrg for (i = 0; i < NUM_BUFFERS; ++i) { 1346b8e80941Smrg unsigned msg_fb_it_size = FB_BUFFER_OFFSET + dec->fb_size; 1347b8e80941Smrg STATIC_ASSERT(sizeof(struct ruvd_msg) <= FB_BUFFER_OFFSET); 1348b8e80941Smrg if (have_it(dec)) 1349b8e80941Smrg msg_fb_it_size += IT_SCALING_TABLE_SIZE; 1350b8e80941Smrg if (!rvid_create_buffer(dec->screen, &dec->msg_fb_it_buffers[i], 1351b8e80941Smrg msg_fb_it_size, PIPE_USAGE_STAGING)) { 1352b8e80941Smrg RVID_ERR("Can't allocated message buffers.\n"); 1353b8e80941Smrg goto error; 1354b8e80941Smrg } 1355b8e80941Smrg 1356b8e80941Smrg if (!rvid_create_buffer(dec->screen, &dec->bs_buffers[i], 1357b8e80941Smrg bs_buf_size, PIPE_USAGE_STAGING)) { 1358b8e80941Smrg RVID_ERR("Can't allocated bitstream buffers.\n"); 1359b8e80941Smrg goto error; 1360b8e80941Smrg } 1361b8e80941Smrg 1362b8e80941Smrg rvid_clear_buffer(context, &dec->msg_fb_it_buffers[i]); 1363b8e80941Smrg rvid_clear_buffer(context, &dec->bs_buffers[i]); 1364b8e80941Smrg } 1365b8e80941Smrg 1366b8e80941Smrg dpb_size = calc_dpb_size(dec); 1367b8e80941Smrg if (dpb_size) { 1368b8e80941Smrg if (!rvid_create_buffer(dec->screen, &dec->dpb, dpb_size, PIPE_USAGE_DEFAULT)) { 1369b8e80941Smrg RVID_ERR("Can't allocated dpb.\n"); 1370b8e80941Smrg goto error; 1371b8e80941Smrg } 1372b8e80941Smrg rvid_clear_buffer(context, &dec->dpb); 1373b8e80941Smrg } 1374b8e80941Smrg 1375b8e80941Smrg dec->reg.data0 = RUVD_GPCOM_VCPU_DATA0; 1376b8e80941Smrg dec->reg.data1 = RUVD_GPCOM_VCPU_DATA1; 1377b8e80941Smrg dec->reg.cmd = RUVD_GPCOM_VCPU_CMD; 1378b8e80941Smrg dec->reg.cntl = RUVD_ENGINE_CNTL; 1379b8e80941Smrg 1380b8e80941Smrg map_msg_fb_it_buf(dec); 1381b8e80941Smrg dec->msg->size = sizeof(*dec->msg); 1382b8e80941Smrg dec->msg->msg_type = RUVD_MSG_CREATE; 1383b8e80941Smrg dec->msg->stream_handle = dec->stream_handle; 1384b8e80941Smrg dec->msg->body.create.stream_type = dec->stream_type; 1385b8e80941Smrg dec->msg->body.create.width_in_samples = dec->base.width; 1386b8e80941Smrg dec->msg->body.create.height_in_samples = dec->base.height; 1387b8e80941Smrg dec->msg->body.create.dpb_size = dpb_size; 1388b8e80941Smrg send_msg_buf(dec); 1389b8e80941Smrg r = flush(dec, 0); 1390b8e80941Smrg if (r) 1391b8e80941Smrg goto error; 1392b8e80941Smrg 1393b8e80941Smrg next_buffer(dec); 1394b8e80941Smrg 1395b8e80941Smrg return &dec->base; 1396b8e80941Smrg 1397b8e80941Smrgerror: 1398b8e80941Smrg if (dec->cs) dec->ws->cs_destroy(dec->cs); 1399b8e80941Smrg 1400b8e80941Smrg for (i = 0; i < NUM_BUFFERS; ++i) { 1401b8e80941Smrg rvid_destroy_buffer(&dec->msg_fb_it_buffers[i]); 1402b8e80941Smrg rvid_destroy_buffer(&dec->bs_buffers[i]); 1403b8e80941Smrg } 1404b8e80941Smrg 1405b8e80941Smrg rvid_destroy_buffer(&dec->dpb); 1406b8e80941Smrg rvid_destroy_buffer(&dec->ctx); 1407b8e80941Smrg rvid_destroy_buffer(&dec->sessionctx); 1408b8e80941Smrg 1409b8e80941Smrg FREE(dec); 1410b8e80941Smrg 1411b8e80941Smrg return NULL; 1412b8e80941Smrg} 1413b8e80941Smrg 1414b8e80941Smrg/* calculate top/bottom offset */ 1415b8e80941Smrgstatic unsigned texture_offset(struct radeon_surf *surface, unsigned layer) 1416b8e80941Smrg{ 1417b8e80941Smrg return surface->u.legacy.level[0].offset + 1418b8e80941Smrg layer * (uint64_t)surface->u.legacy.level[0].slice_size_dw * 4; 1419b8e80941Smrg} 1420b8e80941Smrg 1421b8e80941Smrg/* hw encode the aspect of macro tiles */ 1422b8e80941Smrgstatic unsigned macro_tile_aspect(unsigned macro_tile_aspect) 1423b8e80941Smrg{ 1424b8e80941Smrg switch (macro_tile_aspect) { 1425b8e80941Smrg default: 1426b8e80941Smrg case 1: macro_tile_aspect = 0; break; 1427b8e80941Smrg case 2: macro_tile_aspect = 1; break; 1428b8e80941Smrg case 4: macro_tile_aspect = 2; break; 1429b8e80941Smrg case 8: macro_tile_aspect = 3; break; 1430b8e80941Smrg } 1431b8e80941Smrg return macro_tile_aspect; 1432b8e80941Smrg} 1433b8e80941Smrg 1434b8e80941Smrg/* hw encode the bank width and height */ 1435b8e80941Smrgstatic unsigned bank_wh(unsigned bankwh) 1436b8e80941Smrg{ 1437b8e80941Smrg switch (bankwh) { 1438b8e80941Smrg default: 1439b8e80941Smrg case 1: bankwh = 0; break; 1440b8e80941Smrg case 2: bankwh = 1; break; 1441b8e80941Smrg case 4: bankwh = 2; break; 1442b8e80941Smrg case 8: bankwh = 3; break; 1443b8e80941Smrg } 1444b8e80941Smrg return bankwh; 1445b8e80941Smrg} 1446b8e80941Smrg 1447b8e80941Smrg/** 1448b8e80941Smrg * fill decoding target field from the luma and chroma surfaces 1449b8e80941Smrg */ 1450b8e80941Smrgvoid ruvd_set_dt_surfaces(struct ruvd_msg *msg, struct radeon_surf *luma, 1451b8e80941Smrg struct radeon_surf *chroma) 1452b8e80941Smrg{ 1453b8e80941Smrg msg->body.decode.dt_pitch = luma->u.legacy.level[0].nblk_x * luma->blk_w; 1454b8e80941Smrg switch (luma->u.legacy.level[0].mode) { 1455b8e80941Smrg case RADEON_SURF_MODE_LINEAR_ALIGNED: 1456b8e80941Smrg msg->body.decode.dt_tiling_mode = RUVD_TILE_LINEAR; 1457b8e80941Smrg msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_LINEAR; 1458b8e80941Smrg break; 1459b8e80941Smrg case RADEON_SURF_MODE_1D: 1460b8e80941Smrg msg->body.decode.dt_tiling_mode = RUVD_TILE_8X8; 1461b8e80941Smrg msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_1D_THIN; 1462b8e80941Smrg break; 1463b8e80941Smrg case RADEON_SURF_MODE_2D: 1464b8e80941Smrg msg->body.decode.dt_tiling_mode = RUVD_TILE_8X8; 1465b8e80941Smrg msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_2D_THIN; 1466b8e80941Smrg break; 1467b8e80941Smrg default: 1468b8e80941Smrg assert(0); 1469b8e80941Smrg break; 1470b8e80941Smrg } 1471b8e80941Smrg 1472b8e80941Smrg msg->body.decode.dt_luma_top_offset = texture_offset(luma, 0); 1473b8e80941Smrg if (chroma) 1474b8e80941Smrg msg->body.decode.dt_chroma_top_offset = texture_offset(chroma, 0); 1475b8e80941Smrg if (msg->body.decode.dt_field_mode) { 1476b8e80941Smrg msg->body.decode.dt_luma_bottom_offset = texture_offset(luma, 1); 1477b8e80941Smrg if (chroma) 1478b8e80941Smrg msg->body.decode.dt_chroma_bottom_offset = texture_offset(chroma, 1); 1479b8e80941Smrg } else { 1480b8e80941Smrg msg->body.decode.dt_luma_bottom_offset = msg->body.decode.dt_luma_top_offset; 1481b8e80941Smrg msg->body.decode.dt_chroma_bottom_offset = msg->body.decode.dt_chroma_top_offset; 1482b8e80941Smrg } 1483b8e80941Smrg 1484b8e80941Smrg if (chroma) { 1485b8e80941Smrg assert(luma->u.legacy.bankw == chroma->u.legacy.bankw); 1486b8e80941Smrg assert(luma->u.legacy.bankh == chroma->u.legacy.bankh); 1487b8e80941Smrg assert(luma->u.legacy.mtilea == chroma->u.legacy.mtilea); 1488b8e80941Smrg } 1489b8e80941Smrg 1490b8e80941Smrg msg->body.decode.dt_surf_tile_config |= RUVD_BANK_WIDTH(bank_wh(luma->u.legacy.bankw)); 1491b8e80941Smrg msg->body.decode.dt_surf_tile_config |= RUVD_BANK_HEIGHT(bank_wh(luma->u.legacy.bankh)); 1492b8e80941Smrg msg->body.decode.dt_surf_tile_config |= RUVD_MACRO_TILE_ASPECT_RATIO(macro_tile_aspect(luma->u.legacy.mtilea)); 1493b8e80941Smrg} 1494