radeon_uvd.c revision af69d88d
1/************************************************************************** 2 * 3 * Copyright 2011 Advanced Micro Devices, Inc. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28/* 29 * Authors: 30 * Christian König <christian.koenig@amd.com> 31 * 32 */ 33 34#include <sys/types.h> 35#include <assert.h> 36#include <errno.h> 37#include <unistd.h> 38#include <stdio.h> 39 40#include "pipe/p_video_codec.h" 41 42#include "util/u_memory.h" 43#include "util/u_video.h" 44 45#include "vl/vl_defines.h" 46#include "vl/vl_mpeg12_decoder.h" 47 48#include "../../winsys/radeon/drm/radeon_winsys.h" 49#include "r600_pipe_common.h" 50#include "radeon_video.h" 51#include "radeon_uvd.h" 52 53#define NUM_BUFFERS 4 54 55#define NUM_MPEG2_REFS 6 56#define NUM_H264_REFS 17 57#define NUM_VC1_REFS 5 58 59#define FB_BUFFER_OFFSET 0x1000 60#define FB_BUFFER_SIZE 2048 61 62/* UVD decoder representation */ 63struct ruvd_decoder { 64 struct pipe_video_codec base; 65 66 ruvd_set_dtb set_dtb; 67 68 unsigned stream_handle; 69 unsigned frame_number; 70 71 struct radeon_winsys* ws; 72 struct radeon_winsys_cs* cs; 73 74 unsigned cur_buffer; 75 76 struct rvid_buffer msg_fb_buffers[NUM_BUFFERS]; 77 struct ruvd_msg *msg; 78 uint32_t *fb; 79 80 struct rvid_buffer bs_buffers[NUM_BUFFERS]; 81 void* bs_ptr; 82 unsigned bs_size; 83 84 struct rvid_buffer dpb; 85}; 86 87/* flush IB to the hardware */ 88static void flush(struct ruvd_decoder *dec) 89{ 90 dec->ws->cs_flush(dec->cs, RADEON_FLUSH_ASYNC, NULL, 0); 91} 92 93/* add a new set register command to the IB */ 94static void set_reg(struct ruvd_decoder *dec, unsigned reg, uint32_t val) 95{ 96 uint32_t *pm4 = dec->cs->buf; 97 pm4[dec->cs->cdw++] = RUVD_PKT0(reg >> 2, 0); 98 pm4[dec->cs->cdw++] = val; 99} 100 101/* send a command to the VCPU through the GPCOM registers */ 102static void send_cmd(struct ruvd_decoder *dec, unsigned cmd, 103 struct radeon_winsys_cs_handle* cs_buf, uint32_t off, 104 enum radeon_bo_usage usage, enum radeon_bo_domain domain) 105{ 106 int reloc_idx; 107 108 reloc_idx = dec->ws->cs_add_reloc(dec->cs, cs_buf, usage, domain, 109 RADEON_PRIO_MIN); 110 set_reg(dec, RUVD_GPCOM_VCPU_DATA0, off); 111 set_reg(dec, RUVD_GPCOM_VCPU_DATA1, reloc_idx * 4); 112 set_reg(dec, RUVD_GPCOM_VCPU_CMD, cmd << 1); 113} 114 115/* map the next available message/feedback buffer */ 116static void map_msg_fb_buf(struct ruvd_decoder *dec) 117{ 118 struct rvid_buffer* buf; 119 uint8_t *ptr; 120 121 /* grab the current message/feedback buffer */ 122 buf = &dec->msg_fb_buffers[dec->cur_buffer]; 123 124 /* and map it for CPU access */ 125 ptr = dec->ws->buffer_map(buf->cs_handle, dec->cs, PIPE_TRANSFER_WRITE); 126 127 /* calc buffer offsets */ 128 dec->msg = (struct ruvd_msg *)ptr; 129 dec->fb = (uint32_t *)(ptr + FB_BUFFER_OFFSET); 130} 131 132/* unmap and send a message command to the VCPU */ 133static void send_msg_buf(struct ruvd_decoder *dec) 134{ 135 struct rvid_buffer* buf; 136 137 /* ignore the request if message/feedback buffer isn't mapped */ 138 if (!dec->msg || !dec->fb) 139 return; 140 141 /* grab the current message buffer */ 142 buf = &dec->msg_fb_buffers[dec->cur_buffer]; 143 144 /* unmap the buffer */ 145 dec->ws->buffer_unmap(buf->cs_handle); 146 dec->msg = NULL; 147 dec->fb = NULL; 148 149 /* and send it to the hardware */ 150 send_cmd(dec, RUVD_CMD_MSG_BUFFER, buf->cs_handle, 0, 151 RADEON_USAGE_READ, RADEON_DOMAIN_GTT); 152} 153 154/* cycle to the next set of buffers */ 155static void next_buffer(struct ruvd_decoder *dec) 156{ 157 ++dec->cur_buffer; 158 dec->cur_buffer %= NUM_BUFFERS; 159} 160 161/* convert the profile into something UVD understands */ 162static uint32_t profile2stream_type(enum pipe_video_profile profile) 163{ 164 switch (u_reduce_video_profile(profile)) { 165 case PIPE_VIDEO_FORMAT_MPEG4_AVC: 166 return RUVD_CODEC_H264; 167 168 case PIPE_VIDEO_FORMAT_VC1: 169 return RUVD_CODEC_VC1; 170 171 case PIPE_VIDEO_FORMAT_MPEG12: 172 return RUVD_CODEC_MPEG2; 173 174 case PIPE_VIDEO_FORMAT_MPEG4: 175 return RUVD_CODEC_MPEG4; 176 177 default: 178 assert(0); 179 return 0; 180 } 181} 182 183/* calculate size of reference picture buffer */ 184static unsigned calc_dpb_size(const struct pipe_video_codec *templ) 185{ 186 unsigned width_in_mb, height_in_mb, image_size, dpb_size; 187 188 // always align them to MB size for dpb calculation 189 unsigned width = align(templ->width, VL_MACROBLOCK_WIDTH); 190 unsigned height = align(templ->height, VL_MACROBLOCK_HEIGHT); 191 192 // always one more for currently decoded picture 193 unsigned max_references = templ->max_references + 1; 194 195 // aligned size of a single frame 196 image_size = width * height; 197 image_size += image_size / 2; 198 image_size = align(image_size, 1024); 199 200 // picture width & height in 16 pixel units 201 width_in_mb = width / VL_MACROBLOCK_WIDTH; 202 height_in_mb = align(height / VL_MACROBLOCK_HEIGHT, 2); 203 204 switch (u_reduce_video_profile(templ->profile)) { 205 case PIPE_VIDEO_FORMAT_MPEG4_AVC: 206 // the firmware seems to allways assume a minimum of ref frames 207 max_references = MAX2(NUM_H264_REFS, max_references); 208 209 // reference picture buffer 210 dpb_size = image_size * max_references; 211 212 // macroblock context buffer 213 dpb_size += width_in_mb * height_in_mb * max_references * 192; 214 215 // IT surface buffer 216 dpb_size += width_in_mb * height_in_mb * 32; 217 break; 218 219 case PIPE_VIDEO_FORMAT_VC1: 220 // the firmware seems to allways assume a minimum of ref frames 221 max_references = MAX2(NUM_VC1_REFS, max_references); 222 223 // reference picture buffer 224 dpb_size = image_size * max_references; 225 226 // CONTEXT_BUFFER 227 dpb_size += width_in_mb * height_in_mb * 128; 228 229 // IT surface buffer 230 dpb_size += width_in_mb * 64; 231 232 // DB surface buffer 233 dpb_size += width_in_mb * 128; 234 235 // BP 236 dpb_size += align(MAX2(width_in_mb, height_in_mb) * 7 * 16, 64); 237 break; 238 239 case PIPE_VIDEO_FORMAT_MPEG12: 240 // reference picture buffer, must be big enough for all frames 241 dpb_size = image_size * NUM_MPEG2_REFS; 242 break; 243 244 case PIPE_VIDEO_FORMAT_MPEG4: 245 // reference picture buffer 246 dpb_size = image_size * max_references; 247 248 // CM 249 dpb_size += width_in_mb * height_in_mb * 64; 250 251 // IT surface buffer 252 dpb_size += align(width_in_mb * height_in_mb * 32, 64); 253 break; 254 255 default: 256 // something is missing here 257 assert(0); 258 259 // at least use a sane default value 260 dpb_size = 32 * 1024 * 1024; 261 break; 262 } 263 return dpb_size; 264} 265 266/* get h264 specific message bits */ 267static struct ruvd_h264 get_h264_msg(struct ruvd_decoder *dec, struct pipe_h264_picture_desc *pic) 268{ 269 struct ruvd_h264 result; 270 271 memset(&result, 0, sizeof(result)); 272 switch (pic->base.profile) { 273 case PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE: 274 result.profile = RUVD_H264_PROFILE_BASELINE; 275 break; 276 277 case PIPE_VIDEO_PROFILE_MPEG4_AVC_MAIN: 278 result.profile = RUVD_H264_PROFILE_MAIN; 279 break; 280 281 case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH: 282 result.profile = RUVD_H264_PROFILE_HIGH; 283 break; 284 285 default: 286 assert(0); 287 break; 288 } 289 if (((dec->base.width * dec->base.height) >> 8) <= 1620) 290 result.level = 30; 291 else 292 result.level = 41; 293 294 result.sps_info_flags = 0; 295 result.sps_info_flags |= pic->pps->sps->direct_8x8_inference_flag << 0; 296 result.sps_info_flags |= pic->pps->sps->mb_adaptive_frame_field_flag << 1; 297 result.sps_info_flags |= pic->pps->sps->frame_mbs_only_flag << 2; 298 result.sps_info_flags |= pic->pps->sps->delta_pic_order_always_zero_flag << 3; 299 300 result.bit_depth_luma_minus8 = pic->pps->sps->bit_depth_luma_minus8; 301 result.bit_depth_chroma_minus8 = pic->pps->sps->bit_depth_chroma_minus8; 302 result.log2_max_frame_num_minus4 = pic->pps->sps->log2_max_frame_num_minus4; 303 result.pic_order_cnt_type = pic->pps->sps->pic_order_cnt_type; 304 result.log2_max_pic_order_cnt_lsb_minus4 = pic->pps->sps->log2_max_pic_order_cnt_lsb_minus4; 305 306 switch (dec->base.chroma_format) { 307 case PIPE_VIDEO_CHROMA_FORMAT_400: 308 result.chroma_format = 0; 309 break; 310 case PIPE_VIDEO_CHROMA_FORMAT_420: 311 result.chroma_format = 1; 312 break; 313 case PIPE_VIDEO_CHROMA_FORMAT_422: 314 result.chroma_format = 2; 315 break; 316 case PIPE_VIDEO_CHROMA_FORMAT_444: 317 result.chroma_format = 3; 318 break; 319 } 320 321 result.pps_info_flags = 0; 322 result.pps_info_flags |= pic->pps->transform_8x8_mode_flag << 0; 323 result.pps_info_flags |= pic->pps->redundant_pic_cnt_present_flag << 1; 324 result.pps_info_flags |= pic->pps->constrained_intra_pred_flag << 2; 325 result.pps_info_flags |= pic->pps->deblocking_filter_control_present_flag << 3; 326 result.pps_info_flags |= pic->pps->weighted_bipred_idc << 4; 327 result.pps_info_flags |= pic->pps->weighted_pred_flag << 6; 328 result.pps_info_flags |= pic->pps->bottom_field_pic_order_in_frame_present_flag << 7; 329 result.pps_info_flags |= pic->pps->entropy_coding_mode_flag << 8; 330 331 result.num_slice_groups_minus1 = pic->pps->num_slice_groups_minus1; 332 result.slice_group_map_type = pic->pps->slice_group_map_type; 333 result.slice_group_change_rate_minus1 = pic->pps->slice_group_change_rate_minus1; 334 result.pic_init_qp_minus26 = pic->pps->pic_init_qp_minus26; 335 result.chroma_qp_index_offset = pic->pps->chroma_qp_index_offset; 336 result.second_chroma_qp_index_offset = pic->pps->second_chroma_qp_index_offset; 337 338 memcpy(result.scaling_list_4x4, pic->pps->ScalingList4x4, 6*16); 339 memcpy(result.scaling_list_8x8, pic->pps->ScalingList8x8, 2*64); 340 341 result.num_ref_frames = pic->num_ref_frames; 342 343 result.num_ref_idx_l0_active_minus1 = pic->num_ref_idx_l0_active_minus1; 344 result.num_ref_idx_l1_active_minus1 = pic->num_ref_idx_l1_active_minus1; 345 346 result.frame_num = pic->frame_num; 347 memcpy(result.frame_num_list, pic->frame_num_list, 4*16); 348 result.curr_field_order_cnt_list[0] = pic->field_order_cnt[0]; 349 result.curr_field_order_cnt_list[1] = pic->field_order_cnt[1]; 350 memcpy(result.field_order_cnt_list, pic->field_order_cnt_list, 4*16*2); 351 352 result.decoded_pic_idx = pic->frame_num; 353 354 return result; 355} 356 357/* get vc1 specific message bits */ 358static struct ruvd_vc1 get_vc1_msg(struct pipe_vc1_picture_desc *pic) 359{ 360 struct ruvd_vc1 result; 361 362 memset(&result, 0, sizeof(result)); 363 364 switch(pic->base.profile) { 365 case PIPE_VIDEO_PROFILE_VC1_SIMPLE: 366 result.profile = RUVD_VC1_PROFILE_SIMPLE; 367 result.level = 1; 368 break; 369 370 case PIPE_VIDEO_PROFILE_VC1_MAIN: 371 result.profile = RUVD_VC1_PROFILE_MAIN; 372 result.level = 2; 373 break; 374 375 case PIPE_VIDEO_PROFILE_VC1_ADVANCED: 376 result.profile = RUVD_VC1_PROFILE_ADVANCED; 377 result.level = 4; 378 break; 379 380 default: 381 assert(0); 382 } 383 384 /* fields common for all profiles */ 385 result.sps_info_flags |= pic->postprocflag << 7; 386 result.sps_info_flags |= pic->pulldown << 6; 387 result.sps_info_flags |= pic->interlace << 5; 388 result.sps_info_flags |= pic->tfcntrflag << 4; 389 result.sps_info_flags |= pic->finterpflag << 3; 390 result.sps_info_flags |= pic->psf << 1; 391 392 result.pps_info_flags |= pic->range_mapy_flag << 31; 393 result.pps_info_flags |= pic->range_mapy << 28; 394 result.pps_info_flags |= pic->range_mapuv_flag << 27; 395 result.pps_info_flags |= pic->range_mapuv << 24; 396 result.pps_info_flags |= pic->multires << 21; 397 result.pps_info_flags |= pic->maxbframes << 16; 398 result.pps_info_flags |= pic->overlap << 11; 399 result.pps_info_flags |= pic->quantizer << 9; 400 result.pps_info_flags |= pic->panscan_flag << 7; 401 result.pps_info_flags |= pic->refdist_flag << 6; 402 result.pps_info_flags |= pic->vstransform << 0; 403 404 /* some fields only apply to main/advanced profile */ 405 if (pic->base.profile != PIPE_VIDEO_PROFILE_VC1_SIMPLE) { 406 result.pps_info_flags |= pic->syncmarker << 20; 407 result.pps_info_flags |= pic->rangered << 19; 408 result.pps_info_flags |= pic->loopfilter << 5; 409 result.pps_info_flags |= pic->fastuvmc << 4; 410 result.pps_info_flags |= pic->extended_mv << 3; 411 result.pps_info_flags |= pic->extended_dmv << 8; 412 result.pps_info_flags |= pic->dquant << 1; 413 } 414 415 result.chroma_format = 1; 416 417#if 0 418//(((unsigned int)(pPicParams->advance.reserved1)) << SPS_INFO_VC1_RESERVED_SHIFT) 419uint32_t slice_count 420uint8_t picture_type 421uint8_t frame_coding_mode 422uint8_t deblockEnable 423uint8_t pquant 424#endif 425 426 return result; 427} 428 429/* extract the frame number from a referenced video buffer */ 430static uint32_t get_ref_pic_idx(struct ruvd_decoder *dec, struct pipe_video_buffer *ref) 431{ 432 uint32_t min = MAX2(dec->frame_number, NUM_MPEG2_REFS) - NUM_MPEG2_REFS; 433 uint32_t max = MAX2(dec->frame_number, 1) - 1; 434 uintptr_t frame; 435 436 /* seems to be the most sane fallback */ 437 if (!ref) 438 return max; 439 440 /* get the frame number from the associated data */ 441 frame = (uintptr_t)vl_video_buffer_get_associated_data(ref, &dec->base); 442 443 /* limit the frame number to a valid range */ 444 return MAX2(MIN2(frame, max), min); 445} 446 447/* get mpeg2 specific msg bits */ 448static struct ruvd_mpeg2 get_mpeg2_msg(struct ruvd_decoder *dec, 449 struct pipe_mpeg12_picture_desc *pic) 450{ 451 const int *zscan = pic->alternate_scan ? vl_zscan_alternate : vl_zscan_normal; 452 struct ruvd_mpeg2 result; 453 unsigned i; 454 455 memset(&result, 0, sizeof(result)); 456 result.decoded_pic_idx = dec->frame_number; 457 for (i = 0; i < 2; ++i) 458 result.ref_pic_idx[i] = get_ref_pic_idx(dec, pic->ref[i]); 459 460 result.load_intra_quantiser_matrix = 1; 461 result.load_nonintra_quantiser_matrix = 1; 462 463 for (i = 0; i < 64; ++i) { 464 result.intra_quantiser_matrix[i] = pic->intra_matrix[zscan[i]]; 465 result.nonintra_quantiser_matrix[i] = pic->non_intra_matrix[zscan[i]]; 466 } 467 468 result.profile_and_level_indication = 0; 469 result.chroma_format = 0x1; 470 471 result.picture_coding_type = pic->picture_coding_type; 472 result.f_code[0][0] = pic->f_code[0][0] + 1; 473 result.f_code[0][1] = pic->f_code[0][1] + 1; 474 result.f_code[1][0] = pic->f_code[1][0] + 1; 475 result.f_code[1][1] = pic->f_code[1][1] + 1; 476 result.intra_dc_precision = pic->intra_dc_precision; 477 result.pic_structure = pic->picture_structure; 478 result.top_field_first = pic->top_field_first; 479 result.frame_pred_frame_dct = pic->frame_pred_frame_dct; 480 result.concealment_motion_vectors = pic->concealment_motion_vectors; 481 result.q_scale_type = pic->q_scale_type; 482 result.intra_vlc_format = pic->intra_vlc_format; 483 result.alternate_scan = pic->alternate_scan; 484 485 return result; 486} 487 488/* get mpeg4 specific msg bits */ 489static struct ruvd_mpeg4 get_mpeg4_msg(struct ruvd_decoder *dec, 490 struct pipe_mpeg4_picture_desc *pic) 491{ 492 struct ruvd_mpeg4 result; 493 unsigned i; 494 495 memset(&result, 0, sizeof(result)); 496 result.decoded_pic_idx = dec->frame_number; 497 for (i = 0; i < 2; ++i) 498 result.ref_pic_idx[i] = get_ref_pic_idx(dec, pic->ref[i]); 499 500 result.variant_type = 0; 501 result.profile_and_level_indication = 0xF0; // ASP Level0 502 503 result.video_object_layer_verid = 0x5; // advanced simple 504 result.video_object_layer_shape = 0x0; // rectangular 505 506 result.video_object_layer_width = dec->base.width; 507 result.video_object_layer_height = dec->base.height; 508 509 result.vop_time_increment_resolution = pic->vop_time_increment_resolution; 510 511 result.flags |= pic->short_video_header << 0; 512 //result.flags |= obmc_disable << 1; 513 result.flags |= pic->interlaced << 2; 514 result.flags |= 1 << 3; // load_intra_quant_mat 515 result.flags |= 1 << 4; // load_nonintra_quant_mat 516 result.flags |= pic->quarter_sample << 5; 517 result.flags |= 1 << 6; // complexity_estimation_disable 518 result.flags |= pic->resync_marker_disable << 7; 519 //result.flags |= data_partitioned << 8; 520 //result.flags |= reversible_vlc << 9; 521 result.flags |= 0 << 10; // newpred_enable 522 result.flags |= 0 << 11; // reduced_resolution_vop_enable 523 //result.flags |= scalability << 12; 524 //result.flags |= is_object_layer_identifier << 13; 525 //result.flags |= fixed_vop_rate << 14; 526 //result.flags |= newpred_segment_type << 15; 527 528 result.quant_type = pic->quant_type; 529 530 for (i = 0; i < 64; ++i) { 531 result.intra_quant_mat[i] = pic->intra_matrix[vl_zscan_normal[i]]; 532 result.nonintra_quant_mat[i] = pic->non_intra_matrix[vl_zscan_normal[i]]; 533 } 534 535 /* 536 int32_t trd [2] 537 int32_t trb [2] 538 uint8_t vop_coding_type 539 uint8_t vop_fcode_forward 540 uint8_t vop_fcode_backward 541 uint8_t rounding_control 542 uint8_t alternate_vertical_scan_flag 543 uint8_t top_field_first 544 */ 545 546 return result; 547} 548 549/** 550 * destroy this video decoder 551 */ 552static void ruvd_destroy(struct pipe_video_codec *decoder) 553{ 554 struct ruvd_decoder *dec = (struct ruvd_decoder*)decoder; 555 unsigned i; 556 557 assert(decoder); 558 559 map_msg_fb_buf(dec); 560 memset(dec->msg, 0, sizeof(*dec->msg)); 561 dec->msg->size = sizeof(*dec->msg); 562 dec->msg->msg_type = RUVD_MSG_DESTROY; 563 dec->msg->stream_handle = dec->stream_handle; 564 send_msg_buf(dec); 565 566 flush(dec); 567 568 dec->ws->cs_destroy(dec->cs); 569 570 for (i = 0; i < NUM_BUFFERS; ++i) { 571 rvid_destroy_buffer(&dec->msg_fb_buffers[i]); 572 rvid_destroy_buffer(&dec->bs_buffers[i]); 573 } 574 575 rvid_destroy_buffer(&dec->dpb); 576 577 FREE(dec); 578} 579 580/* free associated data in the video buffer callback */ 581static void ruvd_destroy_associated_data(void *data) 582{ 583 /* NOOP, since we only use an intptr */ 584} 585 586/** 587 * start decoding of a new frame 588 */ 589static void ruvd_begin_frame(struct pipe_video_codec *decoder, 590 struct pipe_video_buffer *target, 591 struct pipe_picture_desc *picture) 592{ 593 struct ruvd_decoder *dec = (struct ruvd_decoder*)decoder; 594 uintptr_t frame; 595 596 assert(decoder); 597 598 frame = ++dec->frame_number; 599 vl_video_buffer_set_associated_data(target, decoder, (void *)frame, 600 &ruvd_destroy_associated_data); 601 602 dec->bs_size = 0; 603 dec->bs_ptr = dec->ws->buffer_map( 604 dec->bs_buffers[dec->cur_buffer].cs_handle, 605 dec->cs, PIPE_TRANSFER_WRITE); 606} 607 608/** 609 * decode a macroblock 610 */ 611static void ruvd_decode_macroblock(struct pipe_video_codec *decoder, 612 struct pipe_video_buffer *target, 613 struct pipe_picture_desc *picture, 614 const struct pipe_macroblock *macroblocks, 615 unsigned num_macroblocks) 616{ 617 /* not supported (yet) */ 618 assert(0); 619} 620 621/** 622 * decode a bitstream 623 */ 624static void ruvd_decode_bitstream(struct pipe_video_codec *decoder, 625 struct pipe_video_buffer *target, 626 struct pipe_picture_desc *picture, 627 unsigned num_buffers, 628 const void * const *buffers, 629 const unsigned *sizes) 630{ 631 struct ruvd_decoder *dec = (struct ruvd_decoder*)decoder; 632 unsigned i; 633 634 assert(decoder); 635 636 if (!dec->bs_ptr) 637 return; 638 639 for (i = 0; i < num_buffers; ++i) { 640 struct rvid_buffer *buf = &dec->bs_buffers[dec->cur_buffer]; 641 unsigned new_size = dec->bs_size + sizes[i]; 642 643 if (new_size > buf->buf->size) { 644 dec->ws->buffer_unmap(buf->cs_handle); 645 if (!rvid_resize_buffer(dec->ws, dec->cs, buf, new_size)) { 646 RVID_ERR("Can't resize bitstream buffer!"); 647 return; 648 } 649 650 dec->bs_ptr = dec->ws->buffer_map(buf->cs_handle, dec->cs, 651 PIPE_TRANSFER_WRITE); 652 if (!dec->bs_ptr) 653 return; 654 655 dec->bs_ptr += dec->bs_size; 656 } 657 658 memcpy(dec->bs_ptr, buffers[i], sizes[i]); 659 dec->bs_size += sizes[i]; 660 dec->bs_ptr += sizes[i]; 661 } 662} 663 664/** 665 * end decoding of the current frame 666 */ 667static void ruvd_end_frame(struct pipe_video_codec *decoder, 668 struct pipe_video_buffer *target, 669 struct pipe_picture_desc *picture) 670{ 671 struct ruvd_decoder *dec = (struct ruvd_decoder*)decoder; 672 struct radeon_winsys_cs_handle *dt; 673 struct rvid_buffer *msg_fb_buf, *bs_buf; 674 unsigned bs_size; 675 676 assert(decoder); 677 678 if (!dec->bs_ptr) 679 return; 680 681 msg_fb_buf = &dec->msg_fb_buffers[dec->cur_buffer]; 682 bs_buf = &dec->bs_buffers[dec->cur_buffer]; 683 684 bs_size = align(dec->bs_size, 128); 685 memset(dec->bs_ptr, 0, bs_size - dec->bs_size); 686 dec->ws->buffer_unmap(bs_buf->cs_handle); 687 688 map_msg_fb_buf(dec); 689 dec->msg->size = sizeof(*dec->msg); 690 dec->msg->msg_type = RUVD_MSG_DECODE; 691 dec->msg->stream_handle = dec->stream_handle; 692 dec->msg->status_report_feedback_number = dec->frame_number; 693 694 dec->msg->body.decode.stream_type = profile2stream_type(dec->base.profile); 695 dec->msg->body.decode.decode_flags = 0x1; 696 dec->msg->body.decode.width_in_samples = dec->base.width; 697 dec->msg->body.decode.height_in_samples = dec->base.height; 698 699 dec->msg->body.decode.dpb_size = dec->dpb.buf->size; 700 dec->msg->body.decode.bsd_size = bs_size; 701 702 dt = dec->set_dtb(dec->msg, (struct vl_video_buffer *)target); 703 704 switch (u_reduce_video_profile(picture->profile)) { 705 case PIPE_VIDEO_FORMAT_MPEG4_AVC: 706 dec->msg->body.decode.codec.h264 = get_h264_msg(dec, (struct pipe_h264_picture_desc*)picture); 707 break; 708 709 case PIPE_VIDEO_FORMAT_VC1: 710 dec->msg->body.decode.codec.vc1 = get_vc1_msg((struct pipe_vc1_picture_desc*)picture); 711 break; 712 713 case PIPE_VIDEO_FORMAT_MPEG12: 714 dec->msg->body.decode.codec.mpeg2 = get_mpeg2_msg(dec, (struct pipe_mpeg12_picture_desc*)picture); 715 break; 716 717 case PIPE_VIDEO_FORMAT_MPEG4: 718 dec->msg->body.decode.codec.mpeg4 = get_mpeg4_msg(dec, (struct pipe_mpeg4_picture_desc*)picture); 719 break; 720 721 default: 722 assert(0); 723 return; 724 } 725 726 dec->msg->body.decode.db_surf_tile_config = dec->msg->body.decode.dt_surf_tile_config; 727 dec->msg->body.decode.extension_support = 0x1; 728 729 /* set at least the feedback buffer size */ 730 dec->fb[0] = FB_BUFFER_SIZE; 731 732 send_msg_buf(dec); 733 734 send_cmd(dec, RUVD_CMD_DPB_BUFFER, dec->dpb.cs_handle, 0, 735 RADEON_USAGE_READWRITE, RADEON_DOMAIN_VRAM); 736 send_cmd(dec, RUVD_CMD_BITSTREAM_BUFFER, bs_buf->cs_handle, 737 0, RADEON_USAGE_READ, RADEON_DOMAIN_GTT); 738 send_cmd(dec, RUVD_CMD_DECODING_TARGET_BUFFER, dt, 0, 739 RADEON_USAGE_WRITE, RADEON_DOMAIN_VRAM); 740 send_cmd(dec, RUVD_CMD_FEEDBACK_BUFFER, msg_fb_buf->cs_handle, 741 FB_BUFFER_OFFSET, RADEON_USAGE_WRITE, RADEON_DOMAIN_GTT); 742 set_reg(dec, RUVD_ENGINE_CNTL, 1); 743 744 flush(dec); 745 next_buffer(dec); 746} 747 748/** 749 * flush any outstanding command buffers to the hardware 750 */ 751static void ruvd_flush(struct pipe_video_codec *decoder) 752{ 753} 754 755/** 756 * create and UVD decoder 757 */ 758struct pipe_video_codec *ruvd_create_decoder(struct pipe_context *context, 759 const struct pipe_video_codec *templ, 760 ruvd_set_dtb set_dtb) 761{ 762 struct radeon_winsys* ws = ((struct r600_common_context *)context)->ws; 763 unsigned dpb_size = calc_dpb_size(templ); 764 unsigned width = templ->width, height = templ->height; 765 unsigned bs_buf_size; 766 struct radeon_info info; 767 struct ruvd_decoder *dec; 768 int i; 769 770 ws->query_info(ws, &info); 771 772 switch(u_reduce_video_profile(templ->profile)) { 773 case PIPE_VIDEO_FORMAT_MPEG12: 774 if (templ->entrypoint > PIPE_VIDEO_ENTRYPOINT_BITSTREAM || info.family < CHIP_PALM) 775 return vl_create_mpeg12_decoder(context, templ); 776 777 /* fall through */ 778 case PIPE_VIDEO_FORMAT_MPEG4: 779 case PIPE_VIDEO_FORMAT_MPEG4_AVC: 780 width = align(width, VL_MACROBLOCK_WIDTH); 781 height = align(height, VL_MACROBLOCK_HEIGHT); 782 break; 783 784 default: 785 break; 786 } 787 788 789 dec = CALLOC_STRUCT(ruvd_decoder); 790 791 if (!dec) 792 return NULL; 793 794 dec->base = *templ; 795 dec->base.context = context; 796 dec->base.width = width; 797 dec->base.height = height; 798 799 dec->base.destroy = ruvd_destroy; 800 dec->base.begin_frame = ruvd_begin_frame; 801 dec->base.decode_macroblock = ruvd_decode_macroblock; 802 dec->base.decode_bitstream = ruvd_decode_bitstream; 803 dec->base.end_frame = ruvd_end_frame; 804 dec->base.flush = ruvd_flush; 805 806 dec->set_dtb = set_dtb; 807 dec->stream_handle = rvid_alloc_stream_handle(); 808 dec->ws = ws; 809 dec->cs = ws->cs_create(ws, RING_UVD, NULL, NULL, NULL); 810 if (!dec->cs) { 811 RVID_ERR("Can't get command submission context.\n"); 812 goto error; 813 } 814 815 bs_buf_size = width * height * 512 / (16 * 16); 816 for (i = 0; i < NUM_BUFFERS; ++i) { 817 unsigned msg_fb_size = FB_BUFFER_OFFSET + FB_BUFFER_SIZE; 818 STATIC_ASSERT(sizeof(struct ruvd_msg) <= FB_BUFFER_OFFSET); 819 if (!rvid_create_buffer(dec->ws, &dec->msg_fb_buffers[i], msg_fb_size, 820 RADEON_DOMAIN_VRAM, 0)) { 821 RVID_ERR("Can't allocated message buffers.\n"); 822 goto error; 823 } 824 825 if (!rvid_create_buffer(dec->ws, &dec->bs_buffers[i], bs_buf_size, 826 RADEON_DOMAIN_GTT, 0)) { 827 RVID_ERR("Can't allocated bitstream buffers.\n"); 828 goto error; 829 } 830 831 rvid_clear_buffer(dec->ws, dec->cs, &dec->msg_fb_buffers[i]); 832 rvid_clear_buffer(dec->ws, dec->cs, &dec->bs_buffers[i]); 833 } 834 835 if (!rvid_create_buffer(dec->ws, &dec->dpb, dpb_size, RADEON_DOMAIN_VRAM, 0)) { 836 RVID_ERR("Can't allocated dpb.\n"); 837 goto error; 838 } 839 840 rvid_clear_buffer(dec->ws, dec->cs, &dec->dpb); 841 842 map_msg_fb_buf(dec); 843 dec->msg->size = sizeof(*dec->msg); 844 dec->msg->msg_type = RUVD_MSG_CREATE; 845 dec->msg->stream_handle = dec->stream_handle; 846 dec->msg->body.create.stream_type = profile2stream_type(dec->base.profile); 847 dec->msg->body.create.width_in_samples = dec->base.width; 848 dec->msg->body.create.height_in_samples = dec->base.height; 849 dec->msg->body.create.dpb_size = dec->dpb.buf->size; 850 send_msg_buf(dec); 851 flush(dec); 852 next_buffer(dec); 853 854 return &dec->base; 855 856error: 857 if (dec->cs) dec->ws->cs_destroy(dec->cs); 858 859 for (i = 0; i < NUM_BUFFERS; ++i) { 860 rvid_destroy_buffer(&dec->msg_fb_buffers[i]); 861 rvid_destroy_buffer(&dec->bs_buffers[i]); 862 } 863 864 rvid_destroy_buffer(&dec->dpb); 865 866 FREE(dec); 867 868 return NULL; 869} 870 871/* calculate top/bottom offset */ 872static unsigned texture_offset(struct radeon_surface *surface, unsigned layer) 873{ 874 return surface->level[0].offset + 875 layer * surface->level[0].slice_size; 876} 877 878/* hw encode the aspect of macro tiles */ 879static unsigned macro_tile_aspect(unsigned macro_tile_aspect) 880{ 881 switch (macro_tile_aspect) { 882 default: 883 case 1: macro_tile_aspect = 0; break; 884 case 2: macro_tile_aspect = 1; break; 885 case 4: macro_tile_aspect = 2; break; 886 case 8: macro_tile_aspect = 3; break; 887 } 888 return macro_tile_aspect; 889} 890 891/* hw encode the bank width and height */ 892static unsigned bank_wh(unsigned bankwh) 893{ 894 switch (bankwh) { 895 default: 896 case 1: bankwh = 0; break; 897 case 2: bankwh = 1; break; 898 case 4: bankwh = 2; break; 899 case 8: bankwh = 3; break; 900 } 901 return bankwh; 902} 903 904/** 905 * fill decoding target field from the luma and chroma surfaces 906 */ 907void ruvd_set_dt_surfaces(struct ruvd_msg *msg, struct radeon_surface *luma, 908 struct radeon_surface *chroma) 909{ 910 msg->body.decode.dt_pitch = luma->level[0].pitch_bytes; 911 switch (luma->level[0].mode) { 912 case RADEON_SURF_MODE_LINEAR_ALIGNED: 913 msg->body.decode.dt_tiling_mode = RUVD_TILE_LINEAR; 914 msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_LINEAR; 915 break; 916 case RADEON_SURF_MODE_1D: 917 msg->body.decode.dt_tiling_mode = RUVD_TILE_8X8; 918 msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_1D_THIN; 919 break; 920 case RADEON_SURF_MODE_2D: 921 msg->body.decode.dt_tiling_mode = RUVD_TILE_8X8; 922 msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_2D_THIN; 923 break; 924 default: 925 assert(0); 926 break; 927 } 928 929 msg->body.decode.dt_luma_top_offset = texture_offset(luma, 0); 930 msg->body.decode.dt_chroma_top_offset = texture_offset(chroma, 0); 931 if (msg->body.decode.dt_field_mode) { 932 msg->body.decode.dt_luma_bottom_offset = texture_offset(luma, 1); 933 msg->body.decode.dt_chroma_bottom_offset = texture_offset(chroma, 1); 934 } else { 935 msg->body.decode.dt_luma_bottom_offset = msg->body.decode.dt_luma_top_offset; 936 msg->body.decode.dt_chroma_bottom_offset = msg->body.decode.dt_chroma_top_offset; 937 } 938 939 assert(luma->bankw == chroma->bankw); 940 assert(luma->bankh == chroma->bankh); 941 assert(luma->mtilea == chroma->mtilea); 942 943 msg->body.decode.dt_surf_tile_config |= RUVD_BANK_WIDTH(bank_wh(luma->bankw)); 944 msg->body.decode.dt_surf_tile_config |= RUVD_BANK_HEIGHT(bank_wh(luma->bankh)); 945 msg->body.decode.dt_surf_tile_config |= RUVD_MACRO_TILE_ASPECT_RATIO(macro_tile_aspect(luma->mtilea)); 946} 947