1af69d88dSmrg/************************************************************************** 2af69d88dSmrg * 3af69d88dSmrg * Copyright 2013 Advanced Micro Devices, Inc. 4af69d88dSmrg * All Rights Reserved. 5af69d88dSmrg * 6af69d88dSmrg * Permission is hereby granted, free of charge, to any person obtaining a 7af69d88dSmrg * copy of this software and associated documentation files (the 8af69d88dSmrg * "Software"), to deal in the Software without restriction, including 9af69d88dSmrg * without limitation the rights to use, copy, modify, merge, publish, 10af69d88dSmrg * distribute, sub license, and/or sell copies of the Software, and to 11af69d88dSmrg * permit persons to whom the Software is furnished to do so, subject to 12af69d88dSmrg * the following conditions: 13af69d88dSmrg * 14af69d88dSmrg * The above copyright notice and this permission notice (including the 15af69d88dSmrg * next paragraph) shall be included in all copies or substantial portions 16af69d88dSmrg * of the Software. 17af69d88dSmrg * 18af69d88dSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19af69d88dSmrg * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20af69d88dSmrg * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21af69d88dSmrg * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR 22af69d88dSmrg * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23af69d88dSmrg * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24af69d88dSmrg * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25af69d88dSmrg * 26af69d88dSmrg **************************************************************************/ 27af69d88dSmrg 287ec681f3Smrg#include "radeon_vce.h" 29af69d88dSmrg 30af69d88dSmrg#include "pipe/p_video_codec.h" 317ec681f3Smrg#include "radeon_video.h" 327ec681f3Smrg#include "radeonsi/si_pipe.h" 33af69d88dSmrg#include "util/u_memory.h" 347ec681f3Smrg#include "util/u_video.h" 35af69d88dSmrg#include "vl/vl_video_buffer.h" 36af69d88dSmrg 377ec681f3Smrg#include <stdio.h> 38af69d88dSmrg 397ec681f3Smrg#define FW_40_2_2 ((40 << 24) | (2 << 16) | (2 << 8)) 407ec681f3Smrg#define FW_50_0_1 ((50 << 24) | (0 << 16) | (1 << 8)) 417ec681f3Smrg#define FW_50_1_2 ((50 << 24) | (1 << 16) | (2 << 8)) 4201e04c3fSmrg#define FW_50_10_2 ((50 << 24) | (10 << 16) | (2 << 8)) 4301e04c3fSmrg#define FW_50_17_3 ((50 << 24) | (17 << 16) | (3 << 8)) 447ec681f3Smrg#define FW_52_0_3 ((52 << 24) | (0 << 16) | (3 << 8)) 457ec681f3Smrg#define FW_52_4_3 ((52 << 24) | (4 << 16) | (3 << 8)) 467ec681f3Smrg#define FW_52_8_3 ((52 << 24) | (8 << 16) | (3 << 8)) 477ec681f3Smrg#define FW_53 (53 << 24) 4801e04c3fSmrg 49af69d88dSmrg/** 50af69d88dSmrg * flush commands to the hardware 51af69d88dSmrg */ 52af69d88dSmrgstatic void flush(struct rvce_encoder *enc) 53af69d88dSmrg{ 547ec681f3Smrg enc->ws->cs_flush(&enc->cs, PIPE_FLUSH_ASYNC, NULL); 557ec681f3Smrg enc->task_info_idx = 0; 567ec681f3Smrg enc->bs_idx = 0; 57af69d88dSmrg} 58af69d88dSmrg 59af69d88dSmrg#if 0 60af69d88dSmrgstatic void dump_feedback(struct rvce_encoder *enc, struct rvid_buffer *fb) 61af69d88dSmrg{ 627ec681f3Smrg uint32_t *ptr = enc->ws->buffer_map(fb->res->buf, &enc->cs, PIPE_MAP_READ_WRITE); 637ec681f3Smrg unsigned i = 0; 647ec681f3Smrg fprintf(stderr, "\n"); 657ec681f3Smrg fprintf(stderr, "encStatus:\t\t\t%08x\n", ptr[i++]); 667ec681f3Smrg fprintf(stderr, "encHasBitstream:\t\t%08x\n", ptr[i++]); 677ec681f3Smrg fprintf(stderr, "encHasAudioBitstream:\t\t%08x\n", ptr[i++]); 687ec681f3Smrg fprintf(stderr, "encBitstreamOffset:\t\t%08x\n", ptr[i++]); 697ec681f3Smrg fprintf(stderr, "encBitstreamSize:\t\t%08x\n", ptr[i++]); 707ec681f3Smrg fprintf(stderr, "encAudioBitstreamOffset:\t%08x\n", ptr[i++]); 717ec681f3Smrg fprintf(stderr, "encAudioBitstreamSize:\t\t%08x\n", ptr[i++]); 727ec681f3Smrg fprintf(stderr, "encExtrabytes:\t\t\t%08x\n", ptr[i++]); 737ec681f3Smrg fprintf(stderr, "encAudioExtrabytes:\t\t%08x\n", ptr[i++]); 747ec681f3Smrg fprintf(stderr, "videoTimeStamp:\t\t\t%08x\n", ptr[i++]); 757ec681f3Smrg fprintf(stderr, "audioTimeStamp:\t\t\t%08x\n", ptr[i++]); 767ec681f3Smrg fprintf(stderr, "videoOutputType:\t\t%08x\n", ptr[i++]); 777ec681f3Smrg fprintf(stderr, "attributeFlags:\t\t\t%08x\n", ptr[i++]); 787ec681f3Smrg fprintf(stderr, "seiPrivatePackageOffset:\t%08x\n", ptr[i++]); 797ec681f3Smrg fprintf(stderr, "seiPrivatePackageSize:\t\t%08x\n", ptr[i++]); 807ec681f3Smrg fprintf(stderr, "\n"); 817ec681f3Smrg enc->ws->buffer_unmap(fb->res->buf); 82af69d88dSmrg} 83af69d88dSmrg#endif 84af69d88dSmrg 85af69d88dSmrg/** 86af69d88dSmrg * reset the CPB handling 87af69d88dSmrg */ 88af69d88dSmrgstatic void reset_cpb(struct rvce_encoder *enc) 89af69d88dSmrg{ 907ec681f3Smrg unsigned i; 917ec681f3Smrg 927ec681f3Smrg list_inithead(&enc->cpb_slots); 937ec681f3Smrg for (i = 0; i < enc->cpb_num; ++i) { 947ec681f3Smrg struct rvce_cpb_slot *slot = &enc->cpb_array[i]; 957ec681f3Smrg slot->index = i; 967ec681f3Smrg slot->picture_type = PIPE_H2645_ENC_PICTURE_TYPE_SKIP; 977ec681f3Smrg slot->frame_num = 0; 987ec681f3Smrg slot->pic_order_cnt = 0; 997ec681f3Smrg list_addtail(&slot->list, &enc->cpb_slots); 1007ec681f3Smrg } 101af69d88dSmrg} 102af69d88dSmrg 103af69d88dSmrg/** 104af69d88dSmrg * sort l0 and l1 to the top of the list 105af69d88dSmrg */ 106af69d88dSmrgstatic void sort_cpb(struct rvce_encoder *enc) 107af69d88dSmrg{ 1087ec681f3Smrg struct rvce_cpb_slot *i, *l0 = NULL, *l1 = NULL; 109af69d88dSmrg 1107ec681f3Smrg LIST_FOR_EACH_ENTRY (i, &enc->cpb_slots, list) { 1117ec681f3Smrg if (i->frame_num == enc->pic.ref_idx_l0) 1127ec681f3Smrg l0 = i; 113af69d88dSmrg 1147ec681f3Smrg if (i->frame_num == enc->pic.ref_idx_l1) 1157ec681f3Smrg l1 = i; 116af69d88dSmrg 1177ec681f3Smrg if (enc->pic.picture_type == PIPE_H2645_ENC_PICTURE_TYPE_P && l0) 1187ec681f3Smrg break; 119af69d88dSmrg 1207ec681f3Smrg if (enc->pic.picture_type == PIPE_H2645_ENC_PICTURE_TYPE_B && l0 && l1) 1217ec681f3Smrg break; 1227ec681f3Smrg } 123af69d88dSmrg 1247ec681f3Smrg if (l1) { 1257ec681f3Smrg list_del(&l1->list); 1267ec681f3Smrg list_add(&l1->list, &enc->cpb_slots); 1277ec681f3Smrg } 128af69d88dSmrg 1297ec681f3Smrg if (l0) { 1307ec681f3Smrg list_del(&l0->list); 1317ec681f3Smrg list_add(&l0->list, &enc->cpb_slots); 1327ec681f3Smrg } 133af69d88dSmrg} 134af69d88dSmrg 135af69d88dSmrg/** 136af69d88dSmrg * get number of cpbs based on dpb 137af69d88dSmrg */ 138af69d88dSmrgstatic unsigned get_cpb_num(struct rvce_encoder *enc) 139af69d88dSmrg{ 1407ec681f3Smrg unsigned w = align(enc->base.width, 16) / 16; 1417ec681f3Smrg unsigned h = align(enc->base.height, 16) / 16; 1427ec681f3Smrg unsigned dpb; 1437ec681f3Smrg 1447ec681f3Smrg switch (enc->base.level) { 1457ec681f3Smrg case 10: 1467ec681f3Smrg dpb = 396; 1477ec681f3Smrg break; 1487ec681f3Smrg case 11: 1497ec681f3Smrg dpb = 900; 1507ec681f3Smrg break; 1517ec681f3Smrg case 12: 1527ec681f3Smrg case 13: 1537ec681f3Smrg case 20: 1547ec681f3Smrg dpb = 2376; 1557ec681f3Smrg break; 1567ec681f3Smrg case 21: 1577ec681f3Smrg dpb = 4752; 1587ec681f3Smrg break; 1597ec681f3Smrg case 22: 1607ec681f3Smrg case 30: 1617ec681f3Smrg dpb = 8100; 1627ec681f3Smrg break; 1637ec681f3Smrg case 31: 1647ec681f3Smrg dpb = 18000; 1657ec681f3Smrg break; 1667ec681f3Smrg case 32: 1677ec681f3Smrg dpb = 20480; 1687ec681f3Smrg break; 1697ec681f3Smrg case 40: 1707ec681f3Smrg case 41: 1717ec681f3Smrg dpb = 32768; 1727ec681f3Smrg break; 1737ec681f3Smrg case 42: 1747ec681f3Smrg dpb = 34816; 1757ec681f3Smrg break; 1767ec681f3Smrg case 50: 1777ec681f3Smrg dpb = 110400; 1787ec681f3Smrg break; 1797ec681f3Smrg default: 1807ec681f3Smrg case 51: 1817ec681f3Smrg case 52: 1827ec681f3Smrg dpb = 184320; 1837ec681f3Smrg break; 1847ec681f3Smrg } 1857ec681f3Smrg 1867ec681f3Smrg return MIN2(dpb / (w * h), 16); 187af69d88dSmrg} 188af69d88dSmrg 18901e04c3fSmrg/** 19001e04c3fSmrg * Get the slot for the currently encoded frame 19101e04c3fSmrg */ 19201e04c3fSmrgstruct rvce_cpb_slot *si_current_slot(struct rvce_encoder *enc) 19301e04c3fSmrg{ 1947ec681f3Smrg return LIST_ENTRY(struct rvce_cpb_slot, enc->cpb_slots.prev, list); 19501e04c3fSmrg} 19601e04c3fSmrg 19701e04c3fSmrg/** 19801e04c3fSmrg * Get the slot for L0 19901e04c3fSmrg */ 20001e04c3fSmrgstruct rvce_cpb_slot *si_l0_slot(struct rvce_encoder *enc) 20101e04c3fSmrg{ 2027ec681f3Smrg return LIST_ENTRY(struct rvce_cpb_slot, enc->cpb_slots.next, list); 20301e04c3fSmrg} 20401e04c3fSmrg 20501e04c3fSmrg/** 20601e04c3fSmrg * Get the slot for L1 20701e04c3fSmrg */ 20801e04c3fSmrgstruct rvce_cpb_slot *si_l1_slot(struct rvce_encoder *enc) 20901e04c3fSmrg{ 2107ec681f3Smrg return LIST_ENTRY(struct rvce_cpb_slot, enc->cpb_slots.next->next, list); 21101e04c3fSmrg} 21201e04c3fSmrg 21301e04c3fSmrg/** 21401e04c3fSmrg * Calculate the offsets into the CPB 21501e04c3fSmrg */ 2167ec681f3Smrgvoid si_vce_frame_offset(struct rvce_encoder *enc, struct rvce_cpb_slot *slot, signed *luma_offset, 2177ec681f3Smrg signed *chroma_offset) 21801e04c3fSmrg{ 2197ec681f3Smrg struct si_screen *sscreen = (struct si_screen *)enc->screen; 2207ec681f3Smrg unsigned pitch, vpitch, fsize; 2217ec681f3Smrg 2227ec681f3Smrg if (sscreen->info.chip_class < GFX9) { 2237ec681f3Smrg pitch = align(enc->luma->u.legacy.level[0].nblk_x * enc->luma->bpe, 128); 2247ec681f3Smrg vpitch = align(enc->luma->u.legacy.level[0].nblk_y, 16); 2257ec681f3Smrg } else { 2267ec681f3Smrg pitch = align(enc->luma->u.gfx9.surf_pitch * enc->luma->bpe, 256); 2277ec681f3Smrg vpitch = align(enc->luma->u.gfx9.surf_height, 16); 2287ec681f3Smrg } 2297ec681f3Smrg fsize = pitch * (vpitch + vpitch / 2); 2307ec681f3Smrg 2317ec681f3Smrg *luma_offset = slot->index * fsize; 2327ec681f3Smrg *chroma_offset = *luma_offset + pitch * vpitch; 23301e04c3fSmrg} 23401e04c3fSmrg 235af69d88dSmrg/** 236af69d88dSmrg * destroy this video encoder 237af69d88dSmrg */ 238af69d88dSmrgstatic void rvce_destroy(struct pipe_video_codec *encoder) 239af69d88dSmrg{ 2407ec681f3Smrg struct rvce_encoder *enc = (struct rvce_encoder *)encoder; 2417ec681f3Smrg if (enc->stream_handle) { 2427ec681f3Smrg struct rvid_buffer fb; 2437ec681f3Smrg si_vid_create_buffer(enc->screen, &fb, 512, PIPE_USAGE_STAGING); 2447ec681f3Smrg enc->fb = &fb; 2457ec681f3Smrg enc->session(enc); 2467ec681f3Smrg enc->destroy(enc); 2477ec681f3Smrg flush(enc); 2487ec681f3Smrg si_vid_destroy_buffer(&fb); 2497ec681f3Smrg } 2507ec681f3Smrg si_vid_destroy_buffer(&enc->cpb); 2517ec681f3Smrg enc->ws->cs_destroy(&enc->cs); 2527ec681f3Smrg FREE(enc->cpb_array); 2537ec681f3Smrg FREE(enc); 254af69d88dSmrg} 255af69d88dSmrg 2567ec681f3Smrgstatic void rvce_begin_frame(struct pipe_video_codec *encoder, struct pipe_video_buffer *source, 2577ec681f3Smrg struct pipe_picture_desc *picture) 258af69d88dSmrg{ 2597ec681f3Smrg struct rvce_encoder *enc = (struct rvce_encoder *)encoder; 2607ec681f3Smrg struct vl_video_buffer *vid_buf = (struct vl_video_buffer *)source; 2617ec681f3Smrg struct pipe_h264_enc_picture_desc *pic = (struct pipe_h264_enc_picture_desc *)picture; 2627ec681f3Smrg 2637ec681f3Smrg bool need_rate_control = 2647ec681f3Smrg enc->pic.rate_ctrl[0].rate_ctrl_method != pic->rate_ctrl[0].rate_ctrl_method || 2657ec681f3Smrg enc->pic.quant_i_frames != pic->quant_i_frames || 2667ec681f3Smrg enc->pic.quant_p_frames != pic->quant_p_frames || 2677ec681f3Smrg enc->pic.quant_b_frames != pic->quant_b_frames || 2687ec681f3Smrg enc->pic.rate_ctrl[0].target_bitrate != pic->rate_ctrl[0].target_bitrate || 2697ec681f3Smrg enc->pic.rate_ctrl[0].frame_rate_num != pic->rate_ctrl[0].frame_rate_num || 2707ec681f3Smrg enc->pic.rate_ctrl[0].frame_rate_den != pic->rate_ctrl[0].frame_rate_den; 2717ec681f3Smrg 2727ec681f3Smrg enc->pic = *pic; 2737ec681f3Smrg enc->si_get_pic_param(enc, pic); 2747ec681f3Smrg 2757ec681f3Smrg enc->get_buffer(vid_buf->resources[0], &enc->handle, &enc->luma); 2767ec681f3Smrg enc->get_buffer(vid_buf->resources[1], NULL, &enc->chroma); 2777ec681f3Smrg 2787ec681f3Smrg if (pic->picture_type == PIPE_H2645_ENC_PICTURE_TYPE_IDR) 2797ec681f3Smrg reset_cpb(enc); 2807ec681f3Smrg else if (pic->picture_type == PIPE_H2645_ENC_PICTURE_TYPE_P || 2817ec681f3Smrg pic->picture_type == PIPE_H2645_ENC_PICTURE_TYPE_B) 2827ec681f3Smrg sort_cpb(enc); 2837ec681f3Smrg 2847ec681f3Smrg if (!enc->stream_handle) { 2857ec681f3Smrg struct rvid_buffer fb; 2867ec681f3Smrg enc->stream_handle = si_vid_alloc_stream_handle(); 2877ec681f3Smrg si_vid_create_buffer(enc->screen, &fb, 512, PIPE_USAGE_STAGING); 2887ec681f3Smrg enc->fb = &fb; 2897ec681f3Smrg enc->session(enc); 2907ec681f3Smrg enc->create(enc); 2917ec681f3Smrg enc->config(enc); 2927ec681f3Smrg enc->feedback(enc); 2937ec681f3Smrg flush(enc); 2947ec681f3Smrg // dump_feedback(enc, &fb); 2957ec681f3Smrg si_vid_destroy_buffer(&fb); 2967ec681f3Smrg need_rate_control = false; 2977ec681f3Smrg } 2987ec681f3Smrg 2997ec681f3Smrg if (need_rate_control) { 3007ec681f3Smrg enc->session(enc); 3017ec681f3Smrg enc->config(enc); 3027ec681f3Smrg flush(enc); 3037ec681f3Smrg } 304af69d88dSmrg} 305af69d88dSmrg 306af69d88dSmrgstatic void rvce_encode_bitstream(struct pipe_video_codec *encoder, 3077ec681f3Smrg struct pipe_video_buffer *source, 3087ec681f3Smrg struct pipe_resource *destination, void **fb) 309af69d88dSmrg{ 3107ec681f3Smrg struct rvce_encoder *enc = (struct rvce_encoder *)encoder; 3117ec681f3Smrg enc->get_buffer(destination, &enc->bs_handle, NULL); 3127ec681f3Smrg enc->bs_size = destination->width0; 3137ec681f3Smrg 3147ec681f3Smrg *fb = enc->fb = CALLOC_STRUCT(rvid_buffer); 3157ec681f3Smrg if (!si_vid_create_buffer(enc->screen, enc->fb, 512, PIPE_USAGE_STAGING)) { 3167ec681f3Smrg RVID_ERR("Can't create feedback buffer.\n"); 3177ec681f3Smrg return; 3187ec681f3Smrg } 3197ec681f3Smrg if (!radeon_emitted(&enc->cs, 0)) 3207ec681f3Smrg enc->session(enc); 3217ec681f3Smrg enc->encode(enc); 3227ec681f3Smrg enc->feedback(enc); 323af69d88dSmrg} 324af69d88dSmrg 3257ec681f3Smrgstatic void rvce_end_frame(struct pipe_video_codec *encoder, struct pipe_video_buffer *source, 3267ec681f3Smrg struct pipe_picture_desc *picture) 327af69d88dSmrg{ 3287ec681f3Smrg struct rvce_encoder *enc = (struct rvce_encoder *)encoder; 3297ec681f3Smrg struct rvce_cpb_slot *slot = LIST_ENTRY(struct rvce_cpb_slot, enc->cpb_slots.prev, list); 3307ec681f3Smrg 3317ec681f3Smrg if (!enc->dual_inst || enc->bs_idx > 1) 3327ec681f3Smrg flush(enc); 3337ec681f3Smrg 3347ec681f3Smrg /* update the CPB backtrack with the just encoded frame */ 3357ec681f3Smrg slot->picture_type = enc->pic.picture_type; 3367ec681f3Smrg slot->frame_num = enc->pic.frame_num; 3377ec681f3Smrg slot->pic_order_cnt = enc->pic.pic_order_cnt; 3387ec681f3Smrg if (!enc->pic.not_referenced) { 3397ec681f3Smrg list_del(&slot->list); 3407ec681f3Smrg list_add(&slot->list, &enc->cpb_slots); 3417ec681f3Smrg } 342af69d88dSmrg} 343af69d88dSmrg 3447ec681f3Smrgstatic void rvce_get_feedback(struct pipe_video_codec *encoder, void *feedback, unsigned *size) 345af69d88dSmrg{ 3467ec681f3Smrg struct rvce_encoder *enc = (struct rvce_encoder *)encoder; 3477ec681f3Smrg struct rvid_buffer *fb = feedback; 3487ec681f3Smrg 3497ec681f3Smrg if (size) { 3507ec681f3Smrg uint32_t *ptr = enc->ws->buffer_map(enc->ws, fb->res->buf, &enc->cs, 3517ec681f3Smrg PIPE_MAP_READ_WRITE | RADEON_MAP_TEMPORARY); 3527ec681f3Smrg 3537ec681f3Smrg if (ptr[1]) { 3547ec681f3Smrg *size = ptr[4] - ptr[9]; 3557ec681f3Smrg } else { 3567ec681f3Smrg *size = 0; 3577ec681f3Smrg } 3587ec681f3Smrg 3597ec681f3Smrg enc->ws->buffer_unmap(enc->ws, fb->res->buf); 3607ec681f3Smrg } 3617ec681f3Smrg // dump_feedback(enc, fb); 3627ec681f3Smrg si_vid_destroy_buffer(fb); 3637ec681f3Smrg FREE(fb); 364af69d88dSmrg} 365af69d88dSmrg 366af69d88dSmrg/** 367af69d88dSmrg * flush any outstanding command buffers to the hardware 368af69d88dSmrg */ 369af69d88dSmrgstatic void rvce_flush(struct pipe_video_codec *encoder) 370af69d88dSmrg{ 3717ec681f3Smrg struct rvce_encoder *enc = (struct rvce_encoder *)encoder; 37201e04c3fSmrg 3737ec681f3Smrg flush(enc); 374af69d88dSmrg} 375af69d88dSmrg 3767ec681f3Smrgstatic void rvce_cs_flush(void *ctx, unsigned flags, struct pipe_fence_handle **fence) 377af69d88dSmrg{ 3787ec681f3Smrg // just ignored 379af69d88dSmrg} 380af69d88dSmrg 38101e04c3fSmrgstruct pipe_video_codec *si_vce_create_encoder(struct pipe_context *context, 3827ec681f3Smrg const struct pipe_video_codec *templ, 3837ec681f3Smrg struct radeon_winsys *ws, rvce_get_buffer get_buffer) 384af69d88dSmrg{ 3857ec681f3Smrg struct si_screen *sscreen = (struct si_screen *)context->screen; 3867ec681f3Smrg struct si_context *sctx = (struct si_context *)context; 3877ec681f3Smrg struct rvce_encoder *enc; 3887ec681f3Smrg struct pipe_video_buffer *tmp_buf, templat = {}; 3897ec681f3Smrg struct radeon_surf *tmp_surf; 3907ec681f3Smrg unsigned cpb_size; 3917ec681f3Smrg 3927ec681f3Smrg if (!sscreen->info.vce_fw_version) { 3937ec681f3Smrg RVID_ERR("Kernel doesn't supports VCE!\n"); 3947ec681f3Smrg return NULL; 3957ec681f3Smrg 3967ec681f3Smrg } else if (!si_vce_is_fw_version_supported(sscreen)) { 3977ec681f3Smrg RVID_ERR("Unsupported VCE fw version loaded!\n"); 3987ec681f3Smrg return NULL; 3997ec681f3Smrg } 4007ec681f3Smrg 4017ec681f3Smrg enc = CALLOC_STRUCT(rvce_encoder); 4027ec681f3Smrg if (!enc) 4037ec681f3Smrg return NULL; 4047ec681f3Smrg 4057ec681f3Smrg if (sscreen->info.is_amdgpu) 4067ec681f3Smrg enc->use_vm = true; 4077ec681f3Smrg if ((!sscreen->info.is_amdgpu && sscreen->info.drm_minor >= 42) || sscreen->info.is_amdgpu) 4087ec681f3Smrg enc->use_vui = true; 4097ec681f3Smrg if (sscreen->info.family >= CHIP_TONGA && sscreen->info.family != CHIP_STONEY && 4107ec681f3Smrg sscreen->info.family != CHIP_POLARIS11 && sscreen->info.family != CHIP_POLARIS12 && 4117ec681f3Smrg sscreen->info.family != CHIP_VEGAM) 4127ec681f3Smrg enc->dual_pipe = true; 4137ec681f3Smrg /* TODO enable B frame with dual instance */ 4147ec681f3Smrg if ((sscreen->info.family >= CHIP_TONGA) && (templ->max_references == 1) && 4157ec681f3Smrg (sscreen->info.vce_harvest_config == 0)) 4167ec681f3Smrg enc->dual_inst = true; 4177ec681f3Smrg 4187ec681f3Smrg enc->base = *templ; 4197ec681f3Smrg enc->base.context = context; 4207ec681f3Smrg 4217ec681f3Smrg enc->base.destroy = rvce_destroy; 4227ec681f3Smrg enc->base.begin_frame = rvce_begin_frame; 4237ec681f3Smrg enc->base.encode_bitstream = rvce_encode_bitstream; 4247ec681f3Smrg enc->base.end_frame = rvce_end_frame; 4257ec681f3Smrg enc->base.flush = rvce_flush; 4267ec681f3Smrg enc->base.get_feedback = rvce_get_feedback; 4277ec681f3Smrg enc->get_buffer = get_buffer; 4287ec681f3Smrg 4297ec681f3Smrg enc->screen = context->screen; 4307ec681f3Smrg enc->ws = ws; 4317ec681f3Smrg 4327ec681f3Smrg if (!ws->cs_create(&enc->cs, sctx->ctx, RING_VCE, rvce_cs_flush, enc, false)) { 4337ec681f3Smrg RVID_ERR("Can't get command submission context.\n"); 4347ec681f3Smrg goto error; 4357ec681f3Smrg } 4367ec681f3Smrg 4377ec681f3Smrg templat.buffer_format = PIPE_FORMAT_NV12; 4387ec681f3Smrg templat.width = enc->base.width; 4397ec681f3Smrg templat.height = enc->base.height; 4407ec681f3Smrg templat.interlaced = false; 4417ec681f3Smrg if (!(tmp_buf = context->create_video_buffer(context, &templat))) { 4427ec681f3Smrg RVID_ERR("Can't create video buffer.\n"); 4437ec681f3Smrg goto error; 4447ec681f3Smrg } 4457ec681f3Smrg 4467ec681f3Smrg enc->cpb_num = get_cpb_num(enc); 4477ec681f3Smrg if (!enc->cpb_num) 4487ec681f3Smrg goto error; 4497ec681f3Smrg 4507ec681f3Smrg get_buffer(((struct vl_video_buffer *)tmp_buf)->resources[0], NULL, &tmp_surf); 4517ec681f3Smrg 4527ec681f3Smrg cpb_size = (sscreen->info.chip_class < GFX9) 4537ec681f3Smrg ? align(tmp_surf->u.legacy.level[0].nblk_x * tmp_surf->bpe, 128) * 4547ec681f3Smrg align(tmp_surf->u.legacy.level[0].nblk_y, 32) 4557ec681f3Smrg : 4567ec681f3Smrg 4577ec681f3Smrg align(tmp_surf->u.gfx9.surf_pitch * tmp_surf->bpe, 256) * 4587ec681f3Smrg align(tmp_surf->u.gfx9.surf_height, 32); 4597ec681f3Smrg 4607ec681f3Smrg cpb_size = cpb_size * 3 / 2; 4617ec681f3Smrg cpb_size = cpb_size * enc->cpb_num; 4627ec681f3Smrg if (enc->dual_pipe) 4637ec681f3Smrg cpb_size += RVCE_MAX_AUX_BUFFER_NUM * RVCE_MAX_BITSTREAM_OUTPUT_ROW_SIZE * 2; 4647ec681f3Smrg tmp_buf->destroy(tmp_buf); 4657ec681f3Smrg if (!si_vid_create_buffer(enc->screen, &enc->cpb, cpb_size, PIPE_USAGE_DEFAULT)) { 4667ec681f3Smrg RVID_ERR("Can't create CPB buffer.\n"); 4677ec681f3Smrg goto error; 4687ec681f3Smrg } 4697ec681f3Smrg 4707ec681f3Smrg enc->cpb_array = CALLOC(enc->cpb_num, sizeof(struct rvce_cpb_slot)); 4717ec681f3Smrg if (!enc->cpb_array) 4727ec681f3Smrg goto error; 4737ec681f3Smrg 4747ec681f3Smrg reset_cpb(enc); 4757ec681f3Smrg 4767ec681f3Smrg switch (sscreen->info.vce_fw_version) { 4777ec681f3Smrg case FW_40_2_2: 4787ec681f3Smrg si_vce_40_2_2_init(enc); 4797ec681f3Smrg break; 4807ec681f3Smrg 4817ec681f3Smrg case FW_50_0_1: 4827ec681f3Smrg case FW_50_1_2: 4837ec681f3Smrg case FW_50_10_2: 4847ec681f3Smrg case FW_50_17_3: 4857ec681f3Smrg si_vce_50_init(enc); 4867ec681f3Smrg break; 4877ec681f3Smrg 4887ec681f3Smrg case FW_52_0_3: 4897ec681f3Smrg case FW_52_4_3: 4907ec681f3Smrg case FW_52_8_3: 4917ec681f3Smrg si_vce_52_init(enc); 4927ec681f3Smrg break; 4937ec681f3Smrg 4947ec681f3Smrg default: 4957ec681f3Smrg if ((sscreen->info.vce_fw_version & (0xff << 24)) >= FW_53) { 4967ec681f3Smrg si_vce_52_init(enc); 4977ec681f3Smrg } else 4987ec681f3Smrg goto error; 4997ec681f3Smrg } 5007ec681f3Smrg 5017ec681f3Smrg return &enc->base; 502af69d88dSmrg 503af69d88dSmrgerror: 5047ec681f3Smrg enc->ws->cs_destroy(&enc->cs); 505af69d88dSmrg 5067ec681f3Smrg si_vid_destroy_buffer(&enc->cpb); 507af69d88dSmrg 5087ec681f3Smrg FREE(enc->cpb_array); 5097ec681f3Smrg FREE(enc); 5107ec681f3Smrg return NULL; 511af69d88dSmrg} 512af69d88dSmrg 513af69d88dSmrg/** 514af69d88dSmrg * check if kernel has the right fw version loaded 515af69d88dSmrg */ 51601e04c3fSmrgbool si_vce_is_fw_version_supported(struct si_screen *sscreen) 517af69d88dSmrg{ 5187ec681f3Smrg switch (sscreen->info.vce_fw_version) { 5197ec681f3Smrg case FW_40_2_2: 5207ec681f3Smrg case FW_50_0_1: 5217ec681f3Smrg case FW_50_1_2: 5227ec681f3Smrg case FW_50_10_2: 5237ec681f3Smrg case FW_50_17_3: 5247ec681f3Smrg case FW_52_0_3: 5257ec681f3Smrg case FW_52_4_3: 5267ec681f3Smrg case FW_52_8_3: 5277ec681f3Smrg return true; 5287ec681f3Smrg default: 5297ec681f3Smrg if ((sscreen->info.vce_fw_version & (0xff << 24)) >= FW_53) 5307ec681f3Smrg return true; 5317ec681f3Smrg else 5327ec681f3Smrg return false; 5337ec681f3Smrg } 53401e04c3fSmrg} 53501e04c3fSmrg 53601e04c3fSmrg/** 53701e04c3fSmrg * Add the buffer as relocation to the current command submission 53801e04c3fSmrg */ 5397ec681f3Smrgvoid si_vce_add_buffer(struct rvce_encoder *enc, struct pb_buffer *buf, enum radeon_bo_usage usage, 5407ec681f3Smrg enum radeon_bo_domain domain, signed offset) 54101e04c3fSmrg{ 5427ec681f3Smrg int reloc_idx; 5437ec681f3Smrg 5447ec681f3Smrg reloc_idx = enc->ws->cs_add_buffer(&enc->cs, buf, usage | RADEON_USAGE_SYNCHRONIZED, domain, 0); 5457ec681f3Smrg if (enc->use_vm) { 5467ec681f3Smrg uint64_t addr; 5477ec681f3Smrg addr = enc->ws->buffer_get_virtual_address(buf); 5487ec681f3Smrg addr = addr + offset; 5497ec681f3Smrg RVCE_CS(addr >> 32); 5507ec681f3Smrg RVCE_CS(addr); 5517ec681f3Smrg } else { 5527ec681f3Smrg offset += enc->ws->buffer_get_reloc_offset(buf); 5537ec681f3Smrg RVCE_CS(reloc_idx * 4); 5547ec681f3Smrg RVCE_CS(offset); 5557ec681f3Smrg } 556af69d88dSmrg} 557