10ed5401bSmrg/* 20ed5401bSmrg * Copyright 2017 Advanced Micro Devices, Inc. 30ed5401bSmrg * 40ed5401bSmrg * Permission is hereby granted, free of charge, to any person obtaining a 50ed5401bSmrg * copy of this software and associated documentation files (the "Software"), 60ed5401bSmrg * to deal in the Software without restriction, including without limitation 70ed5401bSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 80ed5401bSmrg * and/or sell copies of the Software, and to permit persons to whom the 90ed5401bSmrg * Software is furnished to do so, subject to the following conditions: 100ed5401bSmrg * 110ed5401bSmrg * The above copyright notice and this permission notice shall be included in 120ed5401bSmrg * all copies or substantial portions of the Software. 130ed5401bSmrg * 140ed5401bSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 150ed5401bSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 160ed5401bSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 170ed5401bSmrg * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 180ed5401bSmrg * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 190ed5401bSmrg * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 200ed5401bSmrg * OTHER DEALINGS IN THE SOFTWARE. 
210ed5401bSmrg * 220ed5401bSmrg */ 230ed5401bSmrg 240ed5401bSmrg#include <inttypes.h> 250ed5401bSmrg#include <stdio.h> 260ed5401bSmrg 270ed5401bSmrg#include "CUnit/Basic.h" 280ed5401bSmrg 290ed5401bSmrg#include "util_math.h" 300ed5401bSmrg 310ed5401bSmrg#include "amdgpu_drm.h" 320ed5401bSmrg#include "amdgpu_internal.h" 330ed5401bSmrg#include "amdgpu_test.h" 340ed5401bSmrg#include "decode_messages.h" 350ed5401bSmrg 360ed5401bSmrg/* jpeg registers */ 370ed5401bSmrg#define mmUVD_JPEG_CNTL 0x0200 380ed5401bSmrg#define mmUVD_JPEG_RB_BASE 0x0201 390ed5401bSmrg#define mmUVD_JPEG_RB_WPTR 0x0202 400ed5401bSmrg#define mmUVD_JPEG_RB_RPTR 0x0203 410ed5401bSmrg#define mmUVD_JPEG_RB_SIZE 0x0204 420ed5401bSmrg#define mmUVD_JPEG_TIER_CNTL2 0x021a 430ed5401bSmrg#define mmUVD_JPEG_UV_TILING_CTRL 0x021c 440ed5401bSmrg#define mmUVD_JPEG_TILING_CTRL 0x021e 450ed5401bSmrg#define mmUVD_JPEG_OUTBUF_RPTR 0x0220 460ed5401bSmrg#define mmUVD_JPEG_OUTBUF_WPTR 0x0221 470ed5401bSmrg#define mmUVD_JPEG_PITCH 0x0222 480ed5401bSmrg#define mmUVD_JPEG_INT_EN 0x0229 490ed5401bSmrg#define mmUVD_JPEG_UV_PITCH 0x022b 500ed5401bSmrg#define mmUVD_JPEG_INDEX 0x023e 510ed5401bSmrg#define mmUVD_JPEG_DATA 0x023f 520ed5401bSmrg#define mmUVD_LMI_JPEG_WRITE_64BIT_BAR_HIGH 0x0438 530ed5401bSmrg#define mmUVD_LMI_JPEG_WRITE_64BIT_BAR_LOW 0x0439 540ed5401bSmrg#define mmUVD_LMI_JPEG_READ_64BIT_BAR_HIGH 0x045a 550ed5401bSmrg#define mmUVD_LMI_JPEG_READ_64BIT_BAR_LOW 0x045b 560ed5401bSmrg#define mmUVD_CTX_INDEX 0x0528 570ed5401bSmrg#define mmUVD_CTX_DATA 0x0529 580ed5401bSmrg#define mmUVD_SOFT_RESET 0x05a0 590ed5401bSmrg 600ed5401bSmrg#define vcnipUVD_JPEG_DEC_SOFT_RST 0x402f 610ed5401bSmrg#define vcnipUVD_JRBC_IB_COND_RD_TIMER 0x408e 620ed5401bSmrg#define vcnipUVD_JRBC_IB_REF_DATA 0x408f 630ed5401bSmrg#define vcnipUVD_LMI_JPEG_READ_64BIT_BAR_HIGH 0x40e1 640ed5401bSmrg#define vcnipUVD_LMI_JPEG_READ_64BIT_BAR_LOW 0x40e0 650ed5401bSmrg#define vcnipUVD_JPEG_RB_BASE 0x4001 660ed5401bSmrg#define vcnipUVD_JPEG_RB_SIZE 0x4004 
#define vcnipUVD_JPEG_RB_WPTR                   0x4002
#define vcnipUVD_JPEG_PITCH                     0x401f
#define vcnipUVD_JPEG_UV_PITCH                  0x4020
#define vcnipJPEG_DEC_ADDR_MODE                 0x4027
#define vcnipJPEG_DEC_Y_GFX10_TILING_SURFACE    0x4024
#define vcnipJPEG_DEC_UV_GFX10_TILING_SURFACE   0x4025
#define vcnipUVD_LMI_JPEG_WRITE_64BIT_BAR_HIGH  0x40e3
#define vcnipUVD_LMI_JPEG_WRITE_64BIT_BAR_LOW   0x40e2
#define vcnipUVD_JPEG_INDEX                     0x402c
#define vcnipUVD_JPEG_DATA                      0x402d
#define vcnipUVD_JPEG_TIER_CNTL2                0x400f
#define vcnipUVD_JPEG_OUTBUF_RPTR               0x401e
#define vcnipUVD_JPEG_OUTBUF_CNTL               0x401c
#define vcnipUVD_JPEG_INT_EN                    0x400a
#define vcnipUVD_JPEG_CNTL                      0x4000
#define vcnipUVD_JPEG_RB_RPTR                   0x4003
#define vcnipUVD_JPEG_OUTBUF_WPTR               0x401d


/*
 * Packet header layout, as built by RDECODE_PKTJ():
 *   bits  0..17  register offset
 *   bits 18..23  reserved field (always written as 0)
 *   bits 24..27  condition
 *   bits 28..31  packet type
 * Each field is masked to its width before being shifted into place.
 */
#define RDECODE_PKT_REG_J(x)    ((unsigned)(x) & 0x3FFFF)
#define RDECODE_PKT_RES_J(x)    (((unsigned)(x) & 0x3F) << 18)
#define RDECODE_PKT_COND_J(x)   (((unsigned)(x) & 0xF) << 24)
#define RDECODE_PKT_TYPE_J(x)   (((unsigned)(x) & 0xF) << 28)
#define RDECODE_PKTJ(reg, cond, type)   (RDECODE_PKT_REG_J(reg) | \
                                         RDECODE_PKT_RES_J(0) | \
                                         RDECODE_PKT_COND_J(cond) | \
                                         RDECODE_PKT_TYPE_J(type))

#define UVD_BASE_INST0_SEG1     0x00007E00
/*
 * The argument is parenthesized so that an expression argument (for example
 * "a << 1" or "a | b") is evaluated before the base is added.
 */
#define SOC15_REG_ADDR(reg)     (UVD_BASE_INST0_SEG1 + (reg))

/* condition / type field values passed to RDECODE_PKTJ() */
#define COND0                   0
#define COND1                   1
#define COND3                   3
#define TYPE0                   0
#define TYPE1                   1
#define TYPE3                   3

/* decode job layout constants (pitch, bitstream size, plane offsets) */
#define JPEG_DEC_DT_PITCH       0x100
#define JPEG_DEC_BSD_SIZE       0x180
#define JPEG_DEC_LUMA_OFFSET    0
#define JPEG_DEC_CHROMA_OFFSET  0x1000
1080ed5401bSmrg#define JPEG_DEC_SUM 4096 1090ed5401bSmrg#define IB_SIZE 4096 1100ed5401bSmrg#define MAX_RESOURCES 16 1110ed5401bSmrg 1120ed5401bSmrgstruct amdgpu_jpeg_bo { 1130ed5401bSmrg amdgpu_bo_handle handle; 1140ed5401bSmrg amdgpu_va_handle va_handle; 1150ed5401bSmrg uint64_t addr; 1160ed5401bSmrg uint64_t size; 1170ed5401bSmrg uint8_t *ptr; 1180ed5401bSmrg}; 1190ed5401bSmrg 1200ed5401bSmrgstatic amdgpu_device_handle device_handle; 1210ed5401bSmrgstatic uint32_t major_version; 1220ed5401bSmrgstatic uint32_t minor_version; 1230ed5401bSmrgstatic uint32_t family_id; 1240ed5401bSmrgstatic uint32_t chip_rev; 1250ed5401bSmrgstatic uint32_t chip_id; 1260ed5401bSmrgstatic uint32_t asic_id; 1270ed5401bSmrgstatic uint32_t chip_rev; 1280ed5401bSmrgstatic uint32_t chip_id; 1290ed5401bSmrg 1300ed5401bSmrgstatic amdgpu_context_handle context_handle; 1310ed5401bSmrgstatic amdgpu_bo_handle ib_handle; 1320ed5401bSmrgstatic amdgpu_va_handle ib_va_handle; 1330ed5401bSmrgstatic uint64_t ib_mc_address; 1340ed5401bSmrgstatic uint32_t *ib_cpu; 1350ed5401bSmrgstatic uint32_t len; 1360ed5401bSmrg 1370ed5401bSmrgstatic amdgpu_bo_handle resources[MAX_RESOURCES]; 1380ed5401bSmrgstatic unsigned num_resources; 1390ed5401bSmrgbool jpeg_direct_reg; 1400ed5401bSmrg 1410ed5401bSmrgstatic void set_reg_jpeg(unsigned reg, unsigned cond, unsigned type, 1420ed5401bSmrg uint32_t val); 1430ed5401bSmrgstatic void send_cmd_bitstream(uint64_t addr); 1440ed5401bSmrgstatic void send_cmd_target(uint64_t addr); 1450ed5401bSmrgstatic void send_cmd_bitstream_direct(uint64_t addr); 1460ed5401bSmrgstatic void send_cmd_target_direct(uint64_t addr); 1470ed5401bSmrg 1480ed5401bSmrgstatic void amdgpu_cs_jpeg_decode(void); 1490ed5401bSmrg 1500ed5401bSmrgCU_TestInfo jpeg_tests[] = { 1510ed5401bSmrg {"JPEG decode", amdgpu_cs_jpeg_decode}, 1520ed5401bSmrg CU_TEST_INFO_NULL, 1530ed5401bSmrg}; 1540ed5401bSmrg 1550ed5401bSmrgCU_BOOL suite_jpeg_tests_enable(void) 1560ed5401bSmrg{ 1570ed5401bSmrg struct 
drm_amdgpu_info_hw_ip info; 1580ed5401bSmrg int r; 1590ed5401bSmrg 1600ed5401bSmrg if (amdgpu_device_initialize(drm_amdgpu[0], &major_version, &minor_version, 1610ed5401bSmrg &device_handle)) 1620ed5401bSmrg return CU_FALSE; 1630ed5401bSmrg 1640ed5401bSmrg family_id = device_handle->info.family_id; 1650ed5401bSmrg asic_id = device_handle->info.asic_id; 1660ed5401bSmrg chip_rev = device_handle->info.chip_rev; 1670ed5401bSmrg chip_id = device_handle->info.chip_external_rev; 1680ed5401bSmrg 1690ed5401bSmrg r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_VCN_JPEG, 0, &info); 1700ed5401bSmrg 1710ed5401bSmrg if (amdgpu_device_deinitialize(device_handle)) 1720ed5401bSmrg return CU_FALSE; 1730ed5401bSmrg 1740ed5401bSmrg if (r != 0 || !info.available_rings || 1750ed5401bSmrg (family_id < AMDGPU_FAMILY_RV && 1760ed5401bSmrg (family_id == AMDGPU_FAMILY_AI && 1770ed5401bSmrg (chip_id - chip_rev) < 0x32))) { /* Arcturus */ 1780ed5401bSmrg printf("\n\nThe ASIC NOT support JPEG, suite disabled\n"); 1790ed5401bSmrg return CU_FALSE; 1800ed5401bSmrg } 1810ed5401bSmrg 1820ed5401bSmrg if (info.hw_ip_version_major == 1) 1830ed5401bSmrg jpeg_direct_reg = false; 184bbff01ceSmrg else if (info.hw_ip_version_major > 1 && info.hw_ip_version_major <= 4) 1850ed5401bSmrg jpeg_direct_reg = true; 1860ed5401bSmrg else 1870ed5401bSmrg return CU_FALSE; 1880ed5401bSmrg 1890ed5401bSmrg return CU_TRUE; 1900ed5401bSmrg} 1910ed5401bSmrg 1920ed5401bSmrgint suite_jpeg_tests_init(void) 1930ed5401bSmrg{ 1940ed5401bSmrg int r; 1950ed5401bSmrg 1960ed5401bSmrg r = amdgpu_device_initialize(drm_amdgpu[0], &major_version, &minor_version, 1970ed5401bSmrg &device_handle); 1980ed5401bSmrg if (r) 1990ed5401bSmrg return CUE_SINIT_FAILED; 2000ed5401bSmrg 2010ed5401bSmrg family_id = device_handle->info.family_id; 2020ed5401bSmrg 2030ed5401bSmrg r = amdgpu_cs_ctx_create(device_handle, &context_handle); 2040ed5401bSmrg if (r) 2050ed5401bSmrg return CUE_SINIT_FAILED; 2060ed5401bSmrg 2070ed5401bSmrg r = 
amdgpu_bo_alloc_and_map(device_handle, IB_SIZE, 4096, 2080ed5401bSmrg AMDGPU_GEM_DOMAIN_GTT, 0, &ib_handle, 2090ed5401bSmrg (void **)&ib_cpu, &ib_mc_address, &ib_va_handle); 2100ed5401bSmrg if (r) 2110ed5401bSmrg return CUE_SINIT_FAILED; 2120ed5401bSmrg 2130ed5401bSmrg return CUE_SUCCESS; 2140ed5401bSmrg} 2150ed5401bSmrg 2160ed5401bSmrgint suite_jpeg_tests_clean(void) 2170ed5401bSmrg{ 2180ed5401bSmrg int r; 2190ed5401bSmrg 2200ed5401bSmrg r = amdgpu_bo_unmap_and_free(ib_handle, ib_va_handle, ib_mc_address, IB_SIZE); 2210ed5401bSmrg if (r) 2220ed5401bSmrg return CUE_SCLEAN_FAILED; 2230ed5401bSmrg 2240ed5401bSmrg r = amdgpu_cs_ctx_free(context_handle); 2250ed5401bSmrg if (r) 2260ed5401bSmrg return CUE_SCLEAN_FAILED; 2270ed5401bSmrg 2280ed5401bSmrg r = amdgpu_device_deinitialize(device_handle); 2290ed5401bSmrg if (r) 2300ed5401bSmrg return CUE_SCLEAN_FAILED; 2310ed5401bSmrg 2320ed5401bSmrg return CUE_SUCCESS; 2330ed5401bSmrg} 2340ed5401bSmrg 2350ed5401bSmrgstatic int submit(unsigned ndw, unsigned ip) 2360ed5401bSmrg{ 2370ed5401bSmrg struct amdgpu_cs_request ibs_request = {0}; 2380ed5401bSmrg struct amdgpu_cs_ib_info ib_info = {0}; 2390ed5401bSmrg struct amdgpu_cs_fence fence_status = {0}; 2400ed5401bSmrg uint32_t expired; 2410ed5401bSmrg int r; 2420ed5401bSmrg 2430ed5401bSmrg ib_info.ib_mc_address = ib_mc_address; 2440ed5401bSmrg ib_info.size = ndw; 2450ed5401bSmrg 2460ed5401bSmrg ibs_request.ip_type = ip; 2470ed5401bSmrg 2480ed5401bSmrg r = amdgpu_bo_list_create(device_handle, num_resources, resources, NULL, 2490ed5401bSmrg &ibs_request.resources); 2500ed5401bSmrg if (r) 2510ed5401bSmrg return r; 2520ed5401bSmrg 2530ed5401bSmrg ibs_request.number_of_ibs = 1; 2540ed5401bSmrg ibs_request.ibs = &ib_info; 2550ed5401bSmrg ibs_request.fence_info.handle = NULL; 2560ed5401bSmrg 2570ed5401bSmrg r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1); 2580ed5401bSmrg if (r) 2590ed5401bSmrg return r; 2600ed5401bSmrg 2610ed5401bSmrg r = 
amdgpu_bo_list_destroy(ibs_request.resources); 2620ed5401bSmrg if (r) 2630ed5401bSmrg return r; 2640ed5401bSmrg 2650ed5401bSmrg fence_status.context = context_handle; 2660ed5401bSmrg fence_status.ip_type = ip; 2670ed5401bSmrg fence_status.fence = ibs_request.seq_no; 2680ed5401bSmrg 2690ed5401bSmrg r = amdgpu_cs_query_fence_status(&fence_status, AMDGPU_TIMEOUT_INFINITE, 0, 2700ed5401bSmrg &expired); 2710ed5401bSmrg if (r) 2720ed5401bSmrg return r; 2730ed5401bSmrg 2740ed5401bSmrg return 0; 2750ed5401bSmrg} 2760ed5401bSmrg 2770ed5401bSmrgstatic void alloc_resource(struct amdgpu_jpeg_bo *jpeg_bo, unsigned size, 2780ed5401bSmrg unsigned domain) 2790ed5401bSmrg{ 2800ed5401bSmrg struct amdgpu_bo_alloc_request req = {0}; 2810ed5401bSmrg amdgpu_bo_handle buf_handle; 2820ed5401bSmrg amdgpu_va_handle va_handle; 2830ed5401bSmrg uint64_t va = 0; 2840ed5401bSmrg int r; 2850ed5401bSmrg 2860ed5401bSmrg req.alloc_size = ALIGN(size, 4096); 2870ed5401bSmrg req.preferred_heap = domain; 2880ed5401bSmrg r = amdgpu_bo_alloc(device_handle, &req, &buf_handle); 2890ed5401bSmrg CU_ASSERT_EQUAL(r, 0); 2900ed5401bSmrg r = amdgpu_va_range_alloc(device_handle, amdgpu_gpu_va_range_general, 2910ed5401bSmrg req.alloc_size, 1, 0, &va, &va_handle, 0); 2920ed5401bSmrg CU_ASSERT_EQUAL(r, 0); 2930ed5401bSmrg r = amdgpu_bo_va_op(buf_handle, 0, req.alloc_size, va, 0, AMDGPU_VA_OP_MAP); 2940ed5401bSmrg CU_ASSERT_EQUAL(r, 0); 2950ed5401bSmrg jpeg_bo->addr = va; 2960ed5401bSmrg jpeg_bo->handle = buf_handle; 2970ed5401bSmrg jpeg_bo->size = req.alloc_size; 2980ed5401bSmrg jpeg_bo->va_handle = va_handle; 2990ed5401bSmrg r = amdgpu_bo_cpu_map(jpeg_bo->handle, (void **)&jpeg_bo->ptr); 3000ed5401bSmrg CU_ASSERT_EQUAL(r, 0); 3010ed5401bSmrg memset(jpeg_bo->ptr, 0, size); 3020ed5401bSmrg r = amdgpu_bo_cpu_unmap(jpeg_bo->handle); 3030ed5401bSmrg CU_ASSERT_EQUAL(r, 0); 3040ed5401bSmrg} 3050ed5401bSmrg 3060ed5401bSmrgstatic void free_resource(struct amdgpu_jpeg_bo *jpeg_bo) 3070ed5401bSmrg{ 3080ed5401bSmrg int r; 
3090ed5401bSmrg 3100ed5401bSmrg r = amdgpu_bo_va_op(jpeg_bo->handle, 0, jpeg_bo->size, jpeg_bo->addr, 0, 3110ed5401bSmrg AMDGPU_VA_OP_UNMAP); 3120ed5401bSmrg CU_ASSERT_EQUAL(r, 0); 3130ed5401bSmrg 3140ed5401bSmrg r = amdgpu_va_range_free(jpeg_bo->va_handle); 3150ed5401bSmrg CU_ASSERT_EQUAL(r, 0); 3160ed5401bSmrg 3170ed5401bSmrg r = amdgpu_bo_free(jpeg_bo->handle); 3180ed5401bSmrg CU_ASSERT_EQUAL(r, 0); 3190ed5401bSmrg memset(jpeg_bo, 0, sizeof(*jpeg_bo)); 3200ed5401bSmrg} 3210ed5401bSmrg 3220ed5401bSmrgstatic void set_reg_jpeg(unsigned reg, unsigned cond, unsigned type, 3230ed5401bSmrg uint32_t val) 3240ed5401bSmrg{ 3250ed5401bSmrg ib_cpu[len++] = RDECODE_PKTJ(reg, cond, type); 3260ed5401bSmrg ib_cpu[len++] = val; 3270ed5401bSmrg} 3280ed5401bSmrg 3290ed5401bSmrg/* send a bitstream buffer command */ 3300ed5401bSmrgstatic void send_cmd_bitstream(uint64_t addr) 3310ed5401bSmrg{ 3320ed5401bSmrg 3330ed5401bSmrg /* jpeg soft reset */ 3340ed5401bSmrg set_reg_jpeg(SOC15_REG_ADDR(mmUVD_JPEG_CNTL), COND0, TYPE0, 1); 3350ed5401bSmrg 3360ed5401bSmrg /* ensuring the Reset is asserted in SCLK domain */ 3370ed5401bSmrg set_reg_jpeg(SOC15_REG_ADDR(mmUVD_CTX_INDEX), COND0, TYPE0, 0x01C2); 3380ed5401bSmrg set_reg_jpeg(SOC15_REG_ADDR(mmUVD_CTX_DATA), COND0, TYPE0, 0x01400200); 3390ed5401bSmrg set_reg_jpeg(SOC15_REG_ADDR(mmUVD_CTX_INDEX), COND0, TYPE0, 0x01C3); 3400ed5401bSmrg set_reg_jpeg(SOC15_REG_ADDR(mmUVD_CTX_DATA), COND0, TYPE0, (1 << 9)); 3410ed5401bSmrg set_reg_jpeg(SOC15_REG_ADDR(mmUVD_SOFT_RESET), COND0, TYPE3, (1 << 9)); 3420ed5401bSmrg 3430ed5401bSmrg /* wait mem */ 3440ed5401bSmrg set_reg_jpeg(SOC15_REG_ADDR(mmUVD_JPEG_CNTL), COND0, TYPE0, 0); 3450ed5401bSmrg 3460ed5401bSmrg /* ensuring the Reset is de-asserted in SCLK domain */ 3470ed5401bSmrg set_reg_jpeg(SOC15_REG_ADDR(mmUVD_CTX_INDEX), COND0, TYPE0, 0x01C3); 3480ed5401bSmrg set_reg_jpeg(SOC15_REG_ADDR(mmUVD_CTX_DATA), COND0, TYPE0, (0 << 9)); 3490ed5401bSmrg set_reg_jpeg(SOC15_REG_ADDR(mmUVD_SOFT_RESET), COND0, TYPE3, 
(1 << 9)); 3500ed5401bSmrg 3510ed5401bSmrg /* set UVD_LMI_JPEG_READ_64BIT_BAR_LOW/HIGH based on bitstream buffer address */ 3520ed5401bSmrg set_reg_jpeg(SOC15_REG_ADDR(mmUVD_LMI_JPEG_READ_64BIT_BAR_HIGH), COND0, TYPE0, 3530ed5401bSmrg (addr >> 32)); 3540ed5401bSmrg set_reg_jpeg(SOC15_REG_ADDR(mmUVD_LMI_JPEG_READ_64BIT_BAR_LOW), COND0, TYPE0, 3550ed5401bSmrg (unsigned int)addr); 3560ed5401bSmrg 3570ed5401bSmrg /* set jpeg_rb_base */ 3580ed5401bSmrg set_reg_jpeg(SOC15_REG_ADDR(mmUVD_JPEG_RB_BASE), COND0, TYPE0, 0); 3590ed5401bSmrg 3600ed5401bSmrg /* set jpeg_rb_base */ 3610ed5401bSmrg set_reg_jpeg(SOC15_REG_ADDR(mmUVD_JPEG_RB_SIZE), COND0, TYPE0, 0xFFFFFFF0); 3620ed5401bSmrg 3630ed5401bSmrg /* set jpeg_rb_wptr */ 3640ed5401bSmrg set_reg_jpeg(SOC15_REG_ADDR(mmUVD_JPEG_RB_WPTR), COND0, TYPE0, 3650ed5401bSmrg (JPEG_DEC_BSD_SIZE >> 2)); 3660ed5401bSmrg} 3670ed5401bSmrg 3680ed5401bSmrg/* send a target buffer command */ 3690ed5401bSmrgstatic void send_cmd_target(uint64_t addr) 3700ed5401bSmrg{ 3710ed5401bSmrg 3720ed5401bSmrg set_reg_jpeg(SOC15_REG_ADDR(mmUVD_JPEG_PITCH), COND0, TYPE0, 3730ed5401bSmrg (JPEG_DEC_DT_PITCH >> 4)); 3740ed5401bSmrg set_reg_jpeg(SOC15_REG_ADDR(mmUVD_JPEG_UV_PITCH), COND0, TYPE0, 3750ed5401bSmrg (JPEG_DEC_DT_PITCH >> 4)); 3760ed5401bSmrg 3770ed5401bSmrg set_reg_jpeg(SOC15_REG_ADDR(mmUVD_JPEG_TILING_CTRL), COND0, TYPE0, 0); 3780ed5401bSmrg set_reg_jpeg(SOC15_REG_ADDR(mmUVD_JPEG_UV_TILING_CTRL), COND0, TYPE0, 0); 3790ed5401bSmrg 3800ed5401bSmrg /* set UVD_LMI_JPEG_WRITE_64BIT_BAR_LOW/HIGH based on target buffer address */ 3810ed5401bSmrg set_reg_jpeg(SOC15_REG_ADDR(mmUVD_LMI_JPEG_WRITE_64BIT_BAR_HIGH), COND0, 3820ed5401bSmrg TYPE0, (addr >> 32)); 3830ed5401bSmrg set_reg_jpeg(SOC15_REG_ADDR(mmUVD_LMI_JPEG_WRITE_64BIT_BAR_LOW), COND0, TYPE0, 3840ed5401bSmrg (unsigned int)addr); 3850ed5401bSmrg 3860ed5401bSmrg /* set output buffer data address */ 3870ed5401bSmrg set_reg_jpeg(SOC15_REG_ADDR(mmUVD_JPEG_INDEX), COND0, TYPE0, 0); 3880ed5401bSmrg 
set_reg_jpeg(SOC15_REG_ADDR(mmUVD_JPEG_DATA), COND0, TYPE0, 3890ed5401bSmrg JPEG_DEC_LUMA_OFFSET); 3900ed5401bSmrg set_reg_jpeg(SOC15_REG_ADDR(mmUVD_JPEG_INDEX), COND0, TYPE0, 1); 3910ed5401bSmrg set_reg_jpeg(SOC15_REG_ADDR(mmUVD_JPEG_DATA), COND0, TYPE0, 3920ed5401bSmrg JPEG_DEC_CHROMA_OFFSET); 3930ed5401bSmrg set_reg_jpeg(SOC15_REG_ADDR(mmUVD_JPEG_TIER_CNTL2), COND0, TYPE3, 0); 3940ed5401bSmrg 3950ed5401bSmrg /* set output buffer read pointer */ 3960ed5401bSmrg set_reg_jpeg(SOC15_REG_ADDR(mmUVD_JPEG_OUTBUF_RPTR), COND0, TYPE0, 0); 3970ed5401bSmrg 3980ed5401bSmrg /* enable error interrupts */ 3990ed5401bSmrg set_reg_jpeg(SOC15_REG_ADDR(mmUVD_JPEG_INT_EN), COND0, TYPE0, 0xFFFFFFFE); 4000ed5401bSmrg 4010ed5401bSmrg /* start engine command */ 4020ed5401bSmrg set_reg_jpeg(SOC15_REG_ADDR(mmUVD_JPEG_CNTL), COND0, TYPE0, 0x6); 4030ed5401bSmrg 4040ed5401bSmrg /* wait for job completion, wait for job JBSI fetch done */ 4050ed5401bSmrg set_reg_jpeg(SOC15_REG_ADDR(mmUVD_CTX_INDEX), COND0, TYPE0, 0x01C3); 4060ed5401bSmrg set_reg_jpeg(SOC15_REG_ADDR(mmUVD_CTX_DATA), COND0, TYPE0, 4070ed5401bSmrg (JPEG_DEC_BSD_SIZE >> 2)); 4080ed5401bSmrg set_reg_jpeg(SOC15_REG_ADDR(mmUVD_CTX_INDEX), COND0, TYPE0, 0x01C2); 4090ed5401bSmrg set_reg_jpeg(SOC15_REG_ADDR(mmUVD_CTX_DATA), COND0, TYPE0, 0x01400200); 4100ed5401bSmrg set_reg_jpeg(SOC15_REG_ADDR(mmUVD_JPEG_RB_RPTR), COND0, TYPE3, 0xFFFFFFFF); 4110ed5401bSmrg 4120ed5401bSmrg /* wait for job jpeg outbuf idle */ 4130ed5401bSmrg set_reg_jpeg(SOC15_REG_ADDR(mmUVD_CTX_INDEX), COND0, TYPE0, 0x01C3); 4140ed5401bSmrg set_reg_jpeg(SOC15_REG_ADDR(mmUVD_CTX_DATA), COND0, TYPE0, 0xFFFFFFFF); 4150ed5401bSmrg set_reg_jpeg(SOC15_REG_ADDR(mmUVD_JPEG_OUTBUF_WPTR), COND0, TYPE3, 4160ed5401bSmrg 0x00000001); 4170ed5401bSmrg 4180ed5401bSmrg /* stop engine */ 4190ed5401bSmrg set_reg_jpeg(SOC15_REG_ADDR(mmUVD_JPEG_CNTL), COND0, TYPE0, 0x4); 4200ed5401bSmrg 4210ed5401bSmrg /* asserting jpeg lmi drop */ 4220ed5401bSmrg 
set_reg_jpeg(SOC15_REG_ADDR(mmUVD_CTX_INDEX), COND0, TYPE0, 0x0005); 4230ed5401bSmrg set_reg_jpeg(SOC15_REG_ADDR(mmUVD_CTX_DATA), COND0, TYPE0, 4240ed5401bSmrg (1 << 23 | 1 << 0)); 4250ed5401bSmrg set_reg_jpeg(SOC15_REG_ADDR(mmUVD_CTX_DATA), COND0, TYPE1, 0); 4260ed5401bSmrg set_reg_jpeg(SOC15_REG_ADDR(mmUVD_CTX_DATA), COND0, TYPE0, 0); 4270ed5401bSmrg 4280ed5401bSmrg /* asserting jpeg reset */ 4290ed5401bSmrg set_reg_jpeg(SOC15_REG_ADDR(mmUVD_JPEG_CNTL), COND0, TYPE0, 1); 4300ed5401bSmrg 4310ed5401bSmrg /* ensure reset is asserted in sclk domain */ 4320ed5401bSmrg set_reg_jpeg(SOC15_REG_ADDR(mmUVD_CTX_INDEX), COND0, TYPE0, 0x01C3); 4330ed5401bSmrg set_reg_jpeg(SOC15_REG_ADDR(mmUVD_CTX_DATA), COND0, TYPE0, (1 << 9)); 4340ed5401bSmrg set_reg_jpeg(SOC15_REG_ADDR(mmUVD_SOFT_RESET), COND0, TYPE3, (1 << 9)); 4350ed5401bSmrg 4360ed5401bSmrg /* de-assert jpeg reset */ 4370ed5401bSmrg set_reg_jpeg(SOC15_REG_ADDR(mmUVD_JPEG_CNTL), COND0, TYPE0, 0); 4380ed5401bSmrg 4390ed5401bSmrg /* ensure reset is de-asserted in sclk domain */ 4400ed5401bSmrg set_reg_jpeg(SOC15_REG_ADDR(mmUVD_CTX_INDEX), COND0, TYPE0, 0x01C3); 4410ed5401bSmrg set_reg_jpeg(SOC15_REG_ADDR(mmUVD_CTX_DATA), COND0, TYPE0, (0 << 9)); 4420ed5401bSmrg set_reg_jpeg(SOC15_REG_ADDR(mmUVD_SOFT_RESET), COND0, TYPE3, (1 << 9)); 4430ed5401bSmrg 4440ed5401bSmrg /* de-asserting jpeg lmi drop */ 4450ed5401bSmrg set_reg_jpeg(SOC15_REG_ADDR(mmUVD_CTX_INDEX), COND0, TYPE0, 0x0005); 4460ed5401bSmrg set_reg_jpeg(SOC15_REG_ADDR(mmUVD_CTX_DATA), COND0, TYPE0, 0); 4470ed5401bSmrg} 4480ed5401bSmrg 4490ed5401bSmrg/* send a bitstream buffer command */ 4500ed5401bSmrgstatic void send_cmd_bitstream_direct(uint64_t addr) 4510ed5401bSmrg{ 4520ed5401bSmrg 4530ed5401bSmrg /* jpeg soft reset */ 4540ed5401bSmrg set_reg_jpeg(vcnipUVD_JPEG_DEC_SOFT_RST, COND0, TYPE0, 1); 4550ed5401bSmrg 4560ed5401bSmrg /* ensuring the Reset is asserted in SCLK domain */ 4570ed5401bSmrg set_reg_jpeg(vcnipUVD_JRBC_IB_COND_RD_TIMER, COND0, TYPE0, 0x01400200); 
4580ed5401bSmrg set_reg_jpeg(vcnipUVD_JRBC_IB_REF_DATA, COND0, TYPE0, (0x1 << 0x10)); 4590ed5401bSmrg set_reg_jpeg(vcnipUVD_JPEG_DEC_SOFT_RST, COND3, TYPE3, (0x1 << 0x10)); 4600ed5401bSmrg 4610ed5401bSmrg /* wait mem */ 4620ed5401bSmrg set_reg_jpeg(vcnipUVD_JPEG_DEC_SOFT_RST, COND0, TYPE0, 0); 4630ed5401bSmrg 4640ed5401bSmrg /* ensuring the Reset is de-asserted in SCLK domain */ 4650ed5401bSmrg set_reg_jpeg(vcnipUVD_JRBC_IB_REF_DATA, COND0, TYPE0, (0 << 0x10)); 4660ed5401bSmrg set_reg_jpeg(vcnipUVD_JPEG_DEC_SOFT_RST, COND3, TYPE3, (0x1 << 0x10)); 4670ed5401bSmrg 4680ed5401bSmrg /* set UVD_LMI_JPEG_READ_64BIT_BAR_LOW/HIGH based on bitstream buffer address */ 4690ed5401bSmrg set_reg_jpeg(vcnipUVD_LMI_JPEG_READ_64BIT_BAR_HIGH, COND0, TYPE0, 4700ed5401bSmrg (addr >> 32)); 4710ed5401bSmrg set_reg_jpeg(vcnipUVD_LMI_JPEG_READ_64BIT_BAR_LOW, COND0, TYPE0, addr); 4720ed5401bSmrg 4730ed5401bSmrg /* set jpeg_rb_base */ 4740ed5401bSmrg set_reg_jpeg(vcnipUVD_JPEG_RB_BASE, COND0, TYPE0, 0); 4750ed5401bSmrg 4760ed5401bSmrg /* set jpeg_rb_base */ 4770ed5401bSmrg set_reg_jpeg(vcnipUVD_JPEG_RB_SIZE, COND0, TYPE0, 0xFFFFFFF0); 4780ed5401bSmrg 4790ed5401bSmrg /* set jpeg_rb_wptr */ 4800ed5401bSmrg set_reg_jpeg(vcnipUVD_JPEG_RB_WPTR, COND0, TYPE0, (JPEG_DEC_BSD_SIZE >> 2)); 4810ed5401bSmrg} 4820ed5401bSmrg 4830ed5401bSmrg/* send a target buffer command */ 4840ed5401bSmrgstatic void send_cmd_target_direct(uint64_t addr) 4850ed5401bSmrg{ 4860ed5401bSmrg 4870ed5401bSmrg set_reg_jpeg(vcnipUVD_JPEG_PITCH, COND0, TYPE0, (JPEG_DEC_DT_PITCH >> 4)); 4880ed5401bSmrg set_reg_jpeg(vcnipUVD_JPEG_UV_PITCH, COND0, TYPE0, (JPEG_DEC_DT_PITCH >> 4)); 4890ed5401bSmrg 4900ed5401bSmrg set_reg_jpeg(vcnipJPEG_DEC_ADDR_MODE, COND0, TYPE0, 0); 4910ed5401bSmrg set_reg_jpeg(vcnipJPEG_DEC_Y_GFX10_TILING_SURFACE, COND0, TYPE0, 0); 4920ed5401bSmrg set_reg_jpeg(vcnipJPEG_DEC_UV_GFX10_TILING_SURFACE, COND0, TYPE0, 0); 4930ed5401bSmrg 4940ed5401bSmrg /* set UVD_LMI_JPEG_WRITE_64BIT_BAR_LOW/HIGH based on target buffer 
address */ 4950ed5401bSmrg set_reg_jpeg(vcnipUVD_LMI_JPEG_WRITE_64BIT_BAR_HIGH, COND0, TYPE0, 4960ed5401bSmrg (addr >> 32)); 4970ed5401bSmrg set_reg_jpeg(vcnipUVD_LMI_JPEG_WRITE_64BIT_BAR_LOW, COND0, TYPE0, addr); 4980ed5401bSmrg 4990ed5401bSmrg /* set output buffer data address */ 5000ed5401bSmrg set_reg_jpeg(vcnipUVD_JPEG_INDEX, COND0, TYPE0, 0); 5010ed5401bSmrg set_reg_jpeg(vcnipUVD_JPEG_DATA, COND0, TYPE0, JPEG_DEC_LUMA_OFFSET); 5020ed5401bSmrg set_reg_jpeg(vcnipUVD_JPEG_INDEX, COND0, TYPE0, 1); 5030ed5401bSmrg set_reg_jpeg(vcnipUVD_JPEG_DATA, COND0, TYPE0, JPEG_DEC_CHROMA_OFFSET); 5040ed5401bSmrg set_reg_jpeg(vcnipUVD_JPEG_TIER_CNTL2, COND0, 0, 0); 5050ed5401bSmrg 5060ed5401bSmrg /* set output buffer read pointer */ 5070ed5401bSmrg set_reg_jpeg(vcnipUVD_JPEG_OUTBUF_RPTR, COND0, TYPE0, 0); 5080ed5401bSmrg set_reg_jpeg(vcnipUVD_JPEG_OUTBUF_CNTL, COND0, TYPE0, 5090ed5401bSmrg ((0x00001587 & (~0x00000180L)) | (0x1 << 0x7) | (0x1 << 0x6))); 5100ed5401bSmrg 5110ed5401bSmrg /* enable error interrupts */ 5120ed5401bSmrg set_reg_jpeg(vcnipUVD_JPEG_INT_EN, COND0, TYPE0, 0xFFFFFFFE); 5130ed5401bSmrg 5140ed5401bSmrg /* start engine command */ 5150ed5401bSmrg set_reg_jpeg(vcnipUVD_JPEG_CNTL, COND0, TYPE0, 0xE); 5160ed5401bSmrg 5170ed5401bSmrg /* wait for job completion, wait for job JBSI fetch done */ 5180ed5401bSmrg set_reg_jpeg(vcnipUVD_JRBC_IB_REF_DATA, COND0, TYPE0, 5190ed5401bSmrg (JPEG_DEC_BSD_SIZE >> 2)); 5200ed5401bSmrg set_reg_jpeg(vcnipUVD_JRBC_IB_COND_RD_TIMER, COND0, TYPE0, 0x01400200); 5210ed5401bSmrg set_reg_jpeg(vcnipUVD_JPEG_RB_RPTR, COND3, TYPE3, 0xFFFFFFFF); 5220ed5401bSmrg 5230ed5401bSmrg /* wait for job jpeg outbuf idle */ 5240ed5401bSmrg set_reg_jpeg(vcnipUVD_JRBC_IB_REF_DATA, COND0, TYPE0, 0xFFFFFFFF); 5250ed5401bSmrg set_reg_jpeg(vcnipUVD_JPEG_OUTBUF_WPTR, COND3, TYPE3, 0x00000001); 5260ed5401bSmrg 5270ed5401bSmrg /* stop engine */ 5280ed5401bSmrg set_reg_jpeg(vcnipUVD_JPEG_CNTL, COND0, TYPE0, 0x4); 5290ed5401bSmrg} 5300ed5401bSmrg 
5310ed5401bSmrgstatic void amdgpu_cs_jpeg_decode(void) 5320ed5401bSmrg{ 5330ed5401bSmrg 5340ed5401bSmrg struct amdgpu_jpeg_bo dec_buf; 5350ed5401bSmrg int size, r; 5360ed5401bSmrg uint8_t *dec; 5370ed5401bSmrg int sum = 0, i, j; 5380ed5401bSmrg 5390ed5401bSmrg size = 16 * 1024; /* 8K bitstream + 8K output */ 5400ed5401bSmrg num_resources = 0; 5410ed5401bSmrg alloc_resource(&dec_buf, size, AMDGPU_GEM_DOMAIN_VRAM); 5420ed5401bSmrg resources[num_resources++] = dec_buf.handle; 5430ed5401bSmrg resources[num_resources++] = ib_handle; 5440ed5401bSmrg r = amdgpu_bo_cpu_map(dec_buf.handle, (void **)&dec_buf.ptr); 5450ed5401bSmrg CU_ASSERT_EQUAL(r, 0); 5460ed5401bSmrg memcpy(dec_buf.ptr, jpeg_bitstream, sizeof(jpeg_bitstream)); 5470ed5401bSmrg 5480ed5401bSmrg len = 0; 5490ed5401bSmrg 5500ed5401bSmrg if (jpeg_direct_reg == true) { 5510ed5401bSmrg send_cmd_bitstream_direct(dec_buf.addr); 5520ed5401bSmrg send_cmd_target_direct(dec_buf.addr + (size / 2)); 5530ed5401bSmrg } else { 5540ed5401bSmrg send_cmd_bitstream(dec_buf.addr); 5550ed5401bSmrg send_cmd_target(dec_buf.addr + (size / 2)); 5560ed5401bSmrg } 5570ed5401bSmrg 5580ed5401bSmrg amdgpu_bo_cpu_unmap(dec_buf.handle); 5590ed5401bSmrg r = submit(len, AMDGPU_HW_IP_VCN_JPEG); 5600ed5401bSmrg CU_ASSERT_EQUAL(r, 0); 5610ed5401bSmrg 5620ed5401bSmrg r = amdgpu_bo_cpu_map(dec_buf.handle, (void **)&dec_buf.ptr); 5630ed5401bSmrg CU_ASSERT_EQUAL(r, 0); 5640ed5401bSmrg 5650ed5401bSmrg dec = dec_buf.ptr + (size / 2); 5660ed5401bSmrg 5670ed5401bSmrg /* calculate result checksum */ 5680ed5401bSmrg for (i = 0; i < 8; i++) 5690ed5401bSmrg for (j = 0; j < 8; j++) 5700ed5401bSmrg sum += *((dec + JPEG_DEC_LUMA_OFFSET + i * JPEG_DEC_DT_PITCH) + j); 5710ed5401bSmrg for (i = 0; i < 4; i++) 5720ed5401bSmrg for (j = 0; j < 8; j++) 5730ed5401bSmrg sum += *((dec + JPEG_DEC_CHROMA_OFFSET + i * JPEG_DEC_DT_PITCH) + j); 5740ed5401bSmrg 5750ed5401bSmrg amdgpu_bo_cpu_unmap(dec_buf.handle); 5760ed5401bSmrg CU_ASSERT_EQUAL(sum, JPEG_DEC_SUM); 
5770ed5401bSmrg 5780ed5401bSmrg free_resource(&dec_buf); 5790ed5401bSmrg} 580