/* basic_tests.c revision 88f8a8d2 */
13f012e29Smrg/* 23f012e29Smrg * Copyright 2014 Advanced Micro Devices, Inc. 33f012e29Smrg * 43f012e29Smrg * Permission is hereby granted, free of charge, to any person obtaining a 53f012e29Smrg * copy of this software and associated documentation files (the "Software"), 63f012e29Smrg * to deal in the Software without restriction, including without limitation 73f012e29Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 83f012e29Smrg * and/or sell copies of the Software, and to permit persons to whom the 93f012e29Smrg * Software is furnished to do so, subject to the following conditions: 103f012e29Smrg * 113f012e29Smrg * The above copyright notice and this permission notice shall be included in 123f012e29Smrg * all copies or substantial portions of the Software. 133f012e29Smrg * 143f012e29Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 153f012e29Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 163f012e29Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 173f012e29Smrg * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 183f012e29Smrg * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 193f012e29Smrg * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 203f012e29Smrg * OTHER DEALINGS IN THE SOFTWARE. 
213f012e29Smrg * 223f012e29Smrg*/ 233f012e29Smrg 243f012e29Smrg#include <stdio.h> 253f012e29Smrg#include <stdlib.h> 263f012e29Smrg#include <unistd.h> 2788f8a8d2Smrg#include <sys/types.h> 2888f8a8d2Smrg#ifdef MAJOR_IN_SYSMACROS 2988f8a8d2Smrg#include <sys/sysmacros.h> 3088f8a8d2Smrg#endif 3188f8a8d2Smrg#include <sys/stat.h> 3288f8a8d2Smrg#include <fcntl.h> 333f012e29Smrg#ifdef HAVE_ALLOCA_H 343f012e29Smrg# include <alloca.h> 353f012e29Smrg#endif 3600a23bdaSmrg#include <sys/wait.h> 373f012e29Smrg 383f012e29Smrg#include "CUnit/Basic.h" 393f012e29Smrg 403f012e29Smrg#include "amdgpu_test.h" 413f012e29Smrg#include "amdgpu_drm.h" 427cdc0497Smrg#include "util_math.h" 433f012e29Smrg 443f012e29Smrgstatic amdgpu_device_handle device_handle; 453f012e29Smrgstatic uint32_t major_version; 463f012e29Smrgstatic uint32_t minor_version; 47d8807b2fSmrgstatic uint32_t family_id; 483f012e29Smrg 493f012e29Smrgstatic void amdgpu_query_info_test(void); 503f012e29Smrgstatic void amdgpu_command_submission_gfx(void); 513f012e29Smrgstatic void amdgpu_command_submission_compute(void); 52d8807b2fSmrgstatic void amdgpu_command_submission_multi_fence(void); 533f012e29Smrgstatic void amdgpu_command_submission_sdma(void); 543f012e29Smrgstatic void amdgpu_userptr_test(void); 553f012e29Smrgstatic void amdgpu_semaphore_test(void); 5600a23bdaSmrgstatic void amdgpu_sync_dependency_test(void); 5700a23bdaSmrgstatic void amdgpu_bo_eviction_test(void); 5888f8a8d2Smrgstatic void amdgpu_compute_dispatch_test(void); 5988f8a8d2Smrgstatic void amdgpu_gfx_dispatch_test(void); 605324fb0dSmrgstatic void amdgpu_draw_test(void); 6188f8a8d2Smrgstatic void amdgpu_gpu_reset_test(void); 623f012e29Smrg 633f012e29Smrgstatic void amdgpu_command_submission_write_linear_helper(unsigned ip_type); 643f012e29Smrgstatic void amdgpu_command_submission_const_fill_helper(unsigned ip_type); 653f012e29Smrgstatic void amdgpu_command_submission_copy_linear_helper(unsigned ip_type); 6600a23bdaSmrgstatic void 
amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle, 6700a23bdaSmrg unsigned ip_type, 6800a23bdaSmrg int instance, int pm4_dw, uint32_t *pm4_src, 6900a23bdaSmrg int res_cnt, amdgpu_bo_handle *resources, 7000a23bdaSmrg struct amdgpu_cs_ib_info *ib_info, 7100a23bdaSmrg struct amdgpu_cs_request *ibs_request); 7200a23bdaSmrg 733f012e29SmrgCU_TestInfo basic_tests[] = { 743f012e29Smrg { "Query Info Test", amdgpu_query_info_test }, 753f012e29Smrg { "Userptr Test", amdgpu_userptr_test }, 7600a23bdaSmrg { "bo eviction Test", amdgpu_bo_eviction_test }, 773f012e29Smrg { "Command submission Test (GFX)", amdgpu_command_submission_gfx }, 783f012e29Smrg { "Command submission Test (Compute)", amdgpu_command_submission_compute }, 79d8807b2fSmrg { "Command submission Test (Multi-Fence)", amdgpu_command_submission_multi_fence }, 803f012e29Smrg { "Command submission Test (SDMA)", amdgpu_command_submission_sdma }, 813f012e29Smrg { "SW semaphore Test", amdgpu_semaphore_test }, 8200a23bdaSmrg { "Sync dependency Test", amdgpu_sync_dependency_test }, 8388f8a8d2Smrg { "Dispatch Test (Compute)", amdgpu_compute_dispatch_test }, 8488f8a8d2Smrg { "Dispatch Test (GFX)", amdgpu_gfx_dispatch_test }, 855324fb0dSmrg { "Draw Test", amdgpu_draw_test }, 8688f8a8d2Smrg { "GPU reset Test", amdgpu_gpu_reset_test }, 873f012e29Smrg CU_TEST_INFO_NULL, 883f012e29Smrg}; 893f012e29Smrg#define BUFFER_SIZE (8 * 1024) 903f012e29Smrg#define SDMA_PKT_HEADER_op_offset 0 913f012e29Smrg#define SDMA_PKT_HEADER_op_mask 0x000000FF 923f012e29Smrg#define SDMA_PKT_HEADER_op_shift 0 933f012e29Smrg#define SDMA_PKT_HEADER_OP(x) (((x) & SDMA_PKT_HEADER_op_mask) << SDMA_PKT_HEADER_op_shift) 943f012e29Smrg#define SDMA_OPCODE_CONSTANT_FILL 11 953f012e29Smrg# define SDMA_CONSTANT_FILL_EXTRA_SIZE(x) ((x) << 14) 963f012e29Smrg /* 0 = byte fill 973f012e29Smrg * 2 = DW fill 983f012e29Smrg */ 993f012e29Smrg#define SDMA_PACKET(op, sub_op, e) ((((e) & 0xFFFF) << 16) | \ 1003f012e29Smrg (((sub_op) & 0xFF) << 8) | \ 
1013f012e29Smrg (((op) & 0xFF) << 0)) 1023f012e29Smrg#define SDMA_OPCODE_WRITE 2 1033f012e29Smrg# define SDMA_WRITE_SUB_OPCODE_LINEAR 0 1043f012e29Smrg# define SDMA_WRTIE_SUB_OPCODE_TILED 1 1053f012e29Smrg 1063f012e29Smrg#define SDMA_OPCODE_COPY 1 1073f012e29Smrg# define SDMA_COPY_SUB_OPCODE_LINEAR 0 1083f012e29Smrg 1093f012e29Smrg#define GFX_COMPUTE_NOP 0xffff1000 1103f012e29Smrg#define SDMA_NOP 0x0 1113f012e29Smrg 1123f012e29Smrg/* PM4 */ 1133f012e29Smrg#define PACKET_TYPE0 0 1143f012e29Smrg#define PACKET_TYPE1 1 1153f012e29Smrg#define PACKET_TYPE2 2 1163f012e29Smrg#define PACKET_TYPE3 3 1173f012e29Smrg 1183f012e29Smrg#define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3) 1193f012e29Smrg#define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF) 1203f012e29Smrg#define CP_PACKET0_GET_REG(h) ((h) & 0xFFFF) 1213f012e29Smrg#define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF) 1223f012e29Smrg#define PACKET0(reg, n) ((PACKET_TYPE0 << 30) | \ 1233f012e29Smrg ((reg) & 0xFFFF) | \ 1243f012e29Smrg ((n) & 0x3FFF) << 16) 1253f012e29Smrg#define CP_PACKET2 0x80000000 1263f012e29Smrg#define PACKET2_PAD_SHIFT 0 1273f012e29Smrg#define PACKET2_PAD_MASK (0x3fffffff << 0) 1283f012e29Smrg 1293f012e29Smrg#define PACKET2(v) (CP_PACKET2 | REG_SET(PACKET2_PAD, (v))) 1303f012e29Smrg 1313f012e29Smrg#define PACKET3(op, n) ((PACKET_TYPE3 << 30) | \ 1323f012e29Smrg (((op) & 0xFF) << 8) | \ 1333f012e29Smrg ((n) & 0x3FFF) << 16) 1345324fb0dSmrg#define PACKET3_COMPUTE(op, n) PACKET3(op, n) | (1 << 1) 1353f012e29Smrg 1363f012e29Smrg/* Packet 3 types */ 1373f012e29Smrg#define PACKET3_NOP 0x10 1383f012e29Smrg 1393f012e29Smrg#define PACKET3_WRITE_DATA 0x37 1403f012e29Smrg#define WRITE_DATA_DST_SEL(x) ((x) << 8) 1413f012e29Smrg /* 0 - register 1423f012e29Smrg * 1 - memory (sync - via GRBM) 1433f012e29Smrg * 2 - gl2 1443f012e29Smrg * 3 - gds 1453f012e29Smrg * 4 - reserved 1463f012e29Smrg * 5 - memory (async - direct) 1473f012e29Smrg */ 1483f012e29Smrg#define WR_ONE_ADDR (1 << 16) 1493f012e29Smrg#define WR_CONFIRM 
(1 << 20) 1503f012e29Smrg#define WRITE_DATA_CACHE_POLICY(x) ((x) << 25) 1513f012e29Smrg /* 0 - LRU 1523f012e29Smrg * 1 - Stream 1533f012e29Smrg */ 1543f012e29Smrg#define WRITE_DATA_ENGINE_SEL(x) ((x) << 30) 1553f012e29Smrg /* 0 - me 1563f012e29Smrg * 1 - pfp 1573f012e29Smrg * 2 - ce 1583f012e29Smrg */ 1593f012e29Smrg 1603f012e29Smrg#define PACKET3_DMA_DATA 0x50 1613f012e29Smrg/* 1. header 1623f012e29Smrg * 2. CONTROL 1633f012e29Smrg * 3. SRC_ADDR_LO or DATA [31:0] 1643f012e29Smrg * 4. SRC_ADDR_HI [31:0] 1653f012e29Smrg * 5. DST_ADDR_LO [31:0] 1663f012e29Smrg * 6. DST_ADDR_HI [7:0] 1673f012e29Smrg * 7. COMMAND [30:21] | BYTE_COUNT [20:0] 1683f012e29Smrg */ 1693f012e29Smrg/* CONTROL */ 1703f012e29Smrg# define PACKET3_DMA_DATA_ENGINE(x) ((x) << 0) 1713f012e29Smrg /* 0 - ME 1723f012e29Smrg * 1 - PFP 1733f012e29Smrg */ 1743f012e29Smrg# define PACKET3_DMA_DATA_SRC_CACHE_POLICY(x) ((x) << 13) 1753f012e29Smrg /* 0 - LRU 1763f012e29Smrg * 1 - Stream 1773f012e29Smrg * 2 - Bypass 1783f012e29Smrg */ 1793f012e29Smrg# define PACKET3_DMA_DATA_SRC_VOLATILE (1 << 15) 1803f012e29Smrg# define PACKET3_DMA_DATA_DST_SEL(x) ((x) << 20) 1813f012e29Smrg /* 0 - DST_ADDR using DAS 1823f012e29Smrg * 1 - GDS 1833f012e29Smrg * 3 - DST_ADDR using L2 1843f012e29Smrg */ 1853f012e29Smrg# define PACKET3_DMA_DATA_DST_CACHE_POLICY(x) ((x) << 25) 1863f012e29Smrg /* 0 - LRU 1873f012e29Smrg * 1 - Stream 1883f012e29Smrg * 2 - Bypass 1893f012e29Smrg */ 1903f012e29Smrg# define PACKET3_DMA_DATA_DST_VOLATILE (1 << 27) 1913f012e29Smrg# define PACKET3_DMA_DATA_SRC_SEL(x) ((x) << 29) 1923f012e29Smrg /* 0 - SRC_ADDR using SAS 1933f012e29Smrg * 1 - GDS 1943f012e29Smrg * 2 - DATA 1953f012e29Smrg * 3 - SRC_ADDR using L2 1963f012e29Smrg */ 1973f012e29Smrg# define PACKET3_DMA_DATA_CP_SYNC (1 << 31) 1983f012e29Smrg/* COMMAND */ 1993f012e29Smrg# define PACKET3_DMA_DATA_DIS_WC (1 << 21) 2003f012e29Smrg# define PACKET3_DMA_DATA_CMD_SRC_SWAP(x) ((x) << 22) 2013f012e29Smrg /* 0 - none 2023f012e29Smrg * 1 - 8 in 16 
2033f012e29Smrg * 2 - 8 in 32 2043f012e29Smrg * 3 - 8 in 64 2053f012e29Smrg */ 2063f012e29Smrg# define PACKET3_DMA_DATA_CMD_DST_SWAP(x) ((x) << 24) 2073f012e29Smrg /* 0 - none 2083f012e29Smrg * 1 - 8 in 16 2093f012e29Smrg * 2 - 8 in 32 2103f012e29Smrg * 3 - 8 in 64 2113f012e29Smrg */ 2123f012e29Smrg# define PACKET3_DMA_DATA_CMD_SAS (1 << 26) 2133f012e29Smrg /* 0 - memory 2143f012e29Smrg * 1 - register 2153f012e29Smrg */ 2163f012e29Smrg# define PACKET3_DMA_DATA_CMD_DAS (1 << 27) 2173f012e29Smrg /* 0 - memory 2183f012e29Smrg * 1 - register 2193f012e29Smrg */ 2203f012e29Smrg# define PACKET3_DMA_DATA_CMD_SAIC (1 << 28) 2213f012e29Smrg# define PACKET3_DMA_DATA_CMD_DAIC (1 << 29) 2223f012e29Smrg# define PACKET3_DMA_DATA_CMD_RAW_WAIT (1 << 30) 2233f012e29Smrg 224d8807b2fSmrg#define SDMA_PACKET_SI(op, b, t, s, cnt) ((((op) & 0xF) << 28) | \ 225d8807b2fSmrg (((b) & 0x1) << 26) | \ 226d8807b2fSmrg (((t) & 0x1) << 23) | \ 227d8807b2fSmrg (((s) & 0x1) << 22) | \ 228d8807b2fSmrg (((cnt) & 0xFFFFF) << 0)) 229d8807b2fSmrg#define SDMA_OPCODE_COPY_SI 3 230d8807b2fSmrg#define SDMA_OPCODE_CONSTANT_FILL_SI 13 231d8807b2fSmrg#define SDMA_NOP_SI 0xf 232d8807b2fSmrg#define GFX_COMPUTE_NOP_SI 0x80000000 233d8807b2fSmrg#define PACKET3_DMA_DATA_SI 0x41 234d8807b2fSmrg# define PACKET3_DMA_DATA_SI_ENGINE(x) ((x) << 27) 235d8807b2fSmrg /* 0 - ME 236d8807b2fSmrg * 1 - PFP 237d8807b2fSmrg */ 238d8807b2fSmrg# define PACKET3_DMA_DATA_SI_DST_SEL(x) ((x) << 20) 239d8807b2fSmrg /* 0 - DST_ADDR using DAS 240d8807b2fSmrg * 1 - GDS 241d8807b2fSmrg * 3 - DST_ADDR using L2 242d8807b2fSmrg */ 243d8807b2fSmrg# define PACKET3_DMA_DATA_SI_SRC_SEL(x) ((x) << 29) 244d8807b2fSmrg /* 0 - SRC_ADDR using SAS 245d8807b2fSmrg * 1 - GDS 246d8807b2fSmrg * 2 - DATA 247d8807b2fSmrg * 3 - SRC_ADDR using L2 248d8807b2fSmrg */ 249d8807b2fSmrg# define PACKET3_DMA_DATA_SI_CP_SYNC (1 << 31) 250d8807b2fSmrg 25100a23bdaSmrg 25200a23bdaSmrg#define PKT3_CONTEXT_CONTROL 0x28 25300a23bdaSmrg#define CONTEXT_CONTROL_LOAD_ENABLE(x) 
(((unsigned)(x) & 0x1) << 31) 25400a23bdaSmrg#define CONTEXT_CONTROL_LOAD_CE_RAM(x) (((unsigned)(x) & 0x1) << 28) 25500a23bdaSmrg#define CONTEXT_CONTROL_SHADOW_ENABLE(x) (((unsigned)(x) & 0x1) << 31) 25600a23bdaSmrg 25700a23bdaSmrg#define PKT3_CLEAR_STATE 0x12 25800a23bdaSmrg 25900a23bdaSmrg#define PKT3_SET_SH_REG 0x76 26000a23bdaSmrg#define PACKET3_SET_SH_REG_START 0x00002c00 26100a23bdaSmrg 26200a23bdaSmrg#define PACKET3_DISPATCH_DIRECT 0x15 2635324fb0dSmrg#define PACKET3_EVENT_WRITE 0x46 2645324fb0dSmrg#define PACKET3_ACQUIRE_MEM 0x58 2655324fb0dSmrg#define PACKET3_SET_CONTEXT_REG 0x69 2665324fb0dSmrg#define PACKET3_SET_UCONFIG_REG 0x79 2675324fb0dSmrg#define PACKET3_DRAW_INDEX_AUTO 0x2D 26800a23bdaSmrg/* gfx 8 */ 26900a23bdaSmrg#define mmCOMPUTE_PGM_LO 0x2e0c 27000a23bdaSmrg#define mmCOMPUTE_PGM_RSRC1 0x2e12 27100a23bdaSmrg#define mmCOMPUTE_TMPRING_SIZE 0x2e18 27200a23bdaSmrg#define mmCOMPUTE_USER_DATA_0 0x2e40 27300a23bdaSmrg#define mmCOMPUTE_USER_DATA_1 0x2e41 27400a23bdaSmrg#define mmCOMPUTE_RESOURCE_LIMITS 0x2e15 27500a23bdaSmrg#define mmCOMPUTE_NUM_THREAD_X 0x2e07 27600a23bdaSmrg 27700a23bdaSmrg 27800a23bdaSmrg 27900a23bdaSmrg#define SWAP_32(num) (((num & 0xff000000) >> 24) | \ 28000a23bdaSmrg ((num & 0x0000ff00) << 8) | \ 28100a23bdaSmrg ((num & 0x00ff0000) >> 8) | \ 28200a23bdaSmrg ((num & 0x000000ff) << 24)) 28300a23bdaSmrg 28400a23bdaSmrg 28500a23bdaSmrg/* Shader code 28600a23bdaSmrg * void main() 28700a23bdaSmrg{ 28800a23bdaSmrg 28900a23bdaSmrg float x = some_input; 29000a23bdaSmrg for (unsigned i = 0; i < 1000000; i++) 29100a23bdaSmrg x = sin(x); 29200a23bdaSmrg 29300a23bdaSmrg u[0] = 42u; 29400a23bdaSmrg} 29500a23bdaSmrg*/ 29600a23bdaSmrg 29700a23bdaSmrgstatic uint32_t shader_bin[] = { 29800a23bdaSmrg SWAP_32(0x800082be), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0x040085bf), 29900a23bdaSmrg SWAP_32(0x02810281), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0xfcff84bf), 30000a23bdaSmrg SWAP_32(0xff0083be), SWAP_32(0x00f00000), 
SWAP_32(0xc10082be), SWAP_32(0xaa02007e), 30100a23bdaSmrg SWAP_32(0x000070e0), SWAP_32(0x00000080), SWAP_32(0x000081bf) 30200a23bdaSmrg}; 30300a23bdaSmrg 30400a23bdaSmrg#define CODE_OFFSET 512 30500a23bdaSmrg#define DATA_OFFSET 1024 30600a23bdaSmrg 3075324fb0dSmrgenum cs_type { 3085324fb0dSmrg CS_BUFFERCLEAR, 3095324fb0dSmrg CS_BUFFERCOPY 3105324fb0dSmrg}; 3115324fb0dSmrg 3125324fb0dSmrgstatic const uint32_t bufferclear_cs_shader_gfx9[] = { 3135324fb0dSmrg 0xD1FD0000, 0x04010C08, 0x7E020204, 0x7E040205, 3145324fb0dSmrg 0x7E060206, 0x7E080207, 0xE01C2000, 0x80000100, 3155324fb0dSmrg 0xBF810000 3165324fb0dSmrg}; 3175324fb0dSmrg 3185324fb0dSmrgstatic const uint32_t bufferclear_cs_shader_registers_gfx9[][2] = { 3195324fb0dSmrg {0x2e12, 0x000C0041}, //{ mmCOMPUTE_PGM_RSRC1, 0x000C0041 }, 3205324fb0dSmrg {0x2e13, 0x00000090}, //{ mmCOMPUTE_PGM_RSRC2, 0x00000090 }, 3215324fb0dSmrg {0x2e07, 0x00000040}, //{ mmCOMPUTE_NUM_THREAD_X, 0x00000040 }, 3225324fb0dSmrg {0x2e08, 0x00000001}, //{ mmCOMPUTE_NUM_THREAD_Y, 0x00000001 }, 3235324fb0dSmrg {0x2e09, 0x00000001}, //{ mmCOMPUTE_NUM_THREAD_Z, 0x00000001 } 3245324fb0dSmrg}; 3255324fb0dSmrg 3265324fb0dSmrgstatic const uint32_t bufferclear_cs_shader_registers_num_gfx9 = 5; 3275324fb0dSmrg 3285324fb0dSmrgstatic const uint32_t buffercopy_cs_shader_gfx9[] = { 3295324fb0dSmrg 0xD1FD0000, 0x04010C08, 0xE00C2000, 0x80000100, 3305324fb0dSmrg 0xBF8C0F70, 0xE01C2000, 0x80010100, 0xBF810000 3315324fb0dSmrg}; 3325324fb0dSmrg 3335324fb0dSmrgstatic const uint32_t preamblecache_gfx9[] = { 3345324fb0dSmrg 0xc0026900, 0x81, 0x80000000, 0x40004000, 0xc0026900, 0x8c, 0xaa99aaaa, 0x0, 3355324fb0dSmrg 0xc0026900, 0x90, 0x80000000, 0x40004000, 0xc0026900, 0x94, 0x80000000, 0x40004000, 3365324fb0dSmrg 0xc0026900, 0xb4, 0x0, 0x3f800000, 0xc0016900, 0x103, 0x0, 3375324fb0dSmrg 0xc0016900, 0x208, 0x0, 0xc0016900, 0x290, 0x0, 3385324fb0dSmrg 0xc0016900, 0x2a1, 0x0, 0xc0026900, 0x2ad, 0x0, 0x0, 3395324fb0dSmrg 0xc0016900, 0x2d5, 0x10000, 0xc0016900, 0x2dc, 
0x0, 3405324fb0dSmrg 0xc0066900, 0x2de, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xc0026900, 0x2e5, 0x0, 0x0, 3415324fb0dSmrg 0xc0056900, 0x2f9, 0x5, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 34288f8a8d2Smrg 0xc0036900, 0x311, 0x3, 0, 0x100000, 0xc0026900, 0x316, 0x1e, 0x20, 3435324fb0dSmrg 0xc0016900, 0x349, 0x0, 0xc0016900, 0x358, 0x0, 0xc0016900, 0x367, 0x0, 3445324fb0dSmrg 0xc0016900, 0x376, 0x0, 0xc0016900, 0x385, 0x0, 0xc0016900, 0x19, 0x0, 3455324fb0dSmrg 0xc0056900, 0xe8, 0x0, 0x0, 0x0, 0x0, 0x0, 3465324fb0dSmrg 0xc0076900, 0x1e1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 3475324fb0dSmrg 0xc0026900, 0x204, 0x90000, 0x4, 0xc0046900, 0x20c, 0x0, 0x0, 0x0, 0x0, 3485324fb0dSmrg 0xc0016900, 0x2b2, 0x0, 0xc0026900, 0x30e, 0xffffffff, 0xffffffff, 34988f8a8d2Smrg 0xc0016900, 0x314, 0x0, 0xc0016900, 0x2a6, 0, 0xc0016900, 0x210, 0, 35088f8a8d2Smrg 0xc0002f00, 0x1, 0xc0016900, 0x1, 0x1, 3515324fb0dSmrg 0xc0016900, 0x18, 0x2, 0xc0016900, 0x206, 0x300, 0xc0017900, 0x20000243, 0x0, 3525324fb0dSmrg 0xc0017900, 0x248, 0xffffffff, 0xc0017900, 0x249, 0x0, 0xc0017900, 0x24a, 0x0, 3535324fb0dSmrg 0xc0017900, 0x24b, 0x0 3545324fb0dSmrg}; 3555324fb0dSmrg 3565324fb0dSmrgenum ps_type { 3575324fb0dSmrg PS_CONST, 3585324fb0dSmrg PS_TEX 3595324fb0dSmrg}; 3605324fb0dSmrg 3615324fb0dSmrgstatic const uint32_t ps_const_shader_gfx9[] = { 3625324fb0dSmrg 0x7E000200, 0x7E020201, 0x7E040202, 0x7E060203, 3635324fb0dSmrg 0xD2960000, 0x00020300, 0xD2960001, 0x00020702, 3645324fb0dSmrg 0xC4001C0F, 0x00000100, 0xBF810000 3655324fb0dSmrg}; 3665324fb0dSmrg 3675324fb0dSmrgstatic const uint32_t ps_const_shader_patchinfo_code_size_gfx9 = 6; 3685324fb0dSmrg 3695324fb0dSmrgstatic const uint32_t ps_const_shader_patchinfo_code_gfx9[][10][6] = { 3705324fb0dSmrg {{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001890, 0x00000000 }, 3715324fb0dSmrg { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001801, 0x00000000 }, 3725324fb0dSmrg { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000100 }, 
3735324fb0dSmrg { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000300 }, 3745324fb0dSmrg { 0xD2960000, 0x00020300, 0xD2960001, 0x00020702, 0xC4001C0F, 0x00000100 }, 3755324fb0dSmrg { 0xD2950000, 0x00020300, 0xD2950001, 0x00020702, 0xC4001C0F, 0x00000100 }, 3765324fb0dSmrg { 0xD2940000, 0x00020300, 0xD2940001, 0x00020702, 0xC4001C0F, 0x00000100 }, 3775324fb0dSmrg { 0xD2970000, 0x00020300, 0xD2970001, 0x00020702, 0xC4001C0F, 0x00000100 }, 3785324fb0dSmrg { 0xD2980000, 0x00020300, 0xD2980001, 0x00020702, 0xC4001C0F, 0x00000100 }, 3795324fb0dSmrg { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC400180F, 0x03020100 } 3805324fb0dSmrg } 3815324fb0dSmrg}; 3825324fb0dSmrg 3835324fb0dSmrgstatic const uint32_t ps_const_shader_patchinfo_offset_gfx9[] = { 3845324fb0dSmrg 0x00000004 3855324fb0dSmrg}; 3865324fb0dSmrg 3875324fb0dSmrgstatic const uint32_t ps_num_sh_registers_gfx9 = 2; 3885324fb0dSmrg 3895324fb0dSmrgstatic const uint32_t ps_const_sh_registers_gfx9[][2] = { 3905324fb0dSmrg {0x2C0A, 0x000C0040},//{ mmSPI_SHADER_PGM_RSRC1_PS, 0x000C0040 }, 3915324fb0dSmrg {0x2C0B, 0x00000008}, //{ mmSPI_SHADER_PGM_RSRC2_PS, 0x00000008 } 3925324fb0dSmrg}; 3935324fb0dSmrg 3945324fb0dSmrgstatic const uint32_t ps_num_context_registers_gfx9 = 7; 3955324fb0dSmrg 3965324fb0dSmrgstatic const uint32_t ps_const_context_reg_gfx9[][2] = { 3975324fb0dSmrg {0xA1B4, 0x00000002}, //{ mmSPI_PS_INPUT_ADDR, 0x00000002 }, 3985324fb0dSmrg {0xA1B6, 0x00000000}, //{ mmSPI_PS_IN_CONTROL, 0x00000000 }, 3995324fb0dSmrg {0xA08F, 0x0000000F}, //{ mmCB_SHADER_MASK, 0x0000000F }, 4005324fb0dSmrg {0xA203, 0x00000010}, //{ mmDB_SHADER_CONTROL, 0x00000010 }, 4015324fb0dSmrg {0xA1C4, 0x00000000}, //{ mmSPI_SHADER_Z_FORMAT, 0x00000000 }, 4025324fb0dSmrg {0xA1B8, 0x00000000}, //{ mmSPI_BARYC_CNTL, 0x00000000 /* Always 0 for now */}, 4035324fb0dSmrg {0xA1C5, 0x00000004}, //{ mmSPI_SHADER_COL_FORMAT, 0x00000004 } 4045324fb0dSmrg}; 4055324fb0dSmrg 4065324fb0dSmrgstatic const uint32_t 
ps_tex_shader_gfx9[] = { 4075324fb0dSmrg 0xBEFC000C, 0xBE8E017E, 0xBEFE077E, 0xD4180000, 4085324fb0dSmrg 0xD4190001, 0xD41C0100, 0xD41D0101, 0xF0800F00, 4095324fb0dSmrg 0x00400206, 0xBEFE010E, 0xBF8C0F70, 0xD2960000, 4105324fb0dSmrg 0x00020702, 0xD2960001, 0x00020B04, 0xC4001C0F, 4115324fb0dSmrg 0x00000100, 0xBF810000 4125324fb0dSmrg}; 4135324fb0dSmrg 4145324fb0dSmrgstatic const uint32_t ps_tex_shader_patchinfo_offset_gfx9[] = { 4155324fb0dSmrg 0x0000000B 4165324fb0dSmrg}; 4175324fb0dSmrg 4185324fb0dSmrgstatic const uint32_t ps_tex_shader_patchinfo_code_size_gfx9 = 6; 4195324fb0dSmrg 4205324fb0dSmrgstatic const uint32_t ps_tex_shader_patchinfo_code_gfx9[][10][6] = { 4215324fb0dSmrg {{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001890, 0x00000000 }, 4225324fb0dSmrg { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001801, 0x00000002 }, 4235324fb0dSmrg { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000302 }, 4245324fb0dSmrg { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000502 }, 4255324fb0dSmrg { 0xD2960000, 0x00020702, 0xD2960001, 0x00020B04, 0xC4001C0F, 0x00000100 }, 4265324fb0dSmrg { 0xD2950000, 0x00020702, 0xD2950001, 0x00020B04, 0xC4001C0F, 0x00000100 }, 4275324fb0dSmrg { 0xD2940000, 0x00020702, 0xD2940001, 0x00020B04, 0xC4001C0F, 0x00000100 }, 4285324fb0dSmrg { 0xD2970000, 0x00020702, 0xD2970001, 0x00020B04, 0xC4001C0F, 0x00000100 }, 4295324fb0dSmrg { 0xD2980000, 0x00020702, 0xD2980001, 0x00020B04, 0xC4001C0F, 0x00000100 }, 4305324fb0dSmrg { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC400180F, 0x05040302 } 4315324fb0dSmrg } 4325324fb0dSmrg}; 4335324fb0dSmrg 4345324fb0dSmrgstatic const uint32_t ps_tex_sh_registers_gfx9[][2] = { 4355324fb0dSmrg {0x2C0A, 0x000C0081},//{ mmSPI_SHADER_PGM_RSRC1_PS, 0x000C0081 }, 4365324fb0dSmrg {0x2C0B, 0x00000018}, //{ mmSPI_SHADER_PGM_RSRC2_PS, 0x00000018 } 4375324fb0dSmrg}; 4385324fb0dSmrg 4395324fb0dSmrgstatic const uint32_t ps_tex_context_reg_gfx9[][2] = { 
4405324fb0dSmrg {0xA1B4, 0x00000002}, //{ mmSPI_PS_INPUT_ADDR, 0x00000002 }, 4415324fb0dSmrg {0xA1B6, 0x00000001}, //{ mmSPI_PS_IN_CONTROL, 0x00000001 }, 4425324fb0dSmrg {0xA08F, 0x0000000F}, //{ mmCB_SHADER_MASK, 0x0000000F }, 4435324fb0dSmrg {0xA203, 0x00000010}, //{ mmDB_SHADER_CONTROL, 0x00000010 }, 4445324fb0dSmrg {0xA1C4, 0x00000000}, //{ mmSPI_SHADER_Z_FORMAT, 0x00000000 }, 4455324fb0dSmrg {0xA1B8, 0x00000000}, //{ mmSPI_BARYC_CNTL, 0x00000000 /* Always 0 for now */}, 4465324fb0dSmrg {0xA1C5, 0x00000004}, //{ mmSPI_SHADER_COL_FORMAT, 0x00000004 } 4475324fb0dSmrg}; 4485324fb0dSmrg 4495324fb0dSmrgstatic const uint32_t vs_RectPosTexFast_shader_gfx9[] = { 4505324fb0dSmrg 0x7E000B00, 0x020000F3, 0xD042000A, 0x00010100, 4515324fb0dSmrg 0x7E020202, 0x7E040200, 0x020000F3, 0x7E060206, 4525324fb0dSmrg 0x7E080204, 0xD1000001, 0x002A0302, 0x7C840080, 4535324fb0dSmrg 0x7E000200, 0x7E040203, 0x7E0A0201, 0xD1000003, 4545324fb0dSmrg 0x002A0704, 0x7E0C0207, 0x7E0E0205, 0x00000101, 4555324fb0dSmrg 0x00020505, 0x7E040208, 0x7E0A02F2, 0x00060903, 4565324fb0dSmrg 0x00080D07, 0x7E0C0209, 0xC40008CF, 0x05020100, 4575324fb0dSmrg 0xC400020F, 0x05060403, 0xBF810000 4585324fb0dSmrg}; 4595324fb0dSmrg 4605324fb0dSmrgstatic const uint32_t cached_cmd_gfx9[] = { 4615324fb0dSmrg 0xc0016900, 0x0, 0x0, 0xc0026900, 0x3, 0x2a, 0x0, 4625324fb0dSmrg 0xc0046900, 0xa, 0x0, 0x0, 0x0, 0x200020, 4635324fb0dSmrg 0xc0016900, 0x83, 0xffff, 0xc0026900, 0x8e, 0xf, 0xf, 46488f8a8d2Smrg 0xc0056900, 0x105, 0x0, 0x0, 0x0, 0x0, 0x1a, 4655324fb0dSmrg 0xc0026900, 0x10b, 0x0, 0x0, 0xc0016900, 0x1e0, 0x0, 4665324fb0dSmrg 0xc0036900, 0x200, 0x0, 0x10000, 0xcc0011, 4675324fb0dSmrg 0xc0026900, 0x292, 0x20, 0x60201b8, 4685324fb0dSmrg 0xc0026900, 0x2b0, 0x0, 0x0, 0xc0016900, 0x2f8, 0x0 4695324fb0dSmrg}; 47000a23bdaSmrg 4717cdc0497Smrgint amdgpu_bo_alloc_and_map_raw(amdgpu_device_handle dev, unsigned size, 4727cdc0497Smrg unsigned alignment, unsigned heap, uint64_t alloc_flags, 4737cdc0497Smrg uint64_t mapping_flags, 
amdgpu_bo_handle *bo, void **cpu, 4747cdc0497Smrg uint64_t *mc_address, 4757cdc0497Smrg amdgpu_va_handle *va_handle) 4767cdc0497Smrg{ 4777cdc0497Smrg struct amdgpu_bo_alloc_request request = {}; 4787cdc0497Smrg amdgpu_bo_handle buf_handle; 4797cdc0497Smrg amdgpu_va_handle handle; 4807cdc0497Smrg uint64_t vmc_addr; 4817cdc0497Smrg int r; 4827cdc0497Smrg 4837cdc0497Smrg request.alloc_size = size; 4847cdc0497Smrg request.phys_alignment = alignment; 4857cdc0497Smrg request.preferred_heap = heap; 4867cdc0497Smrg request.flags = alloc_flags; 4877cdc0497Smrg 4887cdc0497Smrg r = amdgpu_bo_alloc(dev, &request, &buf_handle); 4897cdc0497Smrg if (r) 4907cdc0497Smrg return r; 4917cdc0497Smrg 4927cdc0497Smrg r = amdgpu_va_range_alloc(dev, 4937cdc0497Smrg amdgpu_gpu_va_range_general, 4947cdc0497Smrg size, alignment, 0, &vmc_addr, 4957cdc0497Smrg &handle, 0); 4967cdc0497Smrg if (r) 4977cdc0497Smrg goto error_va_alloc; 4987cdc0497Smrg 4997cdc0497Smrg r = amdgpu_bo_va_op_raw(dev, buf_handle, 0, ALIGN(size, getpagesize()), vmc_addr, 5007cdc0497Smrg AMDGPU_VM_PAGE_READABLE | 5017cdc0497Smrg AMDGPU_VM_PAGE_WRITEABLE | 5027cdc0497Smrg AMDGPU_VM_PAGE_EXECUTABLE | 5037cdc0497Smrg mapping_flags, 5047cdc0497Smrg AMDGPU_VA_OP_MAP); 5057cdc0497Smrg if (r) 5067cdc0497Smrg goto error_va_map; 5077cdc0497Smrg 5087cdc0497Smrg r = amdgpu_bo_cpu_map(buf_handle, cpu); 5097cdc0497Smrg if (r) 5107cdc0497Smrg goto error_cpu_map; 5117cdc0497Smrg 5127cdc0497Smrg *bo = buf_handle; 5137cdc0497Smrg *mc_address = vmc_addr; 5147cdc0497Smrg *va_handle = handle; 5157cdc0497Smrg 5167cdc0497Smrg return 0; 5177cdc0497Smrg 5187cdc0497Smrg error_cpu_map: 5197cdc0497Smrg amdgpu_bo_cpu_unmap(buf_handle); 5207cdc0497Smrg 5217cdc0497Smrg error_va_map: 5227cdc0497Smrg amdgpu_bo_va_op(buf_handle, 0, size, vmc_addr, 0, AMDGPU_VA_OP_UNMAP); 5237cdc0497Smrg 5247cdc0497Smrg error_va_alloc: 5257cdc0497Smrg amdgpu_bo_free(buf_handle); 5267cdc0497Smrg return r; 5277cdc0497Smrg} 5287cdc0497Smrg 5297cdc0497Smrg 5307cdc0497Smrg 
5313f012e29Smrgint suite_basic_tests_init(void) 5323f012e29Smrg{ 533d8807b2fSmrg struct amdgpu_gpu_info gpu_info = {0}; 5343f012e29Smrg int r; 5353f012e29Smrg 5363f012e29Smrg r = amdgpu_device_initialize(drm_amdgpu[0], &major_version, 5373f012e29Smrg &minor_version, &device_handle); 5383f012e29Smrg 539d8807b2fSmrg if (r) { 540037b3c26Smrg if ((r == -EACCES) && (errno == EACCES)) 541037b3c26Smrg printf("\n\nError:%s. " 542037b3c26Smrg "Hint:Try to run this test program as root.", 543037b3c26Smrg strerror(errno)); 5443f012e29Smrg return CUE_SINIT_FAILED; 545037b3c26Smrg } 546d8807b2fSmrg 547d8807b2fSmrg r = amdgpu_query_gpu_info(device_handle, &gpu_info); 548d8807b2fSmrg if (r) 549d8807b2fSmrg return CUE_SINIT_FAILED; 550d8807b2fSmrg 551d8807b2fSmrg family_id = gpu_info.family_id; 552d8807b2fSmrg 553d8807b2fSmrg return CUE_SUCCESS; 5543f012e29Smrg} 5553f012e29Smrg 5563f012e29Smrgint suite_basic_tests_clean(void) 5573f012e29Smrg{ 5583f012e29Smrg int r = amdgpu_device_deinitialize(device_handle); 5593f012e29Smrg 5603f012e29Smrg if (r == 0) 5613f012e29Smrg return CUE_SUCCESS; 5623f012e29Smrg else 5633f012e29Smrg return CUE_SCLEAN_FAILED; 5643f012e29Smrg} 5653f012e29Smrg 5663f012e29Smrgstatic void amdgpu_query_info_test(void) 5673f012e29Smrg{ 5683f012e29Smrg struct amdgpu_gpu_info gpu_info = {0}; 5693f012e29Smrg uint32_t version, feature; 5703f012e29Smrg int r; 5713f012e29Smrg 5723f012e29Smrg r = amdgpu_query_gpu_info(device_handle, &gpu_info); 5733f012e29Smrg CU_ASSERT_EQUAL(r, 0); 5743f012e29Smrg 5753f012e29Smrg r = amdgpu_query_firmware_version(device_handle, AMDGPU_INFO_FW_VCE, 0, 5763f012e29Smrg 0, &version, &feature); 5773f012e29Smrg CU_ASSERT_EQUAL(r, 0); 5783f012e29Smrg} 5793f012e29Smrg 5803f012e29Smrgstatic void amdgpu_command_submission_gfx_separate_ibs(void) 5813f012e29Smrg{ 5823f012e29Smrg amdgpu_context_handle context_handle; 5833f012e29Smrg amdgpu_bo_handle ib_result_handle, ib_result_ce_handle; 5843f012e29Smrg void *ib_result_cpu, *ib_result_ce_cpu; 
5853f012e29Smrg uint64_t ib_result_mc_address, ib_result_ce_mc_address; 5863f012e29Smrg struct amdgpu_cs_request ibs_request = {0}; 5873f012e29Smrg struct amdgpu_cs_ib_info ib_info[2]; 5883f012e29Smrg struct amdgpu_cs_fence fence_status = {0}; 5893f012e29Smrg uint32_t *ptr; 5903f012e29Smrg uint32_t expired; 5913f012e29Smrg amdgpu_bo_list_handle bo_list; 5923f012e29Smrg amdgpu_va_handle va_handle, va_handle_ce; 593d8807b2fSmrg int r, i = 0; 5943f012e29Smrg 5953f012e29Smrg r = amdgpu_cs_ctx_create(device_handle, &context_handle); 5963f012e29Smrg CU_ASSERT_EQUAL(r, 0); 5973f012e29Smrg 5983f012e29Smrg r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096, 5993f012e29Smrg AMDGPU_GEM_DOMAIN_GTT, 0, 6003f012e29Smrg &ib_result_handle, &ib_result_cpu, 6013f012e29Smrg &ib_result_mc_address, &va_handle); 6023f012e29Smrg CU_ASSERT_EQUAL(r, 0); 6033f012e29Smrg 6043f012e29Smrg r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096, 6053f012e29Smrg AMDGPU_GEM_DOMAIN_GTT, 0, 6063f012e29Smrg &ib_result_ce_handle, &ib_result_ce_cpu, 6073f012e29Smrg &ib_result_ce_mc_address, &va_handle_ce); 6083f012e29Smrg CU_ASSERT_EQUAL(r, 0); 6093f012e29Smrg 6103f012e29Smrg r = amdgpu_get_bo_list(device_handle, ib_result_handle, 6113f012e29Smrg ib_result_ce_handle, &bo_list); 6123f012e29Smrg CU_ASSERT_EQUAL(r, 0); 6133f012e29Smrg 6143f012e29Smrg memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info)); 6153f012e29Smrg 6163f012e29Smrg /* IT_SET_CE_DE_COUNTERS */ 6173f012e29Smrg ptr = ib_result_ce_cpu; 618d8807b2fSmrg if (family_id != AMDGPU_FAMILY_SI) { 619d8807b2fSmrg ptr[i++] = 0xc0008900; 620d8807b2fSmrg ptr[i++] = 0; 621d8807b2fSmrg } 622d8807b2fSmrg ptr[i++] = 0xc0008400; 623d8807b2fSmrg ptr[i++] = 1; 6243f012e29Smrg ib_info[0].ib_mc_address = ib_result_ce_mc_address; 625d8807b2fSmrg ib_info[0].size = i; 6263f012e29Smrg ib_info[0].flags = AMDGPU_IB_FLAG_CE; 6273f012e29Smrg 6283f012e29Smrg /* IT_WAIT_ON_CE_COUNTER */ 6293f012e29Smrg ptr = ib_result_cpu; 6303f012e29Smrg ptr[0] = 0xc0008600; 
6313f012e29Smrg ptr[1] = 0x00000001; 6323f012e29Smrg ib_info[1].ib_mc_address = ib_result_mc_address; 6333f012e29Smrg ib_info[1].size = 2; 6343f012e29Smrg 6353f012e29Smrg ibs_request.ip_type = AMDGPU_HW_IP_GFX; 6363f012e29Smrg ibs_request.number_of_ibs = 2; 6373f012e29Smrg ibs_request.ibs = ib_info; 6383f012e29Smrg ibs_request.resources = bo_list; 6393f012e29Smrg ibs_request.fence_info.handle = NULL; 6403f012e29Smrg 6413f012e29Smrg r = amdgpu_cs_submit(context_handle, 0,&ibs_request, 1); 6423f012e29Smrg 6433f012e29Smrg CU_ASSERT_EQUAL(r, 0); 6443f012e29Smrg 6453f012e29Smrg fence_status.context = context_handle; 6463f012e29Smrg fence_status.ip_type = AMDGPU_HW_IP_GFX; 6473f012e29Smrg fence_status.ip_instance = 0; 6483f012e29Smrg fence_status.fence = ibs_request.seq_no; 6493f012e29Smrg 6503f012e29Smrg r = amdgpu_cs_query_fence_status(&fence_status, 6513f012e29Smrg AMDGPU_TIMEOUT_INFINITE, 6523f012e29Smrg 0, &expired); 6533f012e29Smrg CU_ASSERT_EQUAL(r, 0); 6543f012e29Smrg 6553f012e29Smrg r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle, 6563f012e29Smrg ib_result_mc_address, 4096); 6573f012e29Smrg CU_ASSERT_EQUAL(r, 0); 6583f012e29Smrg 6593f012e29Smrg r = amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce, 6603f012e29Smrg ib_result_ce_mc_address, 4096); 6613f012e29Smrg CU_ASSERT_EQUAL(r, 0); 6623f012e29Smrg 6633f012e29Smrg r = amdgpu_bo_list_destroy(bo_list); 6643f012e29Smrg CU_ASSERT_EQUAL(r, 0); 6653f012e29Smrg 6663f012e29Smrg r = amdgpu_cs_ctx_free(context_handle); 6673f012e29Smrg CU_ASSERT_EQUAL(r, 0); 6683f012e29Smrg 6693f012e29Smrg} 6703f012e29Smrg 6713f012e29Smrgstatic void amdgpu_command_submission_gfx_shared_ib(void) 6723f012e29Smrg{ 6733f012e29Smrg amdgpu_context_handle context_handle; 6743f012e29Smrg amdgpu_bo_handle ib_result_handle; 6753f012e29Smrg void *ib_result_cpu; 6763f012e29Smrg uint64_t ib_result_mc_address; 6773f012e29Smrg struct amdgpu_cs_request ibs_request = {0}; 6783f012e29Smrg struct amdgpu_cs_ib_info ib_info[2]; 
6793f012e29Smrg struct amdgpu_cs_fence fence_status = {0}; 6803f012e29Smrg uint32_t *ptr; 6813f012e29Smrg uint32_t expired; 6823f012e29Smrg amdgpu_bo_list_handle bo_list; 6833f012e29Smrg amdgpu_va_handle va_handle; 684d8807b2fSmrg int r, i = 0; 6853f012e29Smrg 6863f012e29Smrg r = amdgpu_cs_ctx_create(device_handle, &context_handle); 6873f012e29Smrg CU_ASSERT_EQUAL(r, 0); 6883f012e29Smrg 6893f012e29Smrg r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096, 6903f012e29Smrg AMDGPU_GEM_DOMAIN_GTT, 0, 6913f012e29Smrg &ib_result_handle, &ib_result_cpu, 6923f012e29Smrg &ib_result_mc_address, &va_handle); 6933f012e29Smrg CU_ASSERT_EQUAL(r, 0); 6943f012e29Smrg 6953f012e29Smrg r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL, 6963f012e29Smrg &bo_list); 6973f012e29Smrg CU_ASSERT_EQUAL(r, 0); 6983f012e29Smrg 6993f012e29Smrg memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info)); 7003f012e29Smrg 7013f012e29Smrg /* IT_SET_CE_DE_COUNTERS */ 7023f012e29Smrg ptr = ib_result_cpu; 703d8807b2fSmrg if (family_id != AMDGPU_FAMILY_SI) { 704d8807b2fSmrg ptr[i++] = 0xc0008900; 705d8807b2fSmrg ptr[i++] = 0; 706d8807b2fSmrg } 707d8807b2fSmrg ptr[i++] = 0xc0008400; 708d8807b2fSmrg ptr[i++] = 1; 7093f012e29Smrg ib_info[0].ib_mc_address = ib_result_mc_address; 710d8807b2fSmrg ib_info[0].size = i; 7113f012e29Smrg ib_info[0].flags = AMDGPU_IB_FLAG_CE; 7123f012e29Smrg 7133f012e29Smrg ptr = (uint32_t *)ib_result_cpu + 4; 7143f012e29Smrg ptr[0] = 0xc0008600; 7153f012e29Smrg ptr[1] = 0x00000001; 7163f012e29Smrg ib_info[1].ib_mc_address = ib_result_mc_address + 16; 7173f012e29Smrg ib_info[1].size = 2; 7183f012e29Smrg 7193f012e29Smrg ibs_request.ip_type = AMDGPU_HW_IP_GFX; 7203f012e29Smrg ibs_request.number_of_ibs = 2; 7213f012e29Smrg ibs_request.ibs = ib_info; 7223f012e29Smrg ibs_request.resources = bo_list; 7233f012e29Smrg ibs_request.fence_info.handle = NULL; 7243f012e29Smrg 7253f012e29Smrg r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1); 7263f012e29Smrg 7273f012e29Smrg 
CU_ASSERT_EQUAL(r, 0); 7283f012e29Smrg 7293f012e29Smrg fence_status.context = context_handle; 7303f012e29Smrg fence_status.ip_type = AMDGPU_HW_IP_GFX; 7313f012e29Smrg fence_status.ip_instance = 0; 7323f012e29Smrg fence_status.fence = ibs_request.seq_no; 7333f012e29Smrg 7343f012e29Smrg r = amdgpu_cs_query_fence_status(&fence_status, 7353f012e29Smrg AMDGPU_TIMEOUT_INFINITE, 7363f012e29Smrg 0, &expired); 7373f012e29Smrg CU_ASSERT_EQUAL(r, 0); 7383f012e29Smrg 7393f012e29Smrg r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle, 7403f012e29Smrg ib_result_mc_address, 4096); 7413f012e29Smrg CU_ASSERT_EQUAL(r, 0); 7423f012e29Smrg 7433f012e29Smrg r = amdgpu_bo_list_destroy(bo_list); 7443f012e29Smrg CU_ASSERT_EQUAL(r, 0); 7453f012e29Smrg 7463f012e29Smrg r = amdgpu_cs_ctx_free(context_handle); 7473f012e29Smrg CU_ASSERT_EQUAL(r, 0); 7483f012e29Smrg} 7493f012e29Smrg 7503f012e29Smrgstatic void amdgpu_command_submission_gfx_cp_write_data(void) 7513f012e29Smrg{ 7523f012e29Smrg amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_GFX); 7533f012e29Smrg} 7543f012e29Smrg 7553f012e29Smrgstatic void amdgpu_command_submission_gfx_cp_const_fill(void) 7563f012e29Smrg{ 7573f012e29Smrg amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_GFX); 7583f012e29Smrg} 7593f012e29Smrg 7603f012e29Smrgstatic void amdgpu_command_submission_gfx_cp_copy_data(void) 7613f012e29Smrg{ 7623f012e29Smrg amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_GFX); 7633f012e29Smrg} 7643f012e29Smrg 76500a23bdaSmrgstatic void amdgpu_bo_eviction_test(void) 76600a23bdaSmrg{ 76700a23bdaSmrg const int sdma_write_length = 1024; 76800a23bdaSmrg const int pm4_dw = 256; 76900a23bdaSmrg amdgpu_context_handle context_handle; 77000a23bdaSmrg amdgpu_bo_handle bo1, bo2, vram_max[2], gtt_max[2]; 77100a23bdaSmrg amdgpu_bo_handle *resources; 77200a23bdaSmrg uint32_t *pm4; 77300a23bdaSmrg struct amdgpu_cs_ib_info *ib_info; 77400a23bdaSmrg struct amdgpu_cs_request *ibs_request; 77500a23bdaSmrg uint64_t bo1_mc, 
bo2_mc; 77600a23bdaSmrg volatile unsigned char *bo1_cpu, *bo2_cpu; 77700a23bdaSmrg int i, j, r, loop1, loop2; 77800a23bdaSmrg uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC}; 77900a23bdaSmrg amdgpu_va_handle bo1_va_handle, bo2_va_handle; 78000a23bdaSmrg struct amdgpu_heap_info vram_info, gtt_info; 78100a23bdaSmrg 78200a23bdaSmrg pm4 = calloc(pm4_dw, sizeof(*pm4)); 78300a23bdaSmrg CU_ASSERT_NOT_EQUAL(pm4, NULL); 78400a23bdaSmrg 78500a23bdaSmrg ib_info = calloc(1, sizeof(*ib_info)); 78600a23bdaSmrg CU_ASSERT_NOT_EQUAL(ib_info, NULL); 78700a23bdaSmrg 78800a23bdaSmrg ibs_request = calloc(1, sizeof(*ibs_request)); 78900a23bdaSmrg CU_ASSERT_NOT_EQUAL(ibs_request, NULL); 79000a23bdaSmrg 79100a23bdaSmrg r = amdgpu_cs_ctx_create(device_handle, &context_handle); 79200a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 79300a23bdaSmrg 79400a23bdaSmrg /* prepare resource */ 79500a23bdaSmrg resources = calloc(4, sizeof(amdgpu_bo_handle)); 79600a23bdaSmrg CU_ASSERT_NOT_EQUAL(resources, NULL); 79700a23bdaSmrg 79800a23bdaSmrg r = amdgpu_query_heap_info(device_handle, AMDGPU_GEM_DOMAIN_VRAM, 79900a23bdaSmrg 0, &vram_info); 80000a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 80100a23bdaSmrg 80200a23bdaSmrg r = amdgpu_bo_alloc_wrap(device_handle, vram_info.max_allocation, 4096, 80300a23bdaSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, &vram_max[0]); 80400a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 80500a23bdaSmrg r = amdgpu_bo_alloc_wrap(device_handle, vram_info.max_allocation, 4096, 80600a23bdaSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, &vram_max[1]); 80700a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 80800a23bdaSmrg 80900a23bdaSmrg r = amdgpu_query_heap_info(device_handle, AMDGPU_GEM_DOMAIN_GTT, 81000a23bdaSmrg 0, >t_info); 81100a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 81200a23bdaSmrg 81300a23bdaSmrg r = amdgpu_bo_alloc_wrap(device_handle, gtt_info.max_allocation, 4096, 81400a23bdaSmrg AMDGPU_GEM_DOMAIN_GTT, 0, >t_max[0]); 81500a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 81600a23bdaSmrg r = amdgpu_bo_alloc_wrap(device_handle, gtt_info.max_allocation, 4096, 
81700a23bdaSmrg AMDGPU_GEM_DOMAIN_GTT, 0, >t_max[1]); 81800a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 81900a23bdaSmrg 82000a23bdaSmrg 82100a23bdaSmrg 82200a23bdaSmrg loop1 = loop2 = 0; 82300a23bdaSmrg /* run 9 circle to test all mapping combination */ 82400a23bdaSmrg while(loop1 < 2) { 82500a23bdaSmrg while(loop2 < 2) { 82600a23bdaSmrg /* allocate UC bo1for sDMA use */ 82700a23bdaSmrg r = amdgpu_bo_alloc_and_map(device_handle, 82800a23bdaSmrg sdma_write_length, 4096, 82900a23bdaSmrg AMDGPU_GEM_DOMAIN_GTT, 83000a23bdaSmrg gtt_flags[loop1], &bo1, 83100a23bdaSmrg (void**)&bo1_cpu, &bo1_mc, 83200a23bdaSmrg &bo1_va_handle); 83300a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 83400a23bdaSmrg 83500a23bdaSmrg /* set bo1 */ 83600a23bdaSmrg memset((void*)bo1_cpu, 0xaa, sdma_write_length); 83700a23bdaSmrg 83800a23bdaSmrg /* allocate UC bo2 for sDMA use */ 83900a23bdaSmrg r = amdgpu_bo_alloc_and_map(device_handle, 84000a23bdaSmrg sdma_write_length, 4096, 84100a23bdaSmrg AMDGPU_GEM_DOMAIN_GTT, 84200a23bdaSmrg gtt_flags[loop2], &bo2, 84300a23bdaSmrg (void**)&bo2_cpu, &bo2_mc, 84400a23bdaSmrg &bo2_va_handle); 84500a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 84600a23bdaSmrg 84700a23bdaSmrg /* clear bo2 */ 84800a23bdaSmrg memset((void*)bo2_cpu, 0, sdma_write_length); 84900a23bdaSmrg 85000a23bdaSmrg resources[0] = bo1; 85100a23bdaSmrg resources[1] = bo2; 85200a23bdaSmrg resources[2] = vram_max[loop2]; 85300a23bdaSmrg resources[3] = gtt_max[loop2]; 85400a23bdaSmrg 85500a23bdaSmrg /* fulfill PM4: test DMA copy linear */ 85600a23bdaSmrg i = j = 0; 85700a23bdaSmrg if (family_id == AMDGPU_FAMILY_SI) { 85800a23bdaSmrg pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI, 0, 0, 0, 85900a23bdaSmrg sdma_write_length); 86000a23bdaSmrg pm4[i++] = 0xffffffff & bo2_mc; 86100a23bdaSmrg pm4[i++] = 0xffffffff & bo1_mc; 86200a23bdaSmrg pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32; 86300a23bdaSmrg pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32; 86400a23bdaSmrg } else { 86500a23bdaSmrg pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY, 
SDMA_COPY_SUB_OPCODE_LINEAR, 0); 86600a23bdaSmrg if (family_id >= AMDGPU_FAMILY_AI) 86700a23bdaSmrg pm4[i++] = sdma_write_length - 1; 86800a23bdaSmrg else 86900a23bdaSmrg pm4[i++] = sdma_write_length; 87000a23bdaSmrg pm4[i++] = 0; 87100a23bdaSmrg pm4[i++] = 0xffffffff & bo1_mc; 87200a23bdaSmrg pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32; 87300a23bdaSmrg pm4[i++] = 0xffffffff & bo2_mc; 87400a23bdaSmrg pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32; 87500a23bdaSmrg } 87600a23bdaSmrg 87700a23bdaSmrg amdgpu_test_exec_cs_helper(context_handle, 87800a23bdaSmrg AMDGPU_HW_IP_DMA, 0, 87900a23bdaSmrg i, pm4, 88000a23bdaSmrg 4, resources, 88100a23bdaSmrg ib_info, ibs_request); 88200a23bdaSmrg 88300a23bdaSmrg /* verify if SDMA test result meets with expected */ 88400a23bdaSmrg i = 0; 88500a23bdaSmrg while(i < sdma_write_length) { 88600a23bdaSmrg CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa); 88700a23bdaSmrg } 88800a23bdaSmrg r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc, 88900a23bdaSmrg sdma_write_length); 89000a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 89100a23bdaSmrg r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc, 89200a23bdaSmrg sdma_write_length); 89300a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 89400a23bdaSmrg loop2++; 89500a23bdaSmrg } 89600a23bdaSmrg loop2 = 0; 89700a23bdaSmrg loop1++; 89800a23bdaSmrg } 89900a23bdaSmrg amdgpu_bo_free(vram_max[0]); 90000a23bdaSmrg amdgpu_bo_free(vram_max[1]); 90100a23bdaSmrg amdgpu_bo_free(gtt_max[0]); 90200a23bdaSmrg amdgpu_bo_free(gtt_max[1]); 90300a23bdaSmrg /* clean resources */ 90400a23bdaSmrg free(resources); 90500a23bdaSmrg free(ibs_request); 90600a23bdaSmrg free(ib_info); 90700a23bdaSmrg free(pm4); 90800a23bdaSmrg 90900a23bdaSmrg /* end of test */ 91000a23bdaSmrg r = amdgpu_cs_ctx_free(context_handle); 91100a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 91200a23bdaSmrg} 91300a23bdaSmrg 91400a23bdaSmrg 9153f012e29Smrgstatic void amdgpu_command_submission_gfx(void) 9163f012e29Smrg{ 9173f012e29Smrg /* write data using the CP */ 9183f012e29Smrg 
amdgpu_command_submission_gfx_cp_write_data(); 9193f012e29Smrg /* const fill using the CP */ 9203f012e29Smrg amdgpu_command_submission_gfx_cp_const_fill(); 9213f012e29Smrg /* copy data using the CP */ 9223f012e29Smrg amdgpu_command_submission_gfx_cp_copy_data(); 9233f012e29Smrg /* separate IB buffers for multi-IB submission */ 9243f012e29Smrg amdgpu_command_submission_gfx_separate_ibs(); 9253f012e29Smrg /* shared IB buffer for multi-IB submission */ 9263f012e29Smrg amdgpu_command_submission_gfx_shared_ib(); 9273f012e29Smrg} 9283f012e29Smrg 9293f012e29Smrgstatic void amdgpu_semaphore_test(void) 9303f012e29Smrg{ 9313f012e29Smrg amdgpu_context_handle context_handle[2]; 9323f012e29Smrg amdgpu_semaphore_handle sem; 9333f012e29Smrg amdgpu_bo_handle ib_result_handle[2]; 9343f012e29Smrg void *ib_result_cpu[2]; 9353f012e29Smrg uint64_t ib_result_mc_address[2]; 9363f012e29Smrg struct amdgpu_cs_request ibs_request[2] = {0}; 9373f012e29Smrg struct amdgpu_cs_ib_info ib_info[2] = {0}; 9383f012e29Smrg struct amdgpu_cs_fence fence_status = {0}; 9393f012e29Smrg uint32_t *ptr; 9403f012e29Smrg uint32_t expired; 941d8807b2fSmrg uint32_t sdma_nop, gfx_nop; 9423f012e29Smrg amdgpu_bo_list_handle bo_list[2]; 9433f012e29Smrg amdgpu_va_handle va_handle[2]; 9443f012e29Smrg int r, i; 9453f012e29Smrg 946d8807b2fSmrg if (family_id == AMDGPU_FAMILY_SI) { 947d8807b2fSmrg sdma_nop = SDMA_PACKET_SI(SDMA_NOP_SI, 0, 0, 0, 0); 948d8807b2fSmrg gfx_nop = GFX_COMPUTE_NOP_SI; 949d8807b2fSmrg } else { 950d8807b2fSmrg sdma_nop = SDMA_PKT_HEADER_OP(SDMA_NOP); 951d8807b2fSmrg gfx_nop = GFX_COMPUTE_NOP; 952d8807b2fSmrg } 953d8807b2fSmrg 9543f012e29Smrg r = amdgpu_cs_create_semaphore(&sem); 9553f012e29Smrg CU_ASSERT_EQUAL(r, 0); 9563f012e29Smrg for (i = 0; i < 2; i++) { 9573f012e29Smrg r = amdgpu_cs_ctx_create(device_handle, &context_handle[i]); 9583f012e29Smrg CU_ASSERT_EQUAL(r, 0); 9593f012e29Smrg 9603f012e29Smrg r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096, 9613f012e29Smrg AMDGPU_GEM_DOMAIN_GTT, 0, 
9623f012e29Smrg &ib_result_handle[i], &ib_result_cpu[i], 9633f012e29Smrg &ib_result_mc_address[i], &va_handle[i]); 9643f012e29Smrg CU_ASSERT_EQUAL(r, 0); 9653f012e29Smrg 9663f012e29Smrg r = amdgpu_get_bo_list(device_handle, ib_result_handle[i], 9673f012e29Smrg NULL, &bo_list[i]); 9683f012e29Smrg CU_ASSERT_EQUAL(r, 0); 9693f012e29Smrg } 9703f012e29Smrg 9713f012e29Smrg /* 1. same context different engine */ 9723f012e29Smrg ptr = ib_result_cpu[0]; 973d8807b2fSmrg ptr[0] = sdma_nop; 9743f012e29Smrg ib_info[0].ib_mc_address = ib_result_mc_address[0]; 9753f012e29Smrg ib_info[0].size = 1; 9763f012e29Smrg 9773f012e29Smrg ibs_request[0].ip_type = AMDGPU_HW_IP_DMA; 9783f012e29Smrg ibs_request[0].number_of_ibs = 1; 9793f012e29Smrg ibs_request[0].ibs = &ib_info[0]; 9803f012e29Smrg ibs_request[0].resources = bo_list[0]; 9813f012e29Smrg ibs_request[0].fence_info.handle = NULL; 9823f012e29Smrg r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request[0], 1); 9833f012e29Smrg CU_ASSERT_EQUAL(r, 0); 9843f012e29Smrg r = amdgpu_cs_signal_semaphore(context_handle[0], AMDGPU_HW_IP_DMA, 0, 0, sem); 9853f012e29Smrg CU_ASSERT_EQUAL(r, 0); 9863f012e29Smrg 9873f012e29Smrg r = amdgpu_cs_wait_semaphore(context_handle[0], AMDGPU_HW_IP_GFX, 0, 0, sem); 9883f012e29Smrg CU_ASSERT_EQUAL(r, 0); 9893f012e29Smrg ptr = ib_result_cpu[1]; 990d8807b2fSmrg ptr[0] = gfx_nop; 9913f012e29Smrg ib_info[1].ib_mc_address = ib_result_mc_address[1]; 9923f012e29Smrg ib_info[1].size = 1; 9933f012e29Smrg 9943f012e29Smrg ibs_request[1].ip_type = AMDGPU_HW_IP_GFX; 9953f012e29Smrg ibs_request[1].number_of_ibs = 1; 9963f012e29Smrg ibs_request[1].ibs = &ib_info[1]; 9973f012e29Smrg ibs_request[1].resources = bo_list[1]; 9983f012e29Smrg ibs_request[1].fence_info.handle = NULL; 9993f012e29Smrg 10003f012e29Smrg r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request[1], 1); 10013f012e29Smrg CU_ASSERT_EQUAL(r, 0); 10023f012e29Smrg 10033f012e29Smrg fence_status.context = context_handle[0]; 10043f012e29Smrg fence_status.ip_type = 
AMDGPU_HW_IP_GFX; 10053f012e29Smrg fence_status.ip_instance = 0; 10063f012e29Smrg fence_status.fence = ibs_request[1].seq_no; 10073f012e29Smrg r = amdgpu_cs_query_fence_status(&fence_status, 10083f012e29Smrg 500000000, 0, &expired); 10093f012e29Smrg CU_ASSERT_EQUAL(r, 0); 10103f012e29Smrg CU_ASSERT_EQUAL(expired, true); 10113f012e29Smrg 10123f012e29Smrg /* 2. same engine different context */ 10133f012e29Smrg ptr = ib_result_cpu[0]; 1014d8807b2fSmrg ptr[0] = gfx_nop; 10153f012e29Smrg ib_info[0].ib_mc_address = ib_result_mc_address[0]; 10163f012e29Smrg ib_info[0].size = 1; 10173f012e29Smrg 10183f012e29Smrg ibs_request[0].ip_type = AMDGPU_HW_IP_GFX; 10193f012e29Smrg ibs_request[0].number_of_ibs = 1; 10203f012e29Smrg ibs_request[0].ibs = &ib_info[0]; 10213f012e29Smrg ibs_request[0].resources = bo_list[0]; 10223f012e29Smrg ibs_request[0].fence_info.handle = NULL; 10233f012e29Smrg r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request[0], 1); 10243f012e29Smrg CU_ASSERT_EQUAL(r, 0); 10253f012e29Smrg r = amdgpu_cs_signal_semaphore(context_handle[0], AMDGPU_HW_IP_GFX, 0, 0, sem); 10263f012e29Smrg CU_ASSERT_EQUAL(r, 0); 10273f012e29Smrg 10283f012e29Smrg r = amdgpu_cs_wait_semaphore(context_handle[1], AMDGPU_HW_IP_GFX, 0, 0, sem); 10293f012e29Smrg CU_ASSERT_EQUAL(r, 0); 10303f012e29Smrg ptr = ib_result_cpu[1]; 1031d8807b2fSmrg ptr[0] = gfx_nop; 10323f012e29Smrg ib_info[1].ib_mc_address = ib_result_mc_address[1]; 10333f012e29Smrg ib_info[1].size = 1; 10343f012e29Smrg 10353f012e29Smrg ibs_request[1].ip_type = AMDGPU_HW_IP_GFX; 10363f012e29Smrg ibs_request[1].number_of_ibs = 1; 10373f012e29Smrg ibs_request[1].ibs = &ib_info[1]; 10383f012e29Smrg ibs_request[1].resources = bo_list[1]; 10393f012e29Smrg ibs_request[1].fence_info.handle = NULL; 10403f012e29Smrg r = amdgpu_cs_submit(context_handle[1], 0,&ibs_request[1], 1); 10413f012e29Smrg 10423f012e29Smrg CU_ASSERT_EQUAL(r, 0); 10433f012e29Smrg 10443f012e29Smrg fence_status.context = context_handle[1]; 10453f012e29Smrg 
fence_status.ip_type = AMDGPU_HW_IP_GFX; 10463f012e29Smrg fence_status.ip_instance = 0; 10473f012e29Smrg fence_status.fence = ibs_request[1].seq_no; 10483f012e29Smrg r = amdgpu_cs_query_fence_status(&fence_status, 10493f012e29Smrg 500000000, 0, &expired); 10503f012e29Smrg CU_ASSERT_EQUAL(r, 0); 10513f012e29Smrg CU_ASSERT_EQUAL(expired, true); 1052d8807b2fSmrg 10533f012e29Smrg for (i = 0; i < 2; i++) { 10543f012e29Smrg r = amdgpu_bo_unmap_and_free(ib_result_handle[i], va_handle[i], 10553f012e29Smrg ib_result_mc_address[i], 4096); 10563f012e29Smrg CU_ASSERT_EQUAL(r, 0); 10573f012e29Smrg 10583f012e29Smrg r = amdgpu_bo_list_destroy(bo_list[i]); 10593f012e29Smrg CU_ASSERT_EQUAL(r, 0); 10603f012e29Smrg 10613f012e29Smrg r = amdgpu_cs_ctx_free(context_handle[i]); 10623f012e29Smrg CU_ASSERT_EQUAL(r, 0); 10633f012e29Smrg } 10643f012e29Smrg 10653f012e29Smrg r = amdgpu_cs_destroy_semaphore(sem); 10663f012e29Smrg CU_ASSERT_EQUAL(r, 0); 10673f012e29Smrg} 10683f012e29Smrg 10693f012e29Smrgstatic void amdgpu_command_submission_compute_nop(void) 10703f012e29Smrg{ 10713f012e29Smrg amdgpu_context_handle context_handle; 10723f012e29Smrg amdgpu_bo_handle ib_result_handle; 10733f012e29Smrg void *ib_result_cpu; 10743f012e29Smrg uint64_t ib_result_mc_address; 10753f012e29Smrg struct amdgpu_cs_request ibs_request; 10763f012e29Smrg struct amdgpu_cs_ib_info ib_info; 10773f012e29Smrg struct amdgpu_cs_fence fence_status; 10783f012e29Smrg uint32_t *ptr; 10793f012e29Smrg uint32_t expired; 108000a23bdaSmrg int r, instance; 10813f012e29Smrg amdgpu_bo_list_handle bo_list; 10823f012e29Smrg amdgpu_va_handle va_handle; 1083d8807b2fSmrg struct drm_amdgpu_info_hw_ip info; 1084d8807b2fSmrg 1085d8807b2fSmrg r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_COMPUTE, 0, &info); 1086d8807b2fSmrg CU_ASSERT_EQUAL(r, 0); 10873f012e29Smrg 10883f012e29Smrg r = amdgpu_cs_ctx_create(device_handle, &context_handle); 10893f012e29Smrg CU_ASSERT_EQUAL(r, 0); 10903f012e29Smrg 1091d8807b2fSmrg for (instance = 0; (1 
<< instance) & info.available_rings; instance++) { 10923f012e29Smrg r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096, 10933f012e29Smrg AMDGPU_GEM_DOMAIN_GTT, 0, 10943f012e29Smrg &ib_result_handle, &ib_result_cpu, 10953f012e29Smrg &ib_result_mc_address, &va_handle); 10963f012e29Smrg CU_ASSERT_EQUAL(r, 0); 10973f012e29Smrg 10983f012e29Smrg r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL, 10993f012e29Smrg &bo_list); 11003f012e29Smrg CU_ASSERT_EQUAL(r, 0); 11013f012e29Smrg 11023f012e29Smrg ptr = ib_result_cpu; 1103d8807b2fSmrg memset(ptr, 0, 16); 1104d8807b2fSmrg ptr[0]=PACKET3(PACKET3_NOP, 14); 11053f012e29Smrg 11063f012e29Smrg memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info)); 11073f012e29Smrg ib_info.ib_mc_address = ib_result_mc_address; 11083f012e29Smrg ib_info.size = 16; 11093f012e29Smrg 11103f012e29Smrg memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request)); 11113f012e29Smrg ibs_request.ip_type = AMDGPU_HW_IP_COMPUTE; 11123f012e29Smrg ibs_request.ring = instance; 11133f012e29Smrg ibs_request.number_of_ibs = 1; 11143f012e29Smrg ibs_request.ibs = &ib_info; 11153f012e29Smrg ibs_request.resources = bo_list; 11163f012e29Smrg ibs_request.fence_info.handle = NULL; 11173f012e29Smrg 11183f012e29Smrg memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence)); 11193f012e29Smrg r = amdgpu_cs_submit(context_handle, 0,&ibs_request, 1); 11203f012e29Smrg CU_ASSERT_EQUAL(r, 0); 11213f012e29Smrg 11223f012e29Smrg fence_status.context = context_handle; 11233f012e29Smrg fence_status.ip_type = AMDGPU_HW_IP_COMPUTE; 11243f012e29Smrg fence_status.ip_instance = 0; 11253f012e29Smrg fence_status.ring = instance; 11263f012e29Smrg fence_status.fence = ibs_request.seq_no; 11273f012e29Smrg 11283f012e29Smrg r = amdgpu_cs_query_fence_status(&fence_status, 11293f012e29Smrg AMDGPU_TIMEOUT_INFINITE, 11303f012e29Smrg 0, &expired); 11313f012e29Smrg CU_ASSERT_EQUAL(r, 0); 11323f012e29Smrg 11333f012e29Smrg r = amdgpu_bo_list_destroy(bo_list); 11343f012e29Smrg CU_ASSERT_EQUAL(r, 
0); 11353f012e29Smrg 11363f012e29Smrg r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle, 11373f012e29Smrg ib_result_mc_address, 4096); 11383f012e29Smrg CU_ASSERT_EQUAL(r, 0); 11393f012e29Smrg } 11403f012e29Smrg 11413f012e29Smrg r = amdgpu_cs_ctx_free(context_handle); 11423f012e29Smrg CU_ASSERT_EQUAL(r, 0); 11433f012e29Smrg} 11443f012e29Smrg 11453f012e29Smrgstatic void amdgpu_command_submission_compute_cp_write_data(void) 11463f012e29Smrg{ 11473f012e29Smrg amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_COMPUTE); 11483f012e29Smrg} 11493f012e29Smrg 11503f012e29Smrgstatic void amdgpu_command_submission_compute_cp_const_fill(void) 11513f012e29Smrg{ 11523f012e29Smrg amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_COMPUTE); 11533f012e29Smrg} 11543f012e29Smrg 11553f012e29Smrgstatic void amdgpu_command_submission_compute_cp_copy_data(void) 11563f012e29Smrg{ 11573f012e29Smrg amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_COMPUTE); 11583f012e29Smrg} 11593f012e29Smrg 11603f012e29Smrgstatic void amdgpu_command_submission_compute(void) 11613f012e29Smrg{ 11623f012e29Smrg /* write data using the CP */ 11633f012e29Smrg amdgpu_command_submission_compute_cp_write_data(); 11643f012e29Smrg /* const fill using the CP */ 11653f012e29Smrg amdgpu_command_submission_compute_cp_const_fill(); 11663f012e29Smrg /* copy data using the CP */ 11673f012e29Smrg amdgpu_command_submission_compute_cp_copy_data(); 11683f012e29Smrg /* nop test */ 11693f012e29Smrg amdgpu_command_submission_compute_nop(); 11703f012e29Smrg} 11713f012e29Smrg 11723f012e29Smrg/* 11733f012e29Smrg * caller need create/release: 11743f012e29Smrg * pm4_src, resources, ib_info, and ibs_request 11753f012e29Smrg * submit command stream described in ibs_request and wait for this IB accomplished 11763f012e29Smrg */ 11773f012e29Smrgstatic void amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle, 11783f012e29Smrg unsigned ip_type, 11793f012e29Smrg int instance, int pm4_dw, uint32_t 
*pm4_src, 11803f012e29Smrg int res_cnt, amdgpu_bo_handle *resources, 11813f012e29Smrg struct amdgpu_cs_ib_info *ib_info, 11823f012e29Smrg struct amdgpu_cs_request *ibs_request) 11833f012e29Smrg{ 11843f012e29Smrg int r; 11853f012e29Smrg uint32_t expired; 11863f012e29Smrg uint32_t *ring_ptr; 11873f012e29Smrg amdgpu_bo_handle ib_result_handle; 11883f012e29Smrg void *ib_result_cpu; 11893f012e29Smrg uint64_t ib_result_mc_address; 11903f012e29Smrg struct amdgpu_cs_fence fence_status = {0}; 11913f012e29Smrg amdgpu_bo_handle *all_res = alloca(sizeof(resources[0]) * (res_cnt + 1)); 11923f012e29Smrg amdgpu_va_handle va_handle; 11933f012e29Smrg 11943f012e29Smrg /* prepare CS */ 11953f012e29Smrg CU_ASSERT_NOT_EQUAL(pm4_src, NULL); 11963f012e29Smrg CU_ASSERT_NOT_EQUAL(resources, NULL); 11973f012e29Smrg CU_ASSERT_NOT_EQUAL(ib_info, NULL); 11983f012e29Smrg CU_ASSERT_NOT_EQUAL(ibs_request, NULL); 11993f012e29Smrg CU_ASSERT_TRUE(pm4_dw <= 1024); 12003f012e29Smrg 12013f012e29Smrg /* allocate IB */ 12023f012e29Smrg r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096, 12033f012e29Smrg AMDGPU_GEM_DOMAIN_GTT, 0, 12043f012e29Smrg &ib_result_handle, &ib_result_cpu, 12053f012e29Smrg &ib_result_mc_address, &va_handle); 12063f012e29Smrg CU_ASSERT_EQUAL(r, 0); 12073f012e29Smrg 12083f012e29Smrg /* copy PM4 packet to ring from caller */ 12093f012e29Smrg ring_ptr = ib_result_cpu; 12103f012e29Smrg memcpy(ring_ptr, pm4_src, pm4_dw * sizeof(*pm4_src)); 12113f012e29Smrg 12123f012e29Smrg ib_info->ib_mc_address = ib_result_mc_address; 12133f012e29Smrg ib_info->size = pm4_dw; 12143f012e29Smrg 12153f012e29Smrg ibs_request->ip_type = ip_type; 12163f012e29Smrg ibs_request->ring = instance; 12173f012e29Smrg ibs_request->number_of_ibs = 1; 12183f012e29Smrg ibs_request->ibs = ib_info; 12193f012e29Smrg ibs_request->fence_info.handle = NULL; 12203f012e29Smrg 12213f012e29Smrg memcpy(all_res, resources, sizeof(resources[0]) * res_cnt); 12223f012e29Smrg all_res[res_cnt] = ib_result_handle; 12233f012e29Smrg 
12243f012e29Smrg r = amdgpu_bo_list_create(device_handle, res_cnt+1, all_res, 12253f012e29Smrg NULL, &ibs_request->resources); 12263f012e29Smrg CU_ASSERT_EQUAL(r, 0); 12273f012e29Smrg 12283f012e29Smrg CU_ASSERT_NOT_EQUAL(ibs_request, NULL); 12293f012e29Smrg 12303f012e29Smrg /* submit CS */ 12313f012e29Smrg r = amdgpu_cs_submit(context_handle, 0, ibs_request, 1); 12323f012e29Smrg CU_ASSERT_EQUAL(r, 0); 12333f012e29Smrg 12343f012e29Smrg r = amdgpu_bo_list_destroy(ibs_request->resources); 12353f012e29Smrg CU_ASSERT_EQUAL(r, 0); 12363f012e29Smrg 12373f012e29Smrg fence_status.ip_type = ip_type; 12383f012e29Smrg fence_status.ip_instance = 0; 12393f012e29Smrg fence_status.ring = ibs_request->ring; 12403f012e29Smrg fence_status.context = context_handle; 12413f012e29Smrg fence_status.fence = ibs_request->seq_no; 12423f012e29Smrg 12433f012e29Smrg /* wait for IB accomplished */ 12443f012e29Smrg r = amdgpu_cs_query_fence_status(&fence_status, 12453f012e29Smrg AMDGPU_TIMEOUT_INFINITE, 12463f012e29Smrg 0, &expired); 12473f012e29Smrg CU_ASSERT_EQUAL(r, 0); 12483f012e29Smrg CU_ASSERT_EQUAL(expired, true); 12493f012e29Smrg 12503f012e29Smrg r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle, 12513f012e29Smrg ib_result_mc_address, 4096); 12523f012e29Smrg CU_ASSERT_EQUAL(r, 0); 12533f012e29Smrg} 12543f012e29Smrg 12553f012e29Smrgstatic void amdgpu_command_submission_write_linear_helper(unsigned ip_type) 12563f012e29Smrg{ 12573f012e29Smrg const int sdma_write_length = 128; 12583f012e29Smrg const int pm4_dw = 256; 12593f012e29Smrg amdgpu_context_handle context_handle; 12603f012e29Smrg amdgpu_bo_handle bo; 12613f012e29Smrg amdgpu_bo_handle *resources; 12623f012e29Smrg uint32_t *pm4; 12633f012e29Smrg struct amdgpu_cs_ib_info *ib_info; 12643f012e29Smrg struct amdgpu_cs_request *ibs_request; 12653f012e29Smrg uint64_t bo_mc; 12663f012e29Smrg volatile uint32_t *bo_cpu; 126700a23bdaSmrg int i, j, r, loop, ring_id; 12683f012e29Smrg uint64_t gtt_flags[2] = {0, 
AMDGPU_GEM_CREATE_CPU_GTT_USWC}; 12693f012e29Smrg amdgpu_va_handle va_handle; 127000a23bdaSmrg struct drm_amdgpu_info_hw_ip hw_ip_info; 12713f012e29Smrg 12723f012e29Smrg pm4 = calloc(pm4_dw, sizeof(*pm4)); 12733f012e29Smrg CU_ASSERT_NOT_EQUAL(pm4, NULL); 12743f012e29Smrg 12753f012e29Smrg ib_info = calloc(1, sizeof(*ib_info)); 12763f012e29Smrg CU_ASSERT_NOT_EQUAL(ib_info, NULL); 12773f012e29Smrg 12783f012e29Smrg ibs_request = calloc(1, sizeof(*ibs_request)); 12793f012e29Smrg CU_ASSERT_NOT_EQUAL(ibs_request, NULL); 12803f012e29Smrg 128100a23bdaSmrg r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info); 128200a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 128300a23bdaSmrg 12843f012e29Smrg r = amdgpu_cs_ctx_create(device_handle, &context_handle); 12853f012e29Smrg CU_ASSERT_EQUAL(r, 0); 12863f012e29Smrg 12873f012e29Smrg /* prepare resource */ 12883f012e29Smrg resources = calloc(1, sizeof(amdgpu_bo_handle)); 12893f012e29Smrg CU_ASSERT_NOT_EQUAL(resources, NULL); 12903f012e29Smrg 129100a23bdaSmrg for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) { 129200a23bdaSmrg loop = 0; 129300a23bdaSmrg while(loop < 2) { 129400a23bdaSmrg /* allocate UC bo for sDMA use */ 129500a23bdaSmrg r = amdgpu_bo_alloc_and_map(device_handle, 129600a23bdaSmrg sdma_write_length * sizeof(uint32_t), 129700a23bdaSmrg 4096, AMDGPU_GEM_DOMAIN_GTT, 129800a23bdaSmrg gtt_flags[loop], &bo, (void**)&bo_cpu, 129900a23bdaSmrg &bo_mc, &va_handle); 130000a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 13013f012e29Smrg 130200a23bdaSmrg /* clear bo */ 130300a23bdaSmrg memset((void*)bo_cpu, 0, sdma_write_length * sizeof(uint32_t)); 13043f012e29Smrg 130500a23bdaSmrg resources[0] = bo; 13063f012e29Smrg 130700a23bdaSmrg /* fulfill PM4: test DMA write-linear */ 130800a23bdaSmrg i = j = 0; 130900a23bdaSmrg if (ip_type == AMDGPU_HW_IP_DMA) { 131000a23bdaSmrg if (family_id == AMDGPU_FAMILY_SI) 131100a23bdaSmrg pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0, 131200a23bdaSmrg sdma_write_length); 
131300a23bdaSmrg else 131400a23bdaSmrg pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE, 131500a23bdaSmrg SDMA_WRITE_SUB_OPCODE_LINEAR, 0); 131600a23bdaSmrg pm4[i++] = 0xffffffff & bo_mc; 131700a23bdaSmrg pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32; 131800a23bdaSmrg if (family_id >= AMDGPU_FAMILY_AI) 131900a23bdaSmrg pm4[i++] = sdma_write_length - 1; 132000a23bdaSmrg else if (family_id != AMDGPU_FAMILY_SI) 132100a23bdaSmrg pm4[i++] = sdma_write_length; 132200a23bdaSmrg while(j++ < sdma_write_length) 132300a23bdaSmrg pm4[i++] = 0xdeadbeaf; 132400a23bdaSmrg } else if ((ip_type == AMDGPU_HW_IP_GFX) || 132500a23bdaSmrg (ip_type == AMDGPU_HW_IP_COMPUTE)) { 132600a23bdaSmrg pm4[i++] = PACKET3(PACKET3_WRITE_DATA, 2 + sdma_write_length); 132700a23bdaSmrg pm4[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM; 132800a23bdaSmrg pm4[i++] = 0xfffffffc & bo_mc; 132900a23bdaSmrg pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32; 133000a23bdaSmrg while(j++ < sdma_write_length) 133100a23bdaSmrg pm4[i++] = 0xdeadbeaf; 133200a23bdaSmrg } 13333f012e29Smrg 133400a23bdaSmrg amdgpu_test_exec_cs_helper(context_handle, 133500a23bdaSmrg ip_type, ring_id, 133600a23bdaSmrg i, pm4, 133700a23bdaSmrg 1, resources, 133800a23bdaSmrg ib_info, ibs_request); 13393f012e29Smrg 134000a23bdaSmrg /* verify if SDMA test result meets with expected */ 134100a23bdaSmrg i = 0; 134200a23bdaSmrg while(i < sdma_write_length) { 134300a23bdaSmrg CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf); 134400a23bdaSmrg } 13453f012e29Smrg 134600a23bdaSmrg r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc, 134700a23bdaSmrg sdma_write_length * sizeof(uint32_t)); 134800a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 134900a23bdaSmrg loop++; 13503f012e29Smrg } 13513f012e29Smrg } 13523f012e29Smrg /* clean resources */ 13533f012e29Smrg free(resources); 13543f012e29Smrg free(ibs_request); 13553f012e29Smrg free(ib_info); 13563f012e29Smrg free(pm4); 13573f012e29Smrg 13583f012e29Smrg /* end of test */ 13593f012e29Smrg r = amdgpu_cs_ctx_free(context_handle); 
13603f012e29Smrg CU_ASSERT_EQUAL(r, 0); 13613f012e29Smrg} 13623f012e29Smrg 13633f012e29Smrgstatic void amdgpu_command_submission_sdma_write_linear(void) 13643f012e29Smrg{ 13653f012e29Smrg amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_DMA); 13663f012e29Smrg} 13673f012e29Smrg 13683f012e29Smrgstatic void amdgpu_command_submission_const_fill_helper(unsigned ip_type) 13693f012e29Smrg{ 13703f012e29Smrg const int sdma_write_length = 1024 * 1024; 13713f012e29Smrg const int pm4_dw = 256; 13723f012e29Smrg amdgpu_context_handle context_handle; 13733f012e29Smrg amdgpu_bo_handle bo; 13743f012e29Smrg amdgpu_bo_handle *resources; 13753f012e29Smrg uint32_t *pm4; 13763f012e29Smrg struct amdgpu_cs_ib_info *ib_info; 13773f012e29Smrg struct amdgpu_cs_request *ibs_request; 13783f012e29Smrg uint64_t bo_mc; 13793f012e29Smrg volatile uint32_t *bo_cpu; 138000a23bdaSmrg int i, j, r, loop, ring_id; 13813f012e29Smrg uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC}; 13823f012e29Smrg amdgpu_va_handle va_handle; 138300a23bdaSmrg struct drm_amdgpu_info_hw_ip hw_ip_info; 13843f012e29Smrg 13853f012e29Smrg pm4 = calloc(pm4_dw, sizeof(*pm4)); 13863f012e29Smrg CU_ASSERT_NOT_EQUAL(pm4, NULL); 13873f012e29Smrg 13883f012e29Smrg ib_info = calloc(1, sizeof(*ib_info)); 13893f012e29Smrg CU_ASSERT_NOT_EQUAL(ib_info, NULL); 13903f012e29Smrg 13913f012e29Smrg ibs_request = calloc(1, sizeof(*ibs_request)); 13923f012e29Smrg CU_ASSERT_NOT_EQUAL(ibs_request, NULL); 13933f012e29Smrg 139400a23bdaSmrg r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info); 139500a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 139600a23bdaSmrg 13973f012e29Smrg r = amdgpu_cs_ctx_create(device_handle, &context_handle); 13983f012e29Smrg CU_ASSERT_EQUAL(r, 0); 13993f012e29Smrg 14003f012e29Smrg /* prepare resource */ 14013f012e29Smrg resources = calloc(1, sizeof(amdgpu_bo_handle)); 14023f012e29Smrg CU_ASSERT_NOT_EQUAL(resources, NULL); 14033f012e29Smrg 140400a23bdaSmrg for (ring_id = 0; (1 << ring_id) & 
hw_ip_info.available_rings; ring_id++) { 140500a23bdaSmrg loop = 0; 140600a23bdaSmrg while(loop < 2) { 140700a23bdaSmrg /* allocate UC bo for sDMA use */ 140800a23bdaSmrg r = amdgpu_bo_alloc_and_map(device_handle, 140900a23bdaSmrg sdma_write_length, 4096, 141000a23bdaSmrg AMDGPU_GEM_DOMAIN_GTT, 141100a23bdaSmrg gtt_flags[loop], &bo, (void**)&bo_cpu, 141200a23bdaSmrg &bo_mc, &va_handle); 141300a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 14143f012e29Smrg 141500a23bdaSmrg /* clear bo */ 141600a23bdaSmrg memset((void*)bo_cpu, 0, sdma_write_length); 14173f012e29Smrg 141800a23bdaSmrg resources[0] = bo; 14193f012e29Smrg 142000a23bdaSmrg /* fulfill PM4: test DMA const fill */ 142100a23bdaSmrg i = j = 0; 142200a23bdaSmrg if (ip_type == AMDGPU_HW_IP_DMA) { 142300a23bdaSmrg if (family_id == AMDGPU_FAMILY_SI) { 142400a23bdaSmrg pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_CONSTANT_FILL_SI, 142500a23bdaSmrg 0, 0, 0, 142600a23bdaSmrg sdma_write_length / 4); 142700a23bdaSmrg pm4[i++] = 0xfffffffc & bo_mc; 142800a23bdaSmrg pm4[i++] = 0xdeadbeaf; 142900a23bdaSmrg pm4[i++] = (0xffffffff00000000 & bo_mc) >> 16; 143000a23bdaSmrg } else { 143100a23bdaSmrg pm4[i++] = SDMA_PACKET(SDMA_OPCODE_CONSTANT_FILL, 0, 143200a23bdaSmrg SDMA_CONSTANT_FILL_EXTRA_SIZE(2)); 143300a23bdaSmrg pm4[i++] = 0xffffffff & bo_mc; 143400a23bdaSmrg pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32; 143500a23bdaSmrg pm4[i++] = 0xdeadbeaf; 143600a23bdaSmrg if (family_id >= AMDGPU_FAMILY_AI) 143700a23bdaSmrg pm4[i++] = sdma_write_length - 1; 143800a23bdaSmrg else 143900a23bdaSmrg pm4[i++] = sdma_write_length; 144000a23bdaSmrg } 144100a23bdaSmrg } else if ((ip_type == AMDGPU_HW_IP_GFX) || 144200a23bdaSmrg (ip_type == AMDGPU_HW_IP_COMPUTE)) { 144300a23bdaSmrg if (family_id == AMDGPU_FAMILY_SI) { 144400a23bdaSmrg pm4[i++] = PACKET3(PACKET3_DMA_DATA_SI, 4); 144500a23bdaSmrg pm4[i++] = 0xdeadbeaf; 144600a23bdaSmrg pm4[i++] = PACKET3_DMA_DATA_SI_ENGINE(0) | 144700a23bdaSmrg PACKET3_DMA_DATA_SI_DST_SEL(0) | 144800a23bdaSmrg 
PACKET3_DMA_DATA_SI_SRC_SEL(2) | 144900a23bdaSmrg PACKET3_DMA_DATA_SI_CP_SYNC; 145000a23bdaSmrg pm4[i++] = 0xffffffff & bo_mc; 145100a23bdaSmrg pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32; 1452d8807b2fSmrg pm4[i++] = sdma_write_length; 145300a23bdaSmrg } else { 145400a23bdaSmrg pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5); 145500a23bdaSmrg pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) | 145600a23bdaSmrg PACKET3_DMA_DATA_DST_SEL(0) | 145700a23bdaSmrg PACKET3_DMA_DATA_SRC_SEL(2) | 145800a23bdaSmrg PACKET3_DMA_DATA_CP_SYNC; 145900a23bdaSmrg pm4[i++] = 0xdeadbeaf; 146000a23bdaSmrg pm4[i++] = 0; 146100a23bdaSmrg pm4[i++] = 0xfffffffc & bo_mc; 146200a23bdaSmrg pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32; 146300a23bdaSmrg pm4[i++] = sdma_write_length; 146400a23bdaSmrg } 1465d8807b2fSmrg } 14663f012e29Smrg 146700a23bdaSmrg amdgpu_test_exec_cs_helper(context_handle, 146800a23bdaSmrg ip_type, ring_id, 146900a23bdaSmrg i, pm4, 147000a23bdaSmrg 1, resources, 147100a23bdaSmrg ib_info, ibs_request); 14723f012e29Smrg 147300a23bdaSmrg /* verify if SDMA test result meets with expected */ 147400a23bdaSmrg i = 0; 147500a23bdaSmrg while(i < (sdma_write_length / 4)) { 147600a23bdaSmrg CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf); 147700a23bdaSmrg } 14783f012e29Smrg 147900a23bdaSmrg r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc, 148000a23bdaSmrg sdma_write_length); 148100a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 148200a23bdaSmrg loop++; 148300a23bdaSmrg } 14843f012e29Smrg } 14853f012e29Smrg /* clean resources */ 14863f012e29Smrg free(resources); 14873f012e29Smrg free(ibs_request); 14883f012e29Smrg free(ib_info); 14893f012e29Smrg free(pm4); 14903f012e29Smrg 14913f012e29Smrg /* end of test */ 14923f012e29Smrg r = amdgpu_cs_ctx_free(context_handle); 14933f012e29Smrg CU_ASSERT_EQUAL(r, 0); 14943f012e29Smrg} 14953f012e29Smrg 14963f012e29Smrgstatic void amdgpu_command_submission_sdma_const_fill(void) 14973f012e29Smrg{ 14983f012e29Smrg amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_DMA); 
14993f012e29Smrg} 15003f012e29Smrg 15013f012e29Smrgstatic void amdgpu_command_submission_copy_linear_helper(unsigned ip_type) 15023f012e29Smrg{ 15033f012e29Smrg const int sdma_write_length = 1024; 15043f012e29Smrg const int pm4_dw = 256; 15053f012e29Smrg amdgpu_context_handle context_handle; 15063f012e29Smrg amdgpu_bo_handle bo1, bo2; 15073f012e29Smrg amdgpu_bo_handle *resources; 15083f012e29Smrg uint32_t *pm4; 15093f012e29Smrg struct amdgpu_cs_ib_info *ib_info; 15103f012e29Smrg struct amdgpu_cs_request *ibs_request; 15113f012e29Smrg uint64_t bo1_mc, bo2_mc; 15123f012e29Smrg volatile unsigned char *bo1_cpu, *bo2_cpu; 151300a23bdaSmrg int i, j, r, loop1, loop2, ring_id; 15143f012e29Smrg uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC}; 15153f012e29Smrg amdgpu_va_handle bo1_va_handle, bo2_va_handle; 151600a23bdaSmrg struct drm_amdgpu_info_hw_ip hw_ip_info; 15173f012e29Smrg 15183f012e29Smrg pm4 = calloc(pm4_dw, sizeof(*pm4)); 15193f012e29Smrg CU_ASSERT_NOT_EQUAL(pm4, NULL); 15203f012e29Smrg 15213f012e29Smrg ib_info = calloc(1, sizeof(*ib_info)); 15223f012e29Smrg CU_ASSERT_NOT_EQUAL(ib_info, NULL); 15233f012e29Smrg 15243f012e29Smrg ibs_request = calloc(1, sizeof(*ibs_request)); 15253f012e29Smrg CU_ASSERT_NOT_EQUAL(ibs_request, NULL); 15263f012e29Smrg 152700a23bdaSmrg r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info); 152800a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 152900a23bdaSmrg 15303f012e29Smrg r = amdgpu_cs_ctx_create(device_handle, &context_handle); 15313f012e29Smrg CU_ASSERT_EQUAL(r, 0); 15323f012e29Smrg 15333f012e29Smrg /* prepare resource */ 15343f012e29Smrg resources = calloc(2, sizeof(amdgpu_bo_handle)); 15353f012e29Smrg CU_ASSERT_NOT_EQUAL(resources, NULL); 15363f012e29Smrg 153700a23bdaSmrg for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) { 153800a23bdaSmrg loop1 = loop2 = 0; 153900a23bdaSmrg /* run 9 circle to test all mapping combination */ 154000a23bdaSmrg while(loop1 < 2) { 154100a23bdaSmrg while(loop2 < 2) { 
154200a23bdaSmrg /* allocate UC bo1for sDMA use */ 154300a23bdaSmrg r = amdgpu_bo_alloc_and_map(device_handle, 154400a23bdaSmrg sdma_write_length, 4096, 154500a23bdaSmrg AMDGPU_GEM_DOMAIN_GTT, 154600a23bdaSmrg gtt_flags[loop1], &bo1, 154700a23bdaSmrg (void**)&bo1_cpu, &bo1_mc, 154800a23bdaSmrg &bo1_va_handle); 154900a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 155000a23bdaSmrg 155100a23bdaSmrg /* set bo1 */ 155200a23bdaSmrg memset((void*)bo1_cpu, 0xaa, sdma_write_length); 155300a23bdaSmrg 155400a23bdaSmrg /* allocate UC bo2 for sDMA use */ 155500a23bdaSmrg r = amdgpu_bo_alloc_and_map(device_handle, 155600a23bdaSmrg sdma_write_length, 4096, 155700a23bdaSmrg AMDGPU_GEM_DOMAIN_GTT, 155800a23bdaSmrg gtt_flags[loop2], &bo2, 155900a23bdaSmrg (void**)&bo2_cpu, &bo2_mc, 156000a23bdaSmrg &bo2_va_handle); 156100a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 156200a23bdaSmrg 156300a23bdaSmrg /* clear bo2 */ 156400a23bdaSmrg memset((void*)bo2_cpu, 0, sdma_write_length); 156500a23bdaSmrg 156600a23bdaSmrg resources[0] = bo1; 156700a23bdaSmrg resources[1] = bo2; 156800a23bdaSmrg 156900a23bdaSmrg /* fulfill PM4: test DMA copy linear */ 157000a23bdaSmrg i = j = 0; 157100a23bdaSmrg if (ip_type == AMDGPU_HW_IP_DMA) { 157200a23bdaSmrg if (family_id == AMDGPU_FAMILY_SI) { 157300a23bdaSmrg pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI, 157400a23bdaSmrg 0, 0, 0, 157500a23bdaSmrg sdma_write_length); 157600a23bdaSmrg pm4[i++] = 0xffffffff & bo2_mc; 157700a23bdaSmrg pm4[i++] = 0xffffffff & bo1_mc; 157800a23bdaSmrg pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32; 157900a23bdaSmrg pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32; 158000a23bdaSmrg } else { 158100a23bdaSmrg pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY, 158200a23bdaSmrg SDMA_COPY_SUB_OPCODE_LINEAR, 158300a23bdaSmrg 0); 158400a23bdaSmrg if (family_id >= AMDGPU_FAMILY_AI) 158500a23bdaSmrg pm4[i++] = sdma_write_length - 1; 158600a23bdaSmrg else 158700a23bdaSmrg pm4[i++] = sdma_write_length; 158800a23bdaSmrg pm4[i++] = 0; 158900a23bdaSmrg pm4[i++] = 0xffffffff & 
bo1_mc; 159000a23bdaSmrg pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32; 159100a23bdaSmrg pm4[i++] = 0xffffffff & bo2_mc; 159200a23bdaSmrg pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32; 159300a23bdaSmrg } 159400a23bdaSmrg } else if ((ip_type == AMDGPU_HW_IP_GFX) || 159500a23bdaSmrg (ip_type == AMDGPU_HW_IP_COMPUTE)) { 159600a23bdaSmrg if (family_id == AMDGPU_FAMILY_SI) { 159700a23bdaSmrg pm4[i++] = PACKET3(PACKET3_DMA_DATA_SI, 4); 159800a23bdaSmrg pm4[i++] = 0xfffffffc & bo1_mc; 159900a23bdaSmrg pm4[i++] = PACKET3_DMA_DATA_SI_ENGINE(0) | 160000a23bdaSmrg PACKET3_DMA_DATA_SI_DST_SEL(0) | 160100a23bdaSmrg PACKET3_DMA_DATA_SI_SRC_SEL(0) | 160200a23bdaSmrg PACKET3_DMA_DATA_SI_CP_SYNC | 160300a23bdaSmrg (0xffff00000000 & bo1_mc) >> 32; 160400a23bdaSmrg pm4[i++] = 0xfffffffc & bo2_mc; 160500a23bdaSmrg pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32; 1606d8807b2fSmrg pm4[i++] = sdma_write_length; 160700a23bdaSmrg } else { 160800a23bdaSmrg pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5); 160900a23bdaSmrg pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) | 161000a23bdaSmrg PACKET3_DMA_DATA_DST_SEL(0) | 161100a23bdaSmrg PACKET3_DMA_DATA_SRC_SEL(0) | 161200a23bdaSmrg PACKET3_DMA_DATA_CP_SYNC; 161300a23bdaSmrg pm4[i++] = 0xfffffffc & bo1_mc; 161400a23bdaSmrg pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32; 161500a23bdaSmrg pm4[i++] = 0xfffffffc & bo2_mc; 161600a23bdaSmrg pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32; 161700a23bdaSmrg pm4[i++] = sdma_write_length; 161800a23bdaSmrg } 1619d8807b2fSmrg } 16203f012e29Smrg 162100a23bdaSmrg amdgpu_test_exec_cs_helper(context_handle, 162200a23bdaSmrg ip_type, ring_id, 162300a23bdaSmrg i, pm4, 162400a23bdaSmrg 2, resources, 162500a23bdaSmrg ib_info, ibs_request); 16263f012e29Smrg 162700a23bdaSmrg /* verify if SDMA test result meets with expected */ 162800a23bdaSmrg i = 0; 162900a23bdaSmrg while(i < sdma_write_length) { 163000a23bdaSmrg CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa); 163100a23bdaSmrg } 163200a23bdaSmrg r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, 
bo1_mc, 163300a23bdaSmrg sdma_write_length); 163400a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 163500a23bdaSmrg r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc, 163600a23bdaSmrg sdma_write_length); 163700a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 163800a23bdaSmrg loop2++; 16393f012e29Smrg } 164000a23bdaSmrg loop1++; 16413f012e29Smrg } 16423f012e29Smrg } 16433f012e29Smrg /* clean resources */ 16443f012e29Smrg free(resources); 16453f012e29Smrg free(ibs_request); 16463f012e29Smrg free(ib_info); 16473f012e29Smrg free(pm4); 16483f012e29Smrg 16493f012e29Smrg /* end of test */ 16503f012e29Smrg r = amdgpu_cs_ctx_free(context_handle); 16513f012e29Smrg CU_ASSERT_EQUAL(r, 0); 16523f012e29Smrg} 16533f012e29Smrg 16543f012e29Smrgstatic void amdgpu_command_submission_sdma_copy_linear(void) 16553f012e29Smrg{ 16563f012e29Smrg amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_DMA); 16573f012e29Smrg} 16583f012e29Smrg 16593f012e29Smrgstatic void amdgpu_command_submission_sdma(void) 16603f012e29Smrg{ 16613f012e29Smrg amdgpu_command_submission_sdma_write_linear(); 16623f012e29Smrg amdgpu_command_submission_sdma_const_fill(); 16633f012e29Smrg amdgpu_command_submission_sdma_copy_linear(); 16643f012e29Smrg} 16653f012e29Smrg 1666d8807b2fSmrgstatic void amdgpu_command_submission_multi_fence_wait_all(bool wait_all) 1667d8807b2fSmrg{ 1668d8807b2fSmrg amdgpu_context_handle context_handle; 1669d8807b2fSmrg amdgpu_bo_handle ib_result_handle, ib_result_ce_handle; 1670d8807b2fSmrg void *ib_result_cpu, *ib_result_ce_cpu; 1671d8807b2fSmrg uint64_t ib_result_mc_address, ib_result_ce_mc_address; 1672d8807b2fSmrg struct amdgpu_cs_request ibs_request[2] = {0}; 1673d8807b2fSmrg struct amdgpu_cs_ib_info ib_info[2]; 1674d8807b2fSmrg struct amdgpu_cs_fence fence_status[2] = {0}; 1675d8807b2fSmrg uint32_t *ptr; 1676d8807b2fSmrg uint32_t expired; 1677d8807b2fSmrg amdgpu_bo_list_handle bo_list; 1678d8807b2fSmrg amdgpu_va_handle va_handle, va_handle_ce; 1679d8807b2fSmrg int r; 1680d8807b2fSmrg int i = 0, ib_cs_num 
= 2; 1681d8807b2fSmrg 1682d8807b2fSmrg r = amdgpu_cs_ctx_create(device_handle, &context_handle); 1683d8807b2fSmrg CU_ASSERT_EQUAL(r, 0); 1684d8807b2fSmrg 1685d8807b2fSmrg r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096, 1686d8807b2fSmrg AMDGPU_GEM_DOMAIN_GTT, 0, 1687d8807b2fSmrg &ib_result_handle, &ib_result_cpu, 1688d8807b2fSmrg &ib_result_mc_address, &va_handle); 1689d8807b2fSmrg CU_ASSERT_EQUAL(r, 0); 1690d8807b2fSmrg 1691d8807b2fSmrg r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096, 1692d8807b2fSmrg AMDGPU_GEM_DOMAIN_GTT, 0, 1693d8807b2fSmrg &ib_result_ce_handle, &ib_result_ce_cpu, 1694d8807b2fSmrg &ib_result_ce_mc_address, &va_handle_ce); 1695d8807b2fSmrg CU_ASSERT_EQUAL(r, 0); 1696d8807b2fSmrg 1697d8807b2fSmrg r = amdgpu_get_bo_list(device_handle, ib_result_handle, 1698d8807b2fSmrg ib_result_ce_handle, &bo_list); 1699d8807b2fSmrg CU_ASSERT_EQUAL(r, 0); 1700d8807b2fSmrg 1701d8807b2fSmrg memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info)); 1702d8807b2fSmrg 1703d8807b2fSmrg /* IT_SET_CE_DE_COUNTERS */ 1704d8807b2fSmrg ptr = ib_result_ce_cpu; 1705d8807b2fSmrg if (family_id != AMDGPU_FAMILY_SI) { 1706d8807b2fSmrg ptr[i++] = 0xc0008900; 1707d8807b2fSmrg ptr[i++] = 0; 1708d8807b2fSmrg } 1709d8807b2fSmrg ptr[i++] = 0xc0008400; 1710d8807b2fSmrg ptr[i++] = 1; 1711d8807b2fSmrg ib_info[0].ib_mc_address = ib_result_ce_mc_address; 1712d8807b2fSmrg ib_info[0].size = i; 1713d8807b2fSmrg ib_info[0].flags = AMDGPU_IB_FLAG_CE; 1714d8807b2fSmrg 1715d8807b2fSmrg /* IT_WAIT_ON_CE_COUNTER */ 1716d8807b2fSmrg ptr = ib_result_cpu; 1717d8807b2fSmrg ptr[0] = 0xc0008600; 1718d8807b2fSmrg ptr[1] = 0x00000001; 1719d8807b2fSmrg ib_info[1].ib_mc_address = ib_result_mc_address; 1720d8807b2fSmrg ib_info[1].size = 2; 1721d8807b2fSmrg 1722d8807b2fSmrg for (i = 0; i < ib_cs_num; i++) { 1723d8807b2fSmrg ibs_request[i].ip_type = AMDGPU_HW_IP_GFX; 1724d8807b2fSmrg ibs_request[i].number_of_ibs = 2; 1725d8807b2fSmrg ibs_request[i].ibs = ib_info; 1726d8807b2fSmrg 
ibs_request[i].resources = bo_list; 1727d8807b2fSmrg ibs_request[i].fence_info.handle = NULL; 1728d8807b2fSmrg } 1729d8807b2fSmrg 1730d8807b2fSmrg r = amdgpu_cs_submit(context_handle, 0,ibs_request, ib_cs_num); 1731d8807b2fSmrg 1732d8807b2fSmrg CU_ASSERT_EQUAL(r, 0); 1733d8807b2fSmrg 1734d8807b2fSmrg for (i = 0; i < ib_cs_num; i++) { 1735d8807b2fSmrg fence_status[i].context = context_handle; 1736d8807b2fSmrg fence_status[i].ip_type = AMDGPU_HW_IP_GFX; 1737d8807b2fSmrg fence_status[i].fence = ibs_request[i].seq_no; 1738d8807b2fSmrg } 1739d8807b2fSmrg 1740d8807b2fSmrg r = amdgpu_cs_wait_fences(fence_status, ib_cs_num, wait_all, 1741d8807b2fSmrg AMDGPU_TIMEOUT_INFINITE, 1742d8807b2fSmrg &expired, NULL); 1743d8807b2fSmrg CU_ASSERT_EQUAL(r, 0); 1744d8807b2fSmrg 1745d8807b2fSmrg r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle, 1746d8807b2fSmrg ib_result_mc_address, 4096); 1747d8807b2fSmrg CU_ASSERT_EQUAL(r, 0); 1748d8807b2fSmrg 1749d8807b2fSmrg r = amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce, 1750d8807b2fSmrg ib_result_ce_mc_address, 4096); 1751d8807b2fSmrg CU_ASSERT_EQUAL(r, 0); 1752d8807b2fSmrg 1753d8807b2fSmrg r = amdgpu_bo_list_destroy(bo_list); 1754d8807b2fSmrg CU_ASSERT_EQUAL(r, 0); 1755d8807b2fSmrg 1756d8807b2fSmrg r = amdgpu_cs_ctx_free(context_handle); 1757d8807b2fSmrg CU_ASSERT_EQUAL(r, 0); 1758d8807b2fSmrg} 1759d8807b2fSmrg 1760d8807b2fSmrgstatic void amdgpu_command_submission_multi_fence(void) 1761d8807b2fSmrg{ 1762d8807b2fSmrg amdgpu_command_submission_multi_fence_wait_all(true); 1763d8807b2fSmrg amdgpu_command_submission_multi_fence_wait_all(false); 1764d8807b2fSmrg} 1765d8807b2fSmrg 17663f012e29Smrgstatic void amdgpu_userptr_test(void) 17673f012e29Smrg{ 17683f012e29Smrg int i, r, j; 17693f012e29Smrg uint32_t *pm4 = NULL; 17703f012e29Smrg uint64_t bo_mc; 17713f012e29Smrg void *ptr = NULL; 17723f012e29Smrg int pm4_dw = 256; 17733f012e29Smrg int sdma_write_length = 4; 17743f012e29Smrg amdgpu_bo_handle handle; 17753f012e29Smrg 
amdgpu_context_handle context_handle; 17763f012e29Smrg struct amdgpu_cs_ib_info *ib_info; 17773f012e29Smrg struct amdgpu_cs_request *ibs_request; 17783f012e29Smrg amdgpu_bo_handle buf_handle; 17793f012e29Smrg amdgpu_va_handle va_handle; 17803f012e29Smrg 17813f012e29Smrg pm4 = calloc(pm4_dw, sizeof(*pm4)); 17823f012e29Smrg CU_ASSERT_NOT_EQUAL(pm4, NULL); 17833f012e29Smrg 17843f012e29Smrg ib_info = calloc(1, sizeof(*ib_info)); 17853f012e29Smrg CU_ASSERT_NOT_EQUAL(ib_info, NULL); 17863f012e29Smrg 17873f012e29Smrg ibs_request = calloc(1, sizeof(*ibs_request)); 17883f012e29Smrg CU_ASSERT_NOT_EQUAL(ibs_request, NULL); 17893f012e29Smrg 17903f012e29Smrg r = amdgpu_cs_ctx_create(device_handle, &context_handle); 17913f012e29Smrg CU_ASSERT_EQUAL(r, 0); 17923f012e29Smrg 17933f012e29Smrg posix_memalign(&ptr, sysconf(_SC_PAGE_SIZE), BUFFER_SIZE); 17943f012e29Smrg CU_ASSERT_NOT_EQUAL(ptr, NULL); 17953f012e29Smrg memset(ptr, 0, BUFFER_SIZE); 17963f012e29Smrg 17973f012e29Smrg r = amdgpu_create_bo_from_user_mem(device_handle, 17983f012e29Smrg ptr, BUFFER_SIZE, &buf_handle); 17993f012e29Smrg CU_ASSERT_EQUAL(r, 0); 18003f012e29Smrg 18013f012e29Smrg r = amdgpu_va_range_alloc(device_handle, 18023f012e29Smrg amdgpu_gpu_va_range_general, 18033f012e29Smrg BUFFER_SIZE, 1, 0, &bo_mc, 18043f012e29Smrg &va_handle, 0); 18053f012e29Smrg CU_ASSERT_EQUAL(r, 0); 18063f012e29Smrg 18073f012e29Smrg r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_MAP); 18083f012e29Smrg CU_ASSERT_EQUAL(r, 0); 18093f012e29Smrg 18103f012e29Smrg handle = buf_handle; 18113f012e29Smrg 18123f012e29Smrg j = i = 0; 1813d8807b2fSmrg 1814d8807b2fSmrg if (family_id == AMDGPU_FAMILY_SI) 1815d8807b2fSmrg pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0, 1816d8807b2fSmrg sdma_write_length); 1817d8807b2fSmrg else 1818d8807b2fSmrg pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE, 1819d8807b2fSmrg SDMA_WRITE_SUB_OPCODE_LINEAR, 0); 18203f012e29Smrg pm4[i++] = 0xffffffff & bo_mc; 18213f012e29Smrg pm4[i++] = 
(0xffffffff00000000 & bo_mc) >> 32; 1822d8807b2fSmrg if (family_id >= AMDGPU_FAMILY_AI) 1823d8807b2fSmrg pm4[i++] = sdma_write_length - 1; 1824d8807b2fSmrg else if (family_id != AMDGPU_FAMILY_SI) 1825d8807b2fSmrg pm4[i++] = sdma_write_length; 18263f012e29Smrg 18273f012e29Smrg while (j++ < sdma_write_length) 18283f012e29Smrg pm4[i++] = 0xdeadbeaf; 18293f012e29Smrg 183000a23bdaSmrg if (!fork()) { 183100a23bdaSmrg pm4[0] = 0x0; 183200a23bdaSmrg exit(0); 183300a23bdaSmrg } 183400a23bdaSmrg 18353f012e29Smrg amdgpu_test_exec_cs_helper(context_handle, 18363f012e29Smrg AMDGPU_HW_IP_DMA, 0, 18373f012e29Smrg i, pm4, 18383f012e29Smrg 1, &handle, 18393f012e29Smrg ib_info, ibs_request); 18403f012e29Smrg i = 0; 18413f012e29Smrg while (i < sdma_write_length) { 18423f012e29Smrg CU_ASSERT_EQUAL(((int*)ptr)[i++], 0xdeadbeaf); 18433f012e29Smrg } 18443f012e29Smrg free(ibs_request); 18453f012e29Smrg free(ib_info); 18463f012e29Smrg free(pm4); 18473f012e29Smrg 18483f012e29Smrg r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_UNMAP); 18493f012e29Smrg CU_ASSERT_EQUAL(r, 0); 18503f012e29Smrg r = amdgpu_va_range_free(va_handle); 18513f012e29Smrg CU_ASSERT_EQUAL(r, 0); 18523f012e29Smrg r = amdgpu_bo_free(buf_handle); 18533f012e29Smrg CU_ASSERT_EQUAL(r, 0); 18543f012e29Smrg free(ptr); 18553f012e29Smrg 18563f012e29Smrg r = amdgpu_cs_ctx_free(context_handle); 18573f012e29Smrg CU_ASSERT_EQUAL(r, 0); 185800a23bdaSmrg 185900a23bdaSmrg wait(NULL); 186000a23bdaSmrg} 186100a23bdaSmrg 186200a23bdaSmrgstatic void amdgpu_sync_dependency_test(void) 186300a23bdaSmrg{ 186400a23bdaSmrg amdgpu_context_handle context_handle[2]; 186500a23bdaSmrg amdgpu_bo_handle ib_result_handle; 186600a23bdaSmrg void *ib_result_cpu; 186700a23bdaSmrg uint64_t ib_result_mc_address; 186800a23bdaSmrg struct amdgpu_cs_request ibs_request; 186900a23bdaSmrg struct amdgpu_cs_ib_info ib_info; 187000a23bdaSmrg struct amdgpu_cs_fence fence_status; 187100a23bdaSmrg uint32_t expired; 187200a23bdaSmrg int i, j, r; 
187300a23bdaSmrg amdgpu_bo_list_handle bo_list; 187400a23bdaSmrg amdgpu_va_handle va_handle; 187500a23bdaSmrg static uint32_t *ptr; 187600a23bdaSmrg uint64_t seq_no; 187700a23bdaSmrg 187800a23bdaSmrg r = amdgpu_cs_ctx_create(device_handle, &context_handle[0]); 187900a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 188000a23bdaSmrg r = amdgpu_cs_ctx_create(device_handle, &context_handle[1]); 188100a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 188200a23bdaSmrg 188300a23bdaSmrg r = amdgpu_bo_alloc_and_map(device_handle, 8192, 4096, 188400a23bdaSmrg AMDGPU_GEM_DOMAIN_GTT, 0, 188500a23bdaSmrg &ib_result_handle, &ib_result_cpu, 188600a23bdaSmrg &ib_result_mc_address, &va_handle); 188700a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 188800a23bdaSmrg 188900a23bdaSmrg r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL, 189000a23bdaSmrg &bo_list); 189100a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 189200a23bdaSmrg 189300a23bdaSmrg ptr = ib_result_cpu; 189400a23bdaSmrg i = 0; 189500a23bdaSmrg 189600a23bdaSmrg memcpy(ptr + CODE_OFFSET , shader_bin, sizeof(shader_bin)); 189700a23bdaSmrg 189800a23bdaSmrg /* Dispatch minimal init config and verify it's executed */ 189900a23bdaSmrg ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1); 190000a23bdaSmrg ptr[i++] = 0x80000000; 190100a23bdaSmrg ptr[i++] = 0x80000000; 190200a23bdaSmrg 190300a23bdaSmrg ptr[i++] = PACKET3(PKT3_CLEAR_STATE, 0); 190400a23bdaSmrg ptr[i++] = 0x80000000; 190500a23bdaSmrg 190600a23bdaSmrg 190700a23bdaSmrg /* Program compute regs */ 190800a23bdaSmrg ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2); 190900a23bdaSmrg ptr[i++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START; 191000a23bdaSmrg ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 8; 191100a23bdaSmrg ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 40; 191200a23bdaSmrg 191300a23bdaSmrg 191400a23bdaSmrg ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2); 191500a23bdaSmrg ptr[i++] = mmCOMPUTE_PGM_RSRC1 - PACKET3_SET_SH_REG_START; 191600a23bdaSmrg /* 191700a23bdaSmrg * 002c0040 COMPUTE_PGM_RSRC1 <- VGPRS = 0 
191800a23bdaSmrg SGPRS = 1 191900a23bdaSmrg PRIORITY = 0 192000a23bdaSmrg FLOAT_MODE = 192 (0xc0) 192100a23bdaSmrg PRIV = 0 192200a23bdaSmrg DX10_CLAMP = 1 192300a23bdaSmrg DEBUG_MODE = 0 192400a23bdaSmrg IEEE_MODE = 0 192500a23bdaSmrg BULKY = 0 192600a23bdaSmrg CDBG_USER = 0 192700a23bdaSmrg * 192800a23bdaSmrg */ 192900a23bdaSmrg ptr[i++] = 0x002c0040; 193000a23bdaSmrg 193100a23bdaSmrg 193200a23bdaSmrg /* 193300a23bdaSmrg * 00000010 COMPUTE_PGM_RSRC2 <- SCRATCH_EN = 0 193400a23bdaSmrg USER_SGPR = 8 193500a23bdaSmrg TRAP_PRESENT = 0 193600a23bdaSmrg TGID_X_EN = 0 193700a23bdaSmrg TGID_Y_EN = 0 193800a23bdaSmrg TGID_Z_EN = 0 193900a23bdaSmrg TG_SIZE_EN = 0 194000a23bdaSmrg TIDIG_COMP_CNT = 0 194100a23bdaSmrg EXCP_EN_MSB = 0 194200a23bdaSmrg LDS_SIZE = 0 194300a23bdaSmrg EXCP_EN = 0 194400a23bdaSmrg * 194500a23bdaSmrg */ 194600a23bdaSmrg ptr[i++] = 0x00000010; 194700a23bdaSmrg 194800a23bdaSmrg 194900a23bdaSmrg/* 195000a23bdaSmrg * 00000100 COMPUTE_TMPRING_SIZE <- WAVES = 256 (0x100) 195100a23bdaSmrg WAVESIZE = 0 195200a23bdaSmrg * 195300a23bdaSmrg */ 195400a23bdaSmrg ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1); 195500a23bdaSmrg ptr[i++] = mmCOMPUTE_TMPRING_SIZE - PACKET3_SET_SH_REG_START; 195600a23bdaSmrg ptr[i++] = 0x00000100; 195700a23bdaSmrg 195800a23bdaSmrg ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2); 195900a23bdaSmrg ptr[i++] = mmCOMPUTE_USER_DATA_0 - PACKET3_SET_SH_REG_START; 196000a23bdaSmrg ptr[i++] = 0xffffffff & (ib_result_mc_address + DATA_OFFSET * 4); 196100a23bdaSmrg ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32; 196200a23bdaSmrg 196300a23bdaSmrg ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1); 196400a23bdaSmrg ptr[i++] = mmCOMPUTE_RESOURCE_LIMITS - PACKET3_SET_SH_REG_START; 196500a23bdaSmrg ptr[i++] = 0; 196600a23bdaSmrg 196700a23bdaSmrg ptr[i++] = PACKET3(PKT3_SET_SH_REG, 3); 196800a23bdaSmrg ptr[i++] = mmCOMPUTE_NUM_THREAD_X - PACKET3_SET_SH_REG_START; 196900a23bdaSmrg ptr[i++] = 1; 197000a23bdaSmrg ptr[i++] = 1; 197100a23bdaSmrg 
ptr[i++] = 1; 197200a23bdaSmrg 197300a23bdaSmrg 197400a23bdaSmrg /* Dispatch */ 197500a23bdaSmrg ptr[i++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 197600a23bdaSmrg ptr[i++] = 1; 197700a23bdaSmrg ptr[i++] = 1; 197800a23bdaSmrg ptr[i++] = 1; 197900a23bdaSmrg ptr[i++] = 0x00000045; /* DISPATCH DIRECT field */ 198000a23bdaSmrg 198100a23bdaSmrg 198200a23bdaSmrg while (i & 7) 198300a23bdaSmrg ptr[i++] = 0xffff1000; /* type3 nop packet */ 198400a23bdaSmrg 198500a23bdaSmrg memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info)); 198600a23bdaSmrg ib_info.ib_mc_address = ib_result_mc_address; 198700a23bdaSmrg ib_info.size = i; 198800a23bdaSmrg 198900a23bdaSmrg memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request)); 199000a23bdaSmrg ibs_request.ip_type = AMDGPU_HW_IP_GFX; 199100a23bdaSmrg ibs_request.ring = 0; 199200a23bdaSmrg ibs_request.number_of_ibs = 1; 199300a23bdaSmrg ibs_request.ibs = &ib_info; 199400a23bdaSmrg ibs_request.resources = bo_list; 199500a23bdaSmrg ibs_request.fence_info.handle = NULL; 199600a23bdaSmrg 199700a23bdaSmrg r = amdgpu_cs_submit(context_handle[1], 0,&ibs_request, 1); 199800a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 199900a23bdaSmrg seq_no = ibs_request.seq_no; 200000a23bdaSmrg 200100a23bdaSmrg 200200a23bdaSmrg 200300a23bdaSmrg /* Prepare second command with dependency on the first */ 200400a23bdaSmrg j = i; 200500a23bdaSmrg ptr[i++] = PACKET3(PACKET3_WRITE_DATA, 3); 200600a23bdaSmrg ptr[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM; 200700a23bdaSmrg ptr[i++] = 0xfffffffc & (ib_result_mc_address + DATA_OFFSET * 4); 200800a23bdaSmrg ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32; 200900a23bdaSmrg ptr[i++] = 99; 201000a23bdaSmrg 201100a23bdaSmrg while (i & 7) 201200a23bdaSmrg ptr[i++] = 0xffff1000; /* type3 nop packet */ 201300a23bdaSmrg 201400a23bdaSmrg memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info)); 201500a23bdaSmrg ib_info.ib_mc_address = ib_result_mc_address + j * 4; 201600a23bdaSmrg ib_info.size = i - j; 
201700a23bdaSmrg 201800a23bdaSmrg memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request)); 201900a23bdaSmrg ibs_request.ip_type = AMDGPU_HW_IP_GFX; 202000a23bdaSmrg ibs_request.ring = 0; 202100a23bdaSmrg ibs_request.number_of_ibs = 1; 202200a23bdaSmrg ibs_request.ibs = &ib_info; 202300a23bdaSmrg ibs_request.resources = bo_list; 202400a23bdaSmrg ibs_request.fence_info.handle = NULL; 202500a23bdaSmrg 202600a23bdaSmrg ibs_request.number_of_dependencies = 1; 202700a23bdaSmrg 202800a23bdaSmrg ibs_request.dependencies = calloc(1, sizeof(*ibs_request.dependencies)); 202900a23bdaSmrg ibs_request.dependencies[0].context = context_handle[1]; 203000a23bdaSmrg ibs_request.dependencies[0].ip_instance = 0; 203100a23bdaSmrg ibs_request.dependencies[0].ring = 0; 203200a23bdaSmrg ibs_request.dependencies[0].fence = seq_no; 203300a23bdaSmrg 203400a23bdaSmrg 203500a23bdaSmrg r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request, 1); 203600a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 203700a23bdaSmrg 203800a23bdaSmrg 203900a23bdaSmrg memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence)); 204000a23bdaSmrg fence_status.context = context_handle[0]; 204100a23bdaSmrg fence_status.ip_type = AMDGPU_HW_IP_GFX; 204200a23bdaSmrg fence_status.ip_instance = 0; 204300a23bdaSmrg fence_status.ring = 0; 204400a23bdaSmrg fence_status.fence = ibs_request.seq_no; 204500a23bdaSmrg 204600a23bdaSmrg r = amdgpu_cs_query_fence_status(&fence_status, 204700a23bdaSmrg AMDGPU_TIMEOUT_INFINITE,0, &expired); 204800a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 204900a23bdaSmrg 205000a23bdaSmrg /* Expect the second command to wait for shader to complete */ 205100a23bdaSmrg CU_ASSERT_EQUAL(ptr[DATA_OFFSET], 99); 205200a23bdaSmrg 205300a23bdaSmrg r = amdgpu_bo_list_destroy(bo_list); 205400a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 205500a23bdaSmrg 205600a23bdaSmrg r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle, 205700a23bdaSmrg ib_result_mc_address, 4096); 205800a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 205900a23bdaSmrg 206000a23bdaSmrg r = 
amdgpu_cs_ctx_free(context_handle[0]); 206100a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 206200a23bdaSmrg r = amdgpu_cs_ctx_free(context_handle[1]); 206300a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 206400a23bdaSmrg 206500a23bdaSmrg free(ibs_request.dependencies); 20663f012e29Smrg} 20675324fb0dSmrg 20685324fb0dSmrgstatic int amdgpu_dispatch_load_cs_shader(uint8_t *ptr, 20695324fb0dSmrg int cs_type) 20705324fb0dSmrg{ 20715324fb0dSmrg uint32_t shader_size; 20725324fb0dSmrg const uint32_t *shader; 20735324fb0dSmrg 20745324fb0dSmrg switch (cs_type) { 20755324fb0dSmrg case CS_BUFFERCLEAR: 20765324fb0dSmrg shader = bufferclear_cs_shader_gfx9; 20775324fb0dSmrg shader_size = sizeof(bufferclear_cs_shader_gfx9); 20785324fb0dSmrg break; 20795324fb0dSmrg case CS_BUFFERCOPY: 20805324fb0dSmrg shader = buffercopy_cs_shader_gfx9; 20815324fb0dSmrg shader_size = sizeof(buffercopy_cs_shader_gfx9); 20825324fb0dSmrg break; 20835324fb0dSmrg default: 20845324fb0dSmrg return -1; 20855324fb0dSmrg break; 20865324fb0dSmrg } 20875324fb0dSmrg 20885324fb0dSmrg memcpy(ptr, shader, shader_size); 20895324fb0dSmrg return 0; 20905324fb0dSmrg} 20915324fb0dSmrg 20925324fb0dSmrgstatic int amdgpu_dispatch_init(uint32_t *ptr, uint32_t ip_type) 20935324fb0dSmrg{ 20945324fb0dSmrg int i = 0; 20955324fb0dSmrg 20965324fb0dSmrg /* Write context control and load shadowing register if necessary */ 20975324fb0dSmrg if (ip_type == AMDGPU_HW_IP_GFX) { 20985324fb0dSmrg ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1); 20995324fb0dSmrg ptr[i++] = 0x80000000; 21005324fb0dSmrg ptr[i++] = 0x80000000; 21015324fb0dSmrg } 21025324fb0dSmrg 21035324fb0dSmrg /* Issue commands to set default compute state. 
*/ 21045324fb0dSmrg /* clear mmCOMPUTE_START_Z - mmCOMPUTE_START_X */ 21055324fb0dSmrg ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 3); 21065324fb0dSmrg ptr[i++] = 0x204; 21075324fb0dSmrg i += 3; 210888f8a8d2Smrg 21095324fb0dSmrg /* clear mmCOMPUTE_TMPRING_SIZE */ 21105324fb0dSmrg ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1); 21115324fb0dSmrg ptr[i++] = 0x218; 21125324fb0dSmrg ptr[i++] = 0; 21135324fb0dSmrg 21145324fb0dSmrg return i; 21155324fb0dSmrg} 21165324fb0dSmrg 21175324fb0dSmrgstatic int amdgpu_dispatch_write_cumask(uint32_t *ptr) 21185324fb0dSmrg{ 21195324fb0dSmrg int i = 0; 21205324fb0dSmrg 21215324fb0dSmrg /* Issue commands to set cu mask used in current dispatch */ 21225324fb0dSmrg /* set mmCOMPUTE_STATIC_THREAD_MGMT_SE1 - mmCOMPUTE_STATIC_THREAD_MGMT_SE0 */ 21235324fb0dSmrg ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2); 21245324fb0dSmrg ptr[i++] = 0x216; 21255324fb0dSmrg ptr[i++] = 0xffffffff; 21265324fb0dSmrg ptr[i++] = 0xffffffff; 21275324fb0dSmrg /* set mmCOMPUTE_STATIC_THREAD_MGMT_SE3 - mmCOMPUTE_STATIC_THREAD_MGMT_SE2 */ 21285324fb0dSmrg ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2); 21295324fb0dSmrg ptr[i++] = 0x219; 21305324fb0dSmrg ptr[i++] = 0xffffffff; 21315324fb0dSmrg ptr[i++] = 0xffffffff; 21325324fb0dSmrg 21335324fb0dSmrg return i; 21345324fb0dSmrg} 21355324fb0dSmrg 21365324fb0dSmrgstatic int amdgpu_dispatch_write2hw(uint32_t *ptr, uint64_t shader_addr) 21375324fb0dSmrg{ 21385324fb0dSmrg int i, j; 21395324fb0dSmrg 21405324fb0dSmrg i = 0; 21415324fb0dSmrg 21425324fb0dSmrg /* Writes shader state to HW */ 21435324fb0dSmrg /* set mmCOMPUTE_PGM_HI - mmCOMPUTE_PGM_LO */ 21445324fb0dSmrg ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2); 21455324fb0dSmrg ptr[i++] = 0x20c; 21465324fb0dSmrg ptr[i++] = (shader_addr >> 8); 21475324fb0dSmrg ptr[i++] = (shader_addr >> 40); 21485324fb0dSmrg /* write sh regs*/ 21495324fb0dSmrg for (j = 0; j < bufferclear_cs_shader_registers_num_gfx9; j++) { 21505324fb0dSmrg ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1); 
21515324fb0dSmrg /* - Gfx9ShRegBase */ 21525324fb0dSmrg ptr[i++] = bufferclear_cs_shader_registers_gfx9[j][0] - 0x2c00; 21535324fb0dSmrg ptr[i++] = bufferclear_cs_shader_registers_gfx9[j][1]; 21545324fb0dSmrg } 21555324fb0dSmrg 21565324fb0dSmrg return i; 21575324fb0dSmrg} 21585324fb0dSmrg 21595324fb0dSmrgstatic void amdgpu_memset_dispatch_test(amdgpu_device_handle device_handle, 21605324fb0dSmrg uint32_t ip_type, 21615324fb0dSmrg uint32_t ring) 21625324fb0dSmrg{ 21635324fb0dSmrg amdgpu_context_handle context_handle; 21645324fb0dSmrg amdgpu_bo_handle bo_dst, bo_shader, bo_cmd, resources[3]; 21655324fb0dSmrg volatile unsigned char *ptr_dst; 21665324fb0dSmrg void *ptr_shader; 21675324fb0dSmrg uint32_t *ptr_cmd; 21685324fb0dSmrg uint64_t mc_address_dst, mc_address_shader, mc_address_cmd; 21695324fb0dSmrg amdgpu_va_handle va_dst, va_shader, va_cmd; 21705324fb0dSmrg int i, r; 21715324fb0dSmrg int bo_dst_size = 16384; 21725324fb0dSmrg int bo_shader_size = 4096; 21735324fb0dSmrg int bo_cmd_size = 4096; 21745324fb0dSmrg struct amdgpu_cs_request ibs_request = {0}; 21755324fb0dSmrg struct amdgpu_cs_ib_info ib_info= {0}; 21765324fb0dSmrg amdgpu_bo_list_handle bo_list; 21775324fb0dSmrg struct amdgpu_cs_fence fence_status = {0}; 21785324fb0dSmrg uint32_t expired; 21795324fb0dSmrg 21805324fb0dSmrg r = amdgpu_cs_ctx_create(device_handle, &context_handle); 21815324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 21825324fb0dSmrg 21835324fb0dSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096, 21845324fb0dSmrg AMDGPU_GEM_DOMAIN_GTT, 0, 21855324fb0dSmrg &bo_cmd, (void **)&ptr_cmd, 21865324fb0dSmrg &mc_address_cmd, &va_cmd); 21875324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 21885324fb0dSmrg memset(ptr_cmd, 0, bo_cmd_size); 21895324fb0dSmrg 21905324fb0dSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096, 21915324fb0dSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, 21925324fb0dSmrg &bo_shader, &ptr_shader, 21935324fb0dSmrg &mc_address_shader, &va_shader); 21945324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 
219588f8a8d2Smrg memset(ptr_shader, 0, bo_shader_size); 21965324fb0dSmrg 21975324fb0dSmrg r = amdgpu_dispatch_load_cs_shader(ptr_shader, CS_BUFFERCLEAR); 21985324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 21995324fb0dSmrg 22005324fb0dSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096, 22015324fb0dSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, 22025324fb0dSmrg &bo_dst, (void **)&ptr_dst, 22035324fb0dSmrg &mc_address_dst, &va_dst); 22045324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 22055324fb0dSmrg 22065324fb0dSmrg i = 0; 22075324fb0dSmrg i += amdgpu_dispatch_init(ptr_cmd + i, ip_type); 22085324fb0dSmrg 22095324fb0dSmrg /* Issue commands to set cu mask used in current dispatch */ 22105324fb0dSmrg i += amdgpu_dispatch_write_cumask(ptr_cmd + i); 22115324fb0dSmrg 22125324fb0dSmrg /* Writes shader state to HW */ 22135324fb0dSmrg i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader); 22145324fb0dSmrg 22155324fb0dSmrg /* Write constant data */ 22165324fb0dSmrg /* Writes the UAV constant data to the SGPRs. */ 22175324fb0dSmrg ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4); 22185324fb0dSmrg ptr_cmd[i++] = 0x240; 22195324fb0dSmrg ptr_cmd[i++] = mc_address_dst; 22205324fb0dSmrg ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000; 22215324fb0dSmrg ptr_cmd[i++] = 0x400; 22225324fb0dSmrg ptr_cmd[i++] = 0x74fac; 22235324fb0dSmrg 22245324fb0dSmrg /* Sets a range of pixel shader constants */ 22255324fb0dSmrg ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4); 22265324fb0dSmrg ptr_cmd[i++] = 0x244; 22275324fb0dSmrg ptr_cmd[i++] = 0x22222222; 22285324fb0dSmrg ptr_cmd[i++] = 0x22222222; 22295324fb0dSmrg ptr_cmd[i++] = 0x22222222; 22305324fb0dSmrg ptr_cmd[i++] = 0x22222222; 22315324fb0dSmrg 223288f8a8d2Smrg /* clear mmCOMPUTE_RESOURCE_LIMITS */ 223388f8a8d2Smrg ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1); 223488f8a8d2Smrg ptr_cmd[i++] = 0x215; 223588f8a8d2Smrg ptr_cmd[i++] = 0; 223688f8a8d2Smrg 22375324fb0dSmrg /* dispatch direct command */ 22385324fb0dSmrg ptr_cmd[i++] = 
PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3); 22395324fb0dSmrg ptr_cmd[i++] = 0x10; 22405324fb0dSmrg ptr_cmd[i++] = 1; 22415324fb0dSmrg ptr_cmd[i++] = 1; 22425324fb0dSmrg ptr_cmd[i++] = 1; 22435324fb0dSmrg 22445324fb0dSmrg while (i & 7) 22455324fb0dSmrg ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */ 22465324fb0dSmrg 22475324fb0dSmrg resources[0] = bo_dst; 22485324fb0dSmrg resources[1] = bo_shader; 22495324fb0dSmrg resources[2] = bo_cmd; 22505324fb0dSmrg r = amdgpu_bo_list_create(device_handle, 3, resources, NULL, &bo_list); 22515324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 22525324fb0dSmrg 22535324fb0dSmrg ib_info.ib_mc_address = mc_address_cmd; 22545324fb0dSmrg ib_info.size = i; 22555324fb0dSmrg ibs_request.ip_type = ip_type; 22565324fb0dSmrg ibs_request.ring = ring; 22575324fb0dSmrg ibs_request.resources = bo_list; 22585324fb0dSmrg ibs_request.number_of_ibs = 1; 22595324fb0dSmrg ibs_request.ibs = &ib_info; 22605324fb0dSmrg ibs_request.fence_info.handle = NULL; 22615324fb0dSmrg 22625324fb0dSmrg /* submit CS */ 22635324fb0dSmrg r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1); 22645324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 22655324fb0dSmrg 22665324fb0dSmrg r = amdgpu_bo_list_destroy(bo_list); 22675324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 22685324fb0dSmrg 22695324fb0dSmrg fence_status.ip_type = ip_type; 22705324fb0dSmrg fence_status.ip_instance = 0; 22715324fb0dSmrg fence_status.ring = ring; 22725324fb0dSmrg fence_status.context = context_handle; 22735324fb0dSmrg fence_status.fence = ibs_request.seq_no; 22745324fb0dSmrg 22755324fb0dSmrg /* wait for IB accomplished */ 22765324fb0dSmrg r = amdgpu_cs_query_fence_status(&fence_status, 22775324fb0dSmrg AMDGPU_TIMEOUT_INFINITE, 22785324fb0dSmrg 0, &expired); 22795324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 22805324fb0dSmrg CU_ASSERT_EQUAL(expired, true); 22815324fb0dSmrg 22825324fb0dSmrg /* verify if memset test result meets with expected */ 22835324fb0dSmrg i = 0; 22845324fb0dSmrg while(i < bo_dst_size) { 22855324fb0dSmrg 
CU_ASSERT_EQUAL(ptr_dst[i++], 0x22); 22865324fb0dSmrg } 22875324fb0dSmrg 22885324fb0dSmrg r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size); 22895324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 22905324fb0dSmrg 22915324fb0dSmrg r = amdgpu_bo_unmap_and_free(bo_shader, va_shader, mc_address_shader, bo_shader_size); 22925324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 22935324fb0dSmrg 22945324fb0dSmrg r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size); 22955324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 22965324fb0dSmrg 22975324fb0dSmrg r = amdgpu_cs_ctx_free(context_handle); 22985324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 22995324fb0dSmrg} 23005324fb0dSmrg 23015324fb0dSmrgstatic void amdgpu_memcpy_dispatch_test(amdgpu_device_handle device_handle, 23025324fb0dSmrg uint32_t ip_type, 23035324fb0dSmrg uint32_t ring) 23045324fb0dSmrg{ 23055324fb0dSmrg amdgpu_context_handle context_handle; 23065324fb0dSmrg amdgpu_bo_handle bo_src, bo_dst, bo_shader, bo_cmd, resources[4]; 23075324fb0dSmrg volatile unsigned char *ptr_dst; 23085324fb0dSmrg void *ptr_shader; 23095324fb0dSmrg unsigned char *ptr_src; 23105324fb0dSmrg uint32_t *ptr_cmd; 23115324fb0dSmrg uint64_t mc_address_src, mc_address_dst, mc_address_shader, mc_address_cmd; 23125324fb0dSmrg amdgpu_va_handle va_src, va_dst, va_shader, va_cmd; 23135324fb0dSmrg int i, r; 23145324fb0dSmrg int bo_dst_size = 16384; 23155324fb0dSmrg int bo_shader_size = 4096; 23165324fb0dSmrg int bo_cmd_size = 4096; 23175324fb0dSmrg struct amdgpu_cs_request ibs_request = {0}; 23185324fb0dSmrg struct amdgpu_cs_ib_info ib_info= {0}; 23195324fb0dSmrg uint32_t expired; 23205324fb0dSmrg amdgpu_bo_list_handle bo_list; 23215324fb0dSmrg struct amdgpu_cs_fence fence_status = {0}; 23225324fb0dSmrg 23235324fb0dSmrg r = amdgpu_cs_ctx_create(device_handle, &context_handle); 23245324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 23255324fb0dSmrg 23265324fb0dSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096, 23275324fb0dSmrg AMDGPU_GEM_DOMAIN_GTT, 0, 
23285324fb0dSmrg &bo_cmd, (void **)&ptr_cmd, 23295324fb0dSmrg &mc_address_cmd, &va_cmd); 23305324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 23315324fb0dSmrg memset(ptr_cmd, 0, bo_cmd_size); 23325324fb0dSmrg 23335324fb0dSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096, 23345324fb0dSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, 23355324fb0dSmrg &bo_shader, &ptr_shader, 23365324fb0dSmrg &mc_address_shader, &va_shader); 23375324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 233888f8a8d2Smrg memset(ptr_shader, 0, bo_shader_size); 23395324fb0dSmrg 23405324fb0dSmrg r = amdgpu_dispatch_load_cs_shader(ptr_shader, CS_BUFFERCOPY ); 23415324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 23425324fb0dSmrg 23435324fb0dSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096, 23445324fb0dSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, 23455324fb0dSmrg &bo_src, (void **)&ptr_src, 23465324fb0dSmrg &mc_address_src, &va_src); 23475324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 23485324fb0dSmrg 23495324fb0dSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096, 23505324fb0dSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, 23515324fb0dSmrg &bo_dst, (void **)&ptr_dst, 23525324fb0dSmrg &mc_address_dst, &va_dst); 23535324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 23545324fb0dSmrg 23555324fb0dSmrg memset(ptr_src, 0x55, bo_dst_size); 23565324fb0dSmrg 23575324fb0dSmrg i = 0; 23585324fb0dSmrg i += amdgpu_dispatch_init(ptr_cmd + i, ip_type); 23595324fb0dSmrg 23605324fb0dSmrg /* Issue commands to set cu mask used in current dispatch */ 23615324fb0dSmrg i += amdgpu_dispatch_write_cumask(ptr_cmd + i); 23625324fb0dSmrg 23635324fb0dSmrg /* Writes shader state to HW */ 23645324fb0dSmrg i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader); 23655324fb0dSmrg 23665324fb0dSmrg /* Write constant data */ 23675324fb0dSmrg /* Writes the texture resource constants data to the SGPRs */ 23685324fb0dSmrg ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4); 23695324fb0dSmrg ptr_cmd[i++] = 0x240; 23705324fb0dSmrg ptr_cmd[i++] = mc_address_src; 23715324fb0dSmrg ptr_cmd[i++] = 
(mc_address_src >> 32) | 0x100000; 23725324fb0dSmrg ptr_cmd[i++] = 0x400; 23735324fb0dSmrg ptr_cmd[i++] = 0x74fac; 23745324fb0dSmrg 23755324fb0dSmrg /* Writes the UAV constant data to the SGPRs. */ 23765324fb0dSmrg ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4); 23775324fb0dSmrg ptr_cmd[i++] = 0x244; 23785324fb0dSmrg ptr_cmd[i++] = mc_address_dst; 23795324fb0dSmrg ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000; 23805324fb0dSmrg ptr_cmd[i++] = 0x400; 23815324fb0dSmrg ptr_cmd[i++] = 0x74fac; 23825324fb0dSmrg 238388f8a8d2Smrg /* clear mmCOMPUTE_RESOURCE_LIMITS */ 238488f8a8d2Smrg ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1); 238588f8a8d2Smrg ptr_cmd[i++] = 0x215; 238688f8a8d2Smrg ptr_cmd[i++] = 0; 238788f8a8d2Smrg 23885324fb0dSmrg /* dispatch direct command */ 23895324fb0dSmrg ptr_cmd[i++] = PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3); 23905324fb0dSmrg ptr_cmd[i++] = 0x10; 23915324fb0dSmrg ptr_cmd[i++] = 1; 23925324fb0dSmrg ptr_cmd[i++] = 1; 23935324fb0dSmrg ptr_cmd[i++] = 1; 23945324fb0dSmrg 23955324fb0dSmrg while (i & 7) 23965324fb0dSmrg ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */ 23975324fb0dSmrg 23985324fb0dSmrg resources[0] = bo_shader; 23995324fb0dSmrg resources[1] = bo_src; 24005324fb0dSmrg resources[2] = bo_dst; 24015324fb0dSmrg resources[3] = bo_cmd; 24025324fb0dSmrg r = amdgpu_bo_list_create(device_handle, 4, resources, NULL, &bo_list); 24035324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 24045324fb0dSmrg 24055324fb0dSmrg ib_info.ib_mc_address = mc_address_cmd; 24065324fb0dSmrg ib_info.size = i; 24075324fb0dSmrg ibs_request.ip_type = ip_type; 24085324fb0dSmrg ibs_request.ring = ring; 24095324fb0dSmrg ibs_request.resources = bo_list; 24105324fb0dSmrg ibs_request.number_of_ibs = 1; 24115324fb0dSmrg ibs_request.ibs = &ib_info; 24125324fb0dSmrg ibs_request.fence_info.handle = NULL; 24135324fb0dSmrg r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1); 24145324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 24155324fb0dSmrg 24165324fb0dSmrg fence_status.ip_type = 
ip_type; 24175324fb0dSmrg fence_status.ip_instance = 0; 24185324fb0dSmrg fence_status.ring = ring; 24195324fb0dSmrg fence_status.context = context_handle; 24205324fb0dSmrg fence_status.fence = ibs_request.seq_no; 24215324fb0dSmrg 24225324fb0dSmrg /* wait for IB accomplished */ 24235324fb0dSmrg r = amdgpu_cs_query_fence_status(&fence_status, 24245324fb0dSmrg AMDGPU_TIMEOUT_INFINITE, 24255324fb0dSmrg 0, &expired); 24265324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 24275324fb0dSmrg CU_ASSERT_EQUAL(expired, true); 24285324fb0dSmrg 24295324fb0dSmrg /* verify if memcpy test result meets with expected */ 24305324fb0dSmrg i = 0; 24315324fb0dSmrg while(i < bo_dst_size) { 24325324fb0dSmrg CU_ASSERT_EQUAL(ptr_dst[i], ptr_src[i]); 24335324fb0dSmrg i++; 24345324fb0dSmrg } 24355324fb0dSmrg 24365324fb0dSmrg r = amdgpu_bo_list_destroy(bo_list); 24375324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 24385324fb0dSmrg 24395324fb0dSmrg r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_dst_size); 24405324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 24415324fb0dSmrg r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size); 24425324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 24435324fb0dSmrg 24445324fb0dSmrg r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size); 24455324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 24465324fb0dSmrg 24475324fb0dSmrg r = amdgpu_bo_unmap_and_free(bo_shader, va_shader, mc_address_shader, bo_shader_size); 24485324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 24495324fb0dSmrg 24505324fb0dSmrg r = amdgpu_cs_ctx_free(context_handle); 24515324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 24525324fb0dSmrg} 245388f8a8d2Smrg 245488f8a8d2Smrgstatic void amdgpu_compute_dispatch_test(void) 24555324fb0dSmrg{ 24565324fb0dSmrg int r; 24575324fb0dSmrg struct drm_amdgpu_info_hw_ip info; 24585324fb0dSmrg uint32_t ring_id; 24595324fb0dSmrg 24605324fb0dSmrg r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_COMPUTE, 0, &info); 24615324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 246288f8a8d2Smrg if (!info.available_rings) 
246388f8a8d2Smrg printf("SKIP ... as there's no compute ring\n"); 24645324fb0dSmrg 24655324fb0dSmrg for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) { 24665324fb0dSmrg amdgpu_memset_dispatch_test(device_handle, AMDGPU_HW_IP_COMPUTE, ring_id); 24675324fb0dSmrg amdgpu_memcpy_dispatch_test(device_handle, AMDGPU_HW_IP_COMPUTE, ring_id); 24685324fb0dSmrg } 246988f8a8d2Smrg} 247088f8a8d2Smrg 247188f8a8d2Smrgstatic void amdgpu_gfx_dispatch_test(void) 247288f8a8d2Smrg{ 247388f8a8d2Smrg int r; 247488f8a8d2Smrg struct drm_amdgpu_info_hw_ip info; 247588f8a8d2Smrg uint32_t ring_id; 24765324fb0dSmrg 24775324fb0dSmrg r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info); 24785324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 247988f8a8d2Smrg if (!info.available_rings) 248088f8a8d2Smrg printf("SKIP ... as there's no graphics ring\n"); 24815324fb0dSmrg 24825324fb0dSmrg for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) { 24835324fb0dSmrg amdgpu_memset_dispatch_test(device_handle, AMDGPU_HW_IP_GFX, ring_id); 24845324fb0dSmrg amdgpu_memcpy_dispatch_test(device_handle, AMDGPU_HW_IP_GFX, ring_id); 24855324fb0dSmrg } 24865324fb0dSmrg} 24875324fb0dSmrg 24885324fb0dSmrgstatic int amdgpu_draw_load_ps_shader(uint8_t *ptr, int ps_type) 24895324fb0dSmrg{ 24905324fb0dSmrg int i; 24915324fb0dSmrg uint32_t shader_offset= 256; 24925324fb0dSmrg uint32_t mem_offset, patch_code_offset; 24935324fb0dSmrg uint32_t shader_size, patchinfo_code_size; 24945324fb0dSmrg const uint32_t *shader; 24955324fb0dSmrg const uint32_t *patchinfo_code; 24965324fb0dSmrg const uint32_t *patchcode_offset; 24975324fb0dSmrg 24985324fb0dSmrg switch (ps_type) { 24995324fb0dSmrg case PS_CONST: 25005324fb0dSmrg shader = ps_const_shader_gfx9; 25015324fb0dSmrg shader_size = sizeof(ps_const_shader_gfx9); 25025324fb0dSmrg patchinfo_code = (const uint32_t *)ps_const_shader_patchinfo_code_gfx9; 25035324fb0dSmrg patchinfo_code_size = ps_const_shader_patchinfo_code_size_gfx9; 25045324fb0dSmrg 
patchcode_offset = ps_const_shader_patchinfo_offset_gfx9; 25055324fb0dSmrg break; 25065324fb0dSmrg case PS_TEX: 25075324fb0dSmrg shader = ps_tex_shader_gfx9; 25085324fb0dSmrg shader_size = sizeof(ps_tex_shader_gfx9); 25095324fb0dSmrg patchinfo_code = (const uint32_t *)ps_tex_shader_patchinfo_code_gfx9; 25105324fb0dSmrg patchinfo_code_size = ps_tex_shader_patchinfo_code_size_gfx9; 25115324fb0dSmrg patchcode_offset = ps_tex_shader_patchinfo_offset_gfx9; 25125324fb0dSmrg break; 25135324fb0dSmrg default: 25145324fb0dSmrg return -1; 25155324fb0dSmrg break; 25165324fb0dSmrg } 25175324fb0dSmrg 25185324fb0dSmrg /* write main shader program */ 25195324fb0dSmrg for (i = 0 ; i < 10; i++) { 25205324fb0dSmrg mem_offset = i * shader_offset; 25215324fb0dSmrg memcpy(ptr + mem_offset, shader, shader_size); 25225324fb0dSmrg } 25235324fb0dSmrg 25245324fb0dSmrg /* overwrite patch codes */ 25255324fb0dSmrg for (i = 0 ; i < 10; i++) { 25265324fb0dSmrg mem_offset = i * shader_offset + patchcode_offset[0] * sizeof(uint32_t); 25275324fb0dSmrg patch_code_offset = i * patchinfo_code_size; 25285324fb0dSmrg memcpy(ptr + mem_offset, 25295324fb0dSmrg patchinfo_code + patch_code_offset, 25305324fb0dSmrg patchinfo_code_size * sizeof(uint32_t)); 25315324fb0dSmrg } 25325324fb0dSmrg 25335324fb0dSmrg return 0; 25345324fb0dSmrg} 25355324fb0dSmrg 25365324fb0dSmrg/* load RectPosTexFast_VS */ 25375324fb0dSmrgstatic int amdgpu_draw_load_vs_shader(uint8_t *ptr) 25385324fb0dSmrg{ 25395324fb0dSmrg const uint32_t *shader; 25405324fb0dSmrg uint32_t shader_size; 25415324fb0dSmrg 25425324fb0dSmrg shader = vs_RectPosTexFast_shader_gfx9; 25435324fb0dSmrg shader_size = sizeof(vs_RectPosTexFast_shader_gfx9); 25445324fb0dSmrg 25455324fb0dSmrg memcpy(ptr, shader, shader_size); 25465324fb0dSmrg 25475324fb0dSmrg return 0; 25485324fb0dSmrg} 25495324fb0dSmrg 25505324fb0dSmrgstatic int amdgpu_draw_init(uint32_t *ptr) 25515324fb0dSmrg{ 25525324fb0dSmrg int i = 0; 25535324fb0dSmrg const uint32_t *preamblecache_ptr; 
25545324fb0dSmrg uint32_t preamblecache_size; 25555324fb0dSmrg 25565324fb0dSmrg /* Write context control and load shadowing register if necessary */ 25575324fb0dSmrg ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1); 25585324fb0dSmrg ptr[i++] = 0x80000000; 25595324fb0dSmrg ptr[i++] = 0x80000000; 25605324fb0dSmrg 25615324fb0dSmrg preamblecache_ptr = preamblecache_gfx9; 25625324fb0dSmrg preamblecache_size = sizeof(preamblecache_gfx9); 25635324fb0dSmrg 25645324fb0dSmrg memcpy(ptr + i, preamblecache_ptr, preamblecache_size); 25655324fb0dSmrg return i + preamblecache_size/sizeof(uint32_t); 25665324fb0dSmrg} 25675324fb0dSmrg 25685324fb0dSmrgstatic int amdgpu_draw_setup_and_write_drawblt_surf_info(uint32_t *ptr, 25695324fb0dSmrg uint64_t dst_addr) 25705324fb0dSmrg{ 25715324fb0dSmrg int i = 0; 25725324fb0dSmrg 25735324fb0dSmrg /* setup color buffer */ 25745324fb0dSmrg /* offset reg 25755324fb0dSmrg 0xA318 CB_COLOR0_BASE 25765324fb0dSmrg 0xA319 CB_COLOR0_BASE_EXT 25775324fb0dSmrg 0xA31A CB_COLOR0_ATTRIB2 25785324fb0dSmrg 0xA31B CB_COLOR0_VIEW 25795324fb0dSmrg 0xA31C CB_COLOR0_INFO 25805324fb0dSmrg 0xA31D CB_COLOR0_ATTRIB 25815324fb0dSmrg 0xA31E CB_COLOR0_DCC_CONTROL 25825324fb0dSmrg 0xA31F CB_COLOR0_CMASK 25835324fb0dSmrg 0xA320 CB_COLOR0_CMASK_BASE_EXT 25845324fb0dSmrg 0xA321 CB_COLOR0_FMASK 25855324fb0dSmrg 0xA322 CB_COLOR0_FMASK_BASE_EXT 25865324fb0dSmrg 0xA323 CB_COLOR0_CLEAR_WORD0 25875324fb0dSmrg 0xA324 CB_COLOR0_CLEAR_WORD1 25885324fb0dSmrg 0xA325 CB_COLOR0_DCC_BASE 25895324fb0dSmrg 0xA326 CB_COLOR0_DCC_BASE_EXT */ 25905324fb0dSmrg ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 15); 25915324fb0dSmrg ptr[i++] = 0x318; 25925324fb0dSmrg ptr[i++] = dst_addr >> 8; 25935324fb0dSmrg ptr[i++] = dst_addr >> 40; 25945324fb0dSmrg ptr[i++] = 0x7c01f; 25955324fb0dSmrg ptr[i++] = 0; 25965324fb0dSmrg ptr[i++] = 0x50438; 25975324fb0dSmrg ptr[i++] = 0x10140000; 25985324fb0dSmrg i += 9; 25995324fb0dSmrg 26005324fb0dSmrg /* mmCB_MRT0_EPITCH */ 26015324fb0dSmrg ptr[i++] = 
PACKET3(PACKET3_SET_CONTEXT_REG, 1); 26025324fb0dSmrg ptr[i++] = 0x1e8; 26035324fb0dSmrg ptr[i++] = 0x1f; 26045324fb0dSmrg 26055324fb0dSmrg /* 0xA32B CB_COLOR1_BASE */ 26065324fb0dSmrg ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 26075324fb0dSmrg ptr[i++] = 0x32b; 26085324fb0dSmrg ptr[i++] = 0; 26095324fb0dSmrg 26105324fb0dSmrg /* 0xA33A CB_COLOR1_BASE */ 26115324fb0dSmrg ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 26125324fb0dSmrg ptr[i++] = 0x33a; 26135324fb0dSmrg ptr[i++] = 0; 26145324fb0dSmrg 26155324fb0dSmrg /* SPI_SHADER_COL_FORMAT */ 26165324fb0dSmrg ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 26175324fb0dSmrg ptr[i++] = 0x1c5; 26185324fb0dSmrg ptr[i++] = 9; 26195324fb0dSmrg 26205324fb0dSmrg /* Setup depth buffer */ 26215324fb0dSmrg /* mmDB_Z_INFO */ 26225324fb0dSmrg ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2); 26235324fb0dSmrg ptr[i++] = 0xe; 26245324fb0dSmrg i += 2; 26255324fb0dSmrg 26265324fb0dSmrg return i; 26275324fb0dSmrg} 26285324fb0dSmrg 26295324fb0dSmrgstatic int amdgpu_draw_setup_and_write_drawblt_state(uint32_t *ptr) 26305324fb0dSmrg{ 26315324fb0dSmrg int i = 0; 26325324fb0dSmrg const uint32_t *cached_cmd_ptr; 26335324fb0dSmrg uint32_t cached_cmd_size; 26345324fb0dSmrg 26355324fb0dSmrg /* mmPA_SC_TILE_STEERING_OVERRIDE */ 26365324fb0dSmrg ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 26375324fb0dSmrg ptr[i++] = 0xd7; 26385324fb0dSmrg ptr[i++] = 0; 26395324fb0dSmrg 26405324fb0dSmrg ptr[i++] = 0xffff1000; 26415324fb0dSmrg ptr[i++] = 0xc0021000; 26425324fb0dSmrg 26435324fb0dSmrg ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 26445324fb0dSmrg ptr[i++] = 0xd7; 26455324fb0dSmrg ptr[i++] = 1; 26465324fb0dSmrg 26475324fb0dSmrg /* mmPA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0 */ 26485324fb0dSmrg ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 16); 26495324fb0dSmrg ptr[i++] = 0x2fe; 26505324fb0dSmrg i += 16; 26515324fb0dSmrg 26525324fb0dSmrg /* mmPA_SC_CENTROID_PRIORITY_0 */ 26535324fb0dSmrg ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2); 
26545324fb0dSmrg ptr[i++] = 0x2f5; 26555324fb0dSmrg i += 2; 26565324fb0dSmrg 26575324fb0dSmrg cached_cmd_ptr = cached_cmd_gfx9; 26585324fb0dSmrg cached_cmd_size = sizeof(cached_cmd_gfx9); 26595324fb0dSmrg 26605324fb0dSmrg memcpy(ptr + i, cached_cmd_ptr, cached_cmd_size); 26615324fb0dSmrg i += cached_cmd_size/sizeof(uint32_t); 26625324fb0dSmrg 26635324fb0dSmrg return i; 26645324fb0dSmrg} 26655324fb0dSmrg 26665324fb0dSmrgstatic int amdgpu_draw_vs_RectPosTexFast_write2hw(uint32_t *ptr, 26675324fb0dSmrg int ps_type, 26685324fb0dSmrg uint64_t shader_addr) 26695324fb0dSmrg{ 26705324fb0dSmrg int i = 0; 26715324fb0dSmrg 26725324fb0dSmrg /* mmPA_CL_VS_OUT_CNTL */ 26735324fb0dSmrg ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 26745324fb0dSmrg ptr[i++] = 0x207; 26755324fb0dSmrg ptr[i++] = 0; 26765324fb0dSmrg 26775324fb0dSmrg /* mmSPI_SHADER_PGM_RSRC3_VS */ 26785324fb0dSmrg ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1); 26795324fb0dSmrg ptr[i++] = 0x46; 26805324fb0dSmrg ptr[i++] = 0xffff; 26815324fb0dSmrg 26825324fb0dSmrg /* mmSPI_SHADER_PGM_LO_VS...mmSPI_SHADER_PGM_HI_VS */ 26835324fb0dSmrg ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2); 26845324fb0dSmrg ptr[i++] = 0x48; 26855324fb0dSmrg ptr[i++] = shader_addr >> 8; 26865324fb0dSmrg ptr[i++] = shader_addr >> 40; 26875324fb0dSmrg 26885324fb0dSmrg /* mmSPI_SHADER_PGM_RSRC1_VS */ 26895324fb0dSmrg ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1); 26905324fb0dSmrg ptr[i++] = 0x4a; 26915324fb0dSmrg ptr[i++] = 0xc0081; 26925324fb0dSmrg /* mmSPI_SHADER_PGM_RSRC2_VS */ 26935324fb0dSmrg ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1); 26945324fb0dSmrg ptr[i++] = 0x4b; 26955324fb0dSmrg ptr[i++] = 0x18; 26965324fb0dSmrg 26975324fb0dSmrg /* mmSPI_VS_OUT_CONFIG */ 26985324fb0dSmrg ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 26995324fb0dSmrg ptr[i++] = 0x1b1; 27005324fb0dSmrg ptr[i++] = 2; 27015324fb0dSmrg 27025324fb0dSmrg /* mmSPI_SHADER_POS_FORMAT */ 27035324fb0dSmrg ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 27045324fb0dSmrg ptr[i++] = 0x1c3; 27055324fb0dSmrg 
ptr[i++] = 4; 27065324fb0dSmrg 27075324fb0dSmrg ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4); 27085324fb0dSmrg ptr[i++] = 0x4c; 27095324fb0dSmrg i += 2; 27105324fb0dSmrg ptr[i++] = 0x42000000; 27115324fb0dSmrg ptr[i++] = 0x42000000; 27125324fb0dSmrg 27135324fb0dSmrg ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4); 27145324fb0dSmrg ptr[i++] = 0x50; 27155324fb0dSmrg i += 2; 27165324fb0dSmrg if (ps_type == PS_CONST) { 27175324fb0dSmrg i += 2; 27185324fb0dSmrg } else if (ps_type == PS_TEX) { 27195324fb0dSmrg ptr[i++] = 0x3f800000; 27205324fb0dSmrg ptr[i++] = 0x3f800000; 27215324fb0dSmrg } 27225324fb0dSmrg 27235324fb0dSmrg ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4); 27245324fb0dSmrg ptr[i++] = 0x54; 27255324fb0dSmrg i += 4; 27265324fb0dSmrg 27275324fb0dSmrg return i; 27285324fb0dSmrg} 27295324fb0dSmrg 27305324fb0dSmrgstatic int amdgpu_draw_ps_write2hw(uint32_t *ptr, 27315324fb0dSmrg int ps_type, 27325324fb0dSmrg uint64_t shader_addr) 27335324fb0dSmrg{ 27345324fb0dSmrg int i, j; 27355324fb0dSmrg const uint32_t *sh_registers; 27365324fb0dSmrg const uint32_t *context_registers; 27375324fb0dSmrg uint32_t num_sh_reg, num_context_reg; 27385324fb0dSmrg 27395324fb0dSmrg if (ps_type == PS_CONST) { 27405324fb0dSmrg sh_registers = (const uint32_t *)ps_const_sh_registers_gfx9; 27415324fb0dSmrg context_registers = (const uint32_t *)ps_const_context_reg_gfx9; 27425324fb0dSmrg num_sh_reg = ps_num_sh_registers_gfx9; 27435324fb0dSmrg num_context_reg = ps_num_context_registers_gfx9; 27445324fb0dSmrg } else if (ps_type == PS_TEX) { 27455324fb0dSmrg sh_registers = (const uint32_t *)ps_tex_sh_registers_gfx9; 27465324fb0dSmrg context_registers = (const uint32_t *)ps_tex_context_reg_gfx9; 27475324fb0dSmrg num_sh_reg = ps_num_sh_registers_gfx9; 27485324fb0dSmrg num_context_reg = ps_num_context_registers_gfx9; 27495324fb0dSmrg } 27505324fb0dSmrg 27515324fb0dSmrg i = 0; 27525324fb0dSmrg 27535324fb0dSmrg /* 0x2c07 SPI_SHADER_PGM_RSRC3_PS 27545324fb0dSmrg 0x2c08 SPI_SHADER_PGM_LO_PS 27555324fb0dSmrg 0x2c09 
SPI_SHADER_PGM_HI_PS */ 27565324fb0dSmrg shader_addr += 256 * 9; 27575324fb0dSmrg ptr[i++] = PACKET3(PKT3_SET_SH_REG, 3); 27585324fb0dSmrg ptr[i++] = 0x7; 27595324fb0dSmrg ptr[i++] = 0xffff; 27605324fb0dSmrg ptr[i++] = shader_addr >> 8; 27615324fb0dSmrg ptr[i++] = shader_addr >> 40; 27625324fb0dSmrg 27635324fb0dSmrg for (j = 0; j < num_sh_reg; j++) { 27645324fb0dSmrg ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1); 27655324fb0dSmrg ptr[i++] = sh_registers[j * 2] - 0x2c00; 27665324fb0dSmrg ptr[i++] = sh_registers[j * 2 + 1]; 27675324fb0dSmrg } 27685324fb0dSmrg 27695324fb0dSmrg for (j = 0; j < num_context_reg; j++) { 27705324fb0dSmrg if (context_registers[j * 2] != 0xA1C5) { 27715324fb0dSmrg ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 27725324fb0dSmrg ptr[i++] = context_registers[j * 2] - 0xa000; 27735324fb0dSmrg ptr[i++] = context_registers[j * 2 + 1]; 27745324fb0dSmrg } 27755324fb0dSmrg 27765324fb0dSmrg if (context_registers[j * 2] == 0xA1B4) { 27775324fb0dSmrg ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 27785324fb0dSmrg ptr[i++] = 0x1b3; 27795324fb0dSmrg ptr[i++] = 2; 27805324fb0dSmrg } 27815324fb0dSmrg } 27825324fb0dSmrg 27835324fb0dSmrg return i; 27845324fb0dSmrg} 27855324fb0dSmrg 27865324fb0dSmrgstatic int amdgpu_draw_draw(uint32_t *ptr) 27875324fb0dSmrg{ 27885324fb0dSmrg int i = 0; 27895324fb0dSmrg 27905324fb0dSmrg /* mmIA_MULTI_VGT_PARAM */ 27915324fb0dSmrg ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1); 27925324fb0dSmrg ptr[i++] = 0x40000258; 27935324fb0dSmrg ptr[i++] = 0xd00ff; 27945324fb0dSmrg 27955324fb0dSmrg /* mmVGT_PRIMITIVE_TYPE */ 27965324fb0dSmrg ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1); 27975324fb0dSmrg ptr[i++] = 0x10000242; 27985324fb0dSmrg ptr[i++] = 0x11; 27995324fb0dSmrg 28005324fb0dSmrg ptr[i++] = PACKET3(PACKET3_DRAW_INDEX_AUTO, 1); 28015324fb0dSmrg ptr[i++] = 3; 28025324fb0dSmrg ptr[i++] = 2; 28035324fb0dSmrg 28045324fb0dSmrg return i; 28055324fb0dSmrg} 28065324fb0dSmrg 28075324fb0dSmrgvoid amdgpu_memset_draw(amdgpu_device_handle 
device_handle, 28085324fb0dSmrg amdgpu_bo_handle bo_shader_ps, 28095324fb0dSmrg amdgpu_bo_handle bo_shader_vs, 28105324fb0dSmrg uint64_t mc_address_shader_ps, 28115324fb0dSmrg uint64_t mc_address_shader_vs, 28125324fb0dSmrg uint32_t ring_id) 28135324fb0dSmrg{ 28145324fb0dSmrg amdgpu_context_handle context_handle; 28155324fb0dSmrg amdgpu_bo_handle bo_dst, bo_cmd, resources[4]; 28165324fb0dSmrg volatile unsigned char *ptr_dst; 28175324fb0dSmrg uint32_t *ptr_cmd; 28185324fb0dSmrg uint64_t mc_address_dst, mc_address_cmd; 28195324fb0dSmrg amdgpu_va_handle va_dst, va_cmd; 28205324fb0dSmrg int i, r; 28215324fb0dSmrg int bo_dst_size = 16384; 28225324fb0dSmrg int bo_cmd_size = 4096; 28235324fb0dSmrg struct amdgpu_cs_request ibs_request = {0}; 28245324fb0dSmrg struct amdgpu_cs_ib_info ib_info = {0}; 28255324fb0dSmrg struct amdgpu_cs_fence fence_status = {0}; 28265324fb0dSmrg uint32_t expired; 28275324fb0dSmrg amdgpu_bo_list_handle bo_list; 28285324fb0dSmrg 28295324fb0dSmrg r = amdgpu_cs_ctx_create(device_handle, &context_handle); 28305324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 28315324fb0dSmrg 28325324fb0dSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096, 28335324fb0dSmrg AMDGPU_GEM_DOMAIN_GTT, 0, 28345324fb0dSmrg &bo_cmd, (void **)&ptr_cmd, 28355324fb0dSmrg &mc_address_cmd, &va_cmd); 28365324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 28375324fb0dSmrg memset(ptr_cmd, 0, bo_cmd_size); 28385324fb0dSmrg 28395324fb0dSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096, 28405324fb0dSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, 28415324fb0dSmrg &bo_dst, (void **)&ptr_dst, 28425324fb0dSmrg &mc_address_dst, &va_dst); 28435324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 28445324fb0dSmrg 28455324fb0dSmrg i = 0; 28465324fb0dSmrg i += amdgpu_draw_init(ptr_cmd + i); 28475324fb0dSmrg 28485324fb0dSmrg i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst); 28495324fb0dSmrg 28505324fb0dSmrg i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i); 28515324fb0dSmrg 
28525324fb0dSmrg i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_CONST, mc_address_shader_vs); 28535324fb0dSmrg 28545324fb0dSmrg i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_CONST, mc_address_shader_ps); 28555324fb0dSmrg 28565324fb0dSmrg ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4); 28575324fb0dSmrg ptr_cmd[i++] = 0xc; 28585324fb0dSmrg ptr_cmd[i++] = 0x33333333; 28595324fb0dSmrg ptr_cmd[i++] = 0x33333333; 28605324fb0dSmrg ptr_cmd[i++] = 0x33333333; 28615324fb0dSmrg ptr_cmd[i++] = 0x33333333; 28625324fb0dSmrg 28635324fb0dSmrg i += amdgpu_draw_draw(ptr_cmd + i); 28645324fb0dSmrg 28655324fb0dSmrg while (i & 7) 28665324fb0dSmrg ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */ 28675324fb0dSmrg 28685324fb0dSmrg resources[0] = bo_dst; 28695324fb0dSmrg resources[1] = bo_shader_ps; 28705324fb0dSmrg resources[2] = bo_shader_vs; 28715324fb0dSmrg resources[3] = bo_cmd; 28725324fb0dSmrg r = amdgpu_bo_list_create(device_handle, 3, resources, NULL, &bo_list); 28735324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 28745324fb0dSmrg 28755324fb0dSmrg ib_info.ib_mc_address = mc_address_cmd; 28765324fb0dSmrg ib_info.size = i; 28775324fb0dSmrg ibs_request.ip_type = AMDGPU_HW_IP_GFX; 28785324fb0dSmrg ibs_request.ring = ring_id; 28795324fb0dSmrg ibs_request.resources = bo_list; 28805324fb0dSmrg ibs_request.number_of_ibs = 1; 28815324fb0dSmrg ibs_request.ibs = &ib_info; 28825324fb0dSmrg ibs_request.fence_info.handle = NULL; 28835324fb0dSmrg 28845324fb0dSmrg /* submit CS */ 28855324fb0dSmrg r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1); 28865324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 28875324fb0dSmrg 28885324fb0dSmrg r = amdgpu_bo_list_destroy(bo_list); 28895324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 28905324fb0dSmrg 28915324fb0dSmrg fence_status.ip_type = AMDGPU_HW_IP_GFX; 28925324fb0dSmrg fence_status.ip_instance = 0; 28935324fb0dSmrg fence_status.ring = ring_id; 28945324fb0dSmrg fence_status.context = context_handle; 28955324fb0dSmrg fence_status.fence = ibs_request.seq_no; 28965324fb0dSmrg 
28975324fb0dSmrg /* wait for IB accomplished */ 28985324fb0dSmrg r = amdgpu_cs_query_fence_status(&fence_status, 28995324fb0dSmrg AMDGPU_TIMEOUT_INFINITE, 29005324fb0dSmrg 0, &expired); 29015324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 29025324fb0dSmrg CU_ASSERT_EQUAL(expired, true); 29035324fb0dSmrg 29045324fb0dSmrg /* verify if memset test result meets with expected */ 29055324fb0dSmrg i = 0; 29065324fb0dSmrg while(i < bo_dst_size) { 29075324fb0dSmrg CU_ASSERT_EQUAL(ptr_dst[i++], 0x33); 29085324fb0dSmrg } 29095324fb0dSmrg 29105324fb0dSmrg r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size); 29115324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 29125324fb0dSmrg 29135324fb0dSmrg r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size); 29145324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 29155324fb0dSmrg 29165324fb0dSmrg r = amdgpu_cs_ctx_free(context_handle); 29175324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 29185324fb0dSmrg} 29195324fb0dSmrg 29205324fb0dSmrgstatic void amdgpu_memset_draw_test(amdgpu_device_handle device_handle, 29215324fb0dSmrg uint32_t ring) 29225324fb0dSmrg{ 29235324fb0dSmrg amdgpu_bo_handle bo_shader_ps, bo_shader_vs; 29245324fb0dSmrg void *ptr_shader_ps; 29255324fb0dSmrg void *ptr_shader_vs; 29265324fb0dSmrg uint64_t mc_address_shader_ps, mc_address_shader_vs; 29275324fb0dSmrg amdgpu_va_handle va_shader_ps, va_shader_vs; 29285324fb0dSmrg int r; 29295324fb0dSmrg int bo_shader_size = 4096; 29305324fb0dSmrg 29315324fb0dSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096, 29325324fb0dSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, 29335324fb0dSmrg &bo_shader_ps, &ptr_shader_ps, 29345324fb0dSmrg &mc_address_shader_ps, &va_shader_ps); 29355324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 293688f8a8d2Smrg memset(ptr_shader_ps, 0, bo_shader_size); 29375324fb0dSmrg 29385324fb0dSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096, 29395324fb0dSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, 29405324fb0dSmrg &bo_shader_vs, &ptr_shader_vs, 29415324fb0dSmrg 
&mc_address_shader_vs, &va_shader_vs); 29425324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 294388f8a8d2Smrg memset(ptr_shader_vs, 0, bo_shader_size); 29445324fb0dSmrg 29455324fb0dSmrg r = amdgpu_draw_load_ps_shader(ptr_shader_ps, PS_CONST); 29465324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 29475324fb0dSmrg 29485324fb0dSmrg r = amdgpu_draw_load_vs_shader(ptr_shader_vs); 29495324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 29505324fb0dSmrg 29515324fb0dSmrg amdgpu_memset_draw(device_handle, bo_shader_ps, bo_shader_vs, 29525324fb0dSmrg mc_address_shader_ps, mc_address_shader_vs, ring); 29535324fb0dSmrg 29545324fb0dSmrg r = amdgpu_bo_unmap_and_free(bo_shader_ps, va_shader_ps, mc_address_shader_ps, bo_shader_size); 29555324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 29565324fb0dSmrg 29575324fb0dSmrg r = amdgpu_bo_unmap_and_free(bo_shader_vs, va_shader_vs, mc_address_shader_vs, bo_shader_size); 29585324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 29595324fb0dSmrg} 29605324fb0dSmrg 29615324fb0dSmrgstatic void amdgpu_memcpy_draw(amdgpu_device_handle device_handle, 29625324fb0dSmrg amdgpu_bo_handle bo_shader_ps, 29635324fb0dSmrg amdgpu_bo_handle bo_shader_vs, 29645324fb0dSmrg uint64_t mc_address_shader_ps, 29655324fb0dSmrg uint64_t mc_address_shader_vs, 29665324fb0dSmrg uint32_t ring) 29675324fb0dSmrg{ 29685324fb0dSmrg amdgpu_context_handle context_handle; 29695324fb0dSmrg amdgpu_bo_handle bo_dst, bo_src, bo_cmd, resources[5]; 29705324fb0dSmrg volatile unsigned char *ptr_dst; 29715324fb0dSmrg unsigned char *ptr_src; 29725324fb0dSmrg uint32_t *ptr_cmd; 29735324fb0dSmrg uint64_t mc_address_dst, mc_address_src, mc_address_cmd; 29745324fb0dSmrg amdgpu_va_handle va_dst, va_src, va_cmd; 29755324fb0dSmrg int i, r; 29765324fb0dSmrg int bo_size = 16384; 29775324fb0dSmrg int bo_cmd_size = 4096; 29785324fb0dSmrg struct amdgpu_cs_request ibs_request = {0}; 29795324fb0dSmrg struct amdgpu_cs_ib_info ib_info= {0}; 29805324fb0dSmrg uint32_t hang_state, hangs, expired; 29815324fb0dSmrg amdgpu_bo_list_handle bo_list; 29825324fb0dSmrg struct 
amdgpu_cs_fence fence_status = {0}; 29835324fb0dSmrg 29845324fb0dSmrg r = amdgpu_cs_ctx_create(device_handle, &context_handle); 29855324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 29865324fb0dSmrg 29875324fb0dSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096, 29885324fb0dSmrg AMDGPU_GEM_DOMAIN_GTT, 0, 29895324fb0dSmrg &bo_cmd, (void **)&ptr_cmd, 29905324fb0dSmrg &mc_address_cmd, &va_cmd); 29915324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 29925324fb0dSmrg memset(ptr_cmd, 0, bo_cmd_size); 29935324fb0dSmrg 29945324fb0dSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096, 29955324fb0dSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, 29965324fb0dSmrg &bo_src, (void **)&ptr_src, 29975324fb0dSmrg &mc_address_src, &va_src); 29985324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 29995324fb0dSmrg 30005324fb0dSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096, 30015324fb0dSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, 30025324fb0dSmrg &bo_dst, (void **)&ptr_dst, 30035324fb0dSmrg &mc_address_dst, &va_dst); 30045324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 30055324fb0dSmrg 30065324fb0dSmrg memset(ptr_src, 0x55, bo_size); 30075324fb0dSmrg 30085324fb0dSmrg i = 0; 30095324fb0dSmrg i += amdgpu_draw_init(ptr_cmd + i); 30105324fb0dSmrg 30115324fb0dSmrg i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst); 30125324fb0dSmrg 30135324fb0dSmrg i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i); 30145324fb0dSmrg 30155324fb0dSmrg i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_vs); 30165324fb0dSmrg 30175324fb0dSmrg i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_ps); 30185324fb0dSmrg 30195324fb0dSmrg ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 8); 30205324fb0dSmrg ptr_cmd[i++] = 0xc; 30215324fb0dSmrg ptr_cmd[i++] = mc_address_src >> 8; 30225324fb0dSmrg ptr_cmd[i++] = mc_address_src >> 40 | 0x10e00000; 30235324fb0dSmrg ptr_cmd[i++] = 0x7c01f; 30245324fb0dSmrg ptr_cmd[i++] = 0x90500fac; 30255324fb0dSmrg ptr_cmd[i++] = 0x3e000; 30265324fb0dSmrg i += 
3; 30275324fb0dSmrg 30285324fb0dSmrg ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4); 30295324fb0dSmrg ptr_cmd[i++] = 0x14; 30305324fb0dSmrg ptr_cmd[i++] = 0x92; 30315324fb0dSmrg i += 3; 30325324fb0dSmrg 303388f8a8d2Smrg ptr_cmd[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 30345324fb0dSmrg ptr_cmd[i++] = 0x191; 30355324fb0dSmrg ptr_cmd[i++] = 0; 30365324fb0dSmrg 30375324fb0dSmrg i += amdgpu_draw_draw(ptr_cmd + i); 30385324fb0dSmrg 30395324fb0dSmrg while (i & 7) 30405324fb0dSmrg ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */ 30415324fb0dSmrg 30425324fb0dSmrg resources[0] = bo_dst; 30435324fb0dSmrg resources[1] = bo_src; 30445324fb0dSmrg resources[2] = bo_shader_ps; 30455324fb0dSmrg resources[3] = bo_shader_vs; 30465324fb0dSmrg resources[4] = bo_cmd; 30475324fb0dSmrg r = amdgpu_bo_list_create(device_handle, 5, resources, NULL, &bo_list); 30485324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 30495324fb0dSmrg 30505324fb0dSmrg ib_info.ib_mc_address = mc_address_cmd; 30515324fb0dSmrg ib_info.size = i; 30525324fb0dSmrg ibs_request.ip_type = AMDGPU_HW_IP_GFX; 30535324fb0dSmrg ibs_request.ring = ring; 30545324fb0dSmrg ibs_request.resources = bo_list; 30555324fb0dSmrg ibs_request.number_of_ibs = 1; 30565324fb0dSmrg ibs_request.ibs = &ib_info; 30575324fb0dSmrg ibs_request.fence_info.handle = NULL; 30585324fb0dSmrg r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1); 30595324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 30605324fb0dSmrg 30615324fb0dSmrg fence_status.ip_type = AMDGPU_HW_IP_GFX; 30625324fb0dSmrg fence_status.ip_instance = 0; 30635324fb0dSmrg fence_status.ring = ring; 30645324fb0dSmrg fence_status.context = context_handle; 30655324fb0dSmrg fence_status.fence = ibs_request.seq_no; 30665324fb0dSmrg 30675324fb0dSmrg /* wait for IB accomplished */ 30685324fb0dSmrg r = amdgpu_cs_query_fence_status(&fence_status, 30695324fb0dSmrg AMDGPU_TIMEOUT_INFINITE, 30705324fb0dSmrg 0, &expired); 30715324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 30725324fb0dSmrg CU_ASSERT_EQUAL(expired, true); 30735324fb0dSmrg 
30745324fb0dSmrg /* verify if memcpy test result meets with expected */ 30755324fb0dSmrg i = 0; 30765324fb0dSmrg while(i < bo_size) { 30775324fb0dSmrg CU_ASSERT_EQUAL(ptr_dst[i], ptr_src[i]); 30785324fb0dSmrg i++; 30795324fb0dSmrg } 30805324fb0dSmrg 30815324fb0dSmrg r = amdgpu_bo_list_destroy(bo_list); 30825324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 30835324fb0dSmrg 30845324fb0dSmrg r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_size); 30855324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 30865324fb0dSmrg r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_size); 30875324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 30885324fb0dSmrg 30895324fb0dSmrg r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size); 30905324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 30915324fb0dSmrg 30925324fb0dSmrg r = amdgpu_cs_ctx_free(context_handle); 30935324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 30945324fb0dSmrg} 30955324fb0dSmrg 30965324fb0dSmrgstatic void amdgpu_memcpy_draw_test(amdgpu_device_handle device_handle, uint32_t ring) 30975324fb0dSmrg{ 30985324fb0dSmrg amdgpu_bo_handle bo_shader_ps, bo_shader_vs; 30995324fb0dSmrg void *ptr_shader_ps; 31005324fb0dSmrg void *ptr_shader_vs; 31015324fb0dSmrg uint64_t mc_address_shader_ps, mc_address_shader_vs; 31025324fb0dSmrg amdgpu_va_handle va_shader_ps, va_shader_vs; 31035324fb0dSmrg int bo_shader_size = 4096; 31045324fb0dSmrg int r; 31055324fb0dSmrg 31065324fb0dSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096, 31075324fb0dSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, 31085324fb0dSmrg &bo_shader_ps, &ptr_shader_ps, 31095324fb0dSmrg &mc_address_shader_ps, &va_shader_ps); 31105324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 311188f8a8d2Smrg memset(ptr_shader_ps, 0, bo_shader_size); 31125324fb0dSmrg 31135324fb0dSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096, 31145324fb0dSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, 31155324fb0dSmrg &bo_shader_vs, &ptr_shader_vs, 31165324fb0dSmrg &mc_address_shader_vs, &va_shader_vs); 31175324fb0dSmrg 
CU_ASSERT_EQUAL(r, 0); 311888f8a8d2Smrg memset(ptr_shader_vs, 0, bo_shader_size); 31195324fb0dSmrg 31205324fb0dSmrg r = amdgpu_draw_load_ps_shader(ptr_shader_ps, PS_TEX); 31215324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 31225324fb0dSmrg 31235324fb0dSmrg r = amdgpu_draw_load_vs_shader(ptr_shader_vs); 31245324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 31255324fb0dSmrg 31265324fb0dSmrg amdgpu_memcpy_draw(device_handle, bo_shader_ps, bo_shader_vs, 31275324fb0dSmrg mc_address_shader_ps, mc_address_shader_vs, ring); 31285324fb0dSmrg 31295324fb0dSmrg r = amdgpu_bo_unmap_and_free(bo_shader_ps, va_shader_ps, mc_address_shader_ps, bo_shader_size); 31305324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 31315324fb0dSmrg 31325324fb0dSmrg r = amdgpu_bo_unmap_and_free(bo_shader_vs, va_shader_vs, mc_address_shader_vs, bo_shader_size); 31335324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 31345324fb0dSmrg} 31355324fb0dSmrg 31365324fb0dSmrgstatic void amdgpu_draw_test(void) 31375324fb0dSmrg{ 31385324fb0dSmrg int r; 31395324fb0dSmrg struct drm_amdgpu_info_hw_ip info; 31405324fb0dSmrg uint32_t ring_id; 31415324fb0dSmrg 31425324fb0dSmrg r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info); 31435324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 314488f8a8d2Smrg if (!info.available_rings) 314588f8a8d2Smrg printf("SKIP ... 
as there's no graphics ring\n"); 31465324fb0dSmrg 31475324fb0dSmrg for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) { 31485324fb0dSmrg amdgpu_memset_draw_test(device_handle, ring_id); 31495324fb0dSmrg amdgpu_memcpy_draw_test(device_handle, ring_id); 31505324fb0dSmrg } 31515324fb0dSmrg} 315288f8a8d2Smrg 315388f8a8d2Smrgstatic void amdgpu_gpu_reset_test(void) 315488f8a8d2Smrg{ 315588f8a8d2Smrg int r; 315688f8a8d2Smrg char debugfs_path[256], tmp[10]; 315788f8a8d2Smrg int fd; 315888f8a8d2Smrg struct stat sbuf; 315988f8a8d2Smrg amdgpu_context_handle context_handle; 316088f8a8d2Smrg uint32_t hang_state, hangs; 316188f8a8d2Smrg 316288f8a8d2Smrg r = amdgpu_cs_ctx_create(device_handle, &context_handle); 316388f8a8d2Smrg CU_ASSERT_EQUAL(r, 0); 316488f8a8d2Smrg 316588f8a8d2Smrg r = fstat(drm_amdgpu[0], &sbuf); 316688f8a8d2Smrg CU_ASSERT_EQUAL(r, 0); 316788f8a8d2Smrg 316888f8a8d2Smrg sprintf(debugfs_path, "/sys/kernel/debug/dri/%d/amdgpu_gpu_recover", minor(sbuf.st_rdev)); 316988f8a8d2Smrg fd = open(debugfs_path, O_RDONLY); 317088f8a8d2Smrg CU_ASSERT(fd >= 0); 317188f8a8d2Smrg 317288f8a8d2Smrg r = read(fd, tmp, sizeof(tmp)/sizeof(char)); 317388f8a8d2Smrg CU_ASSERT(r > 0); 317488f8a8d2Smrg 317588f8a8d2Smrg r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs); 317688f8a8d2Smrg CU_ASSERT_EQUAL(r, 0); 317788f8a8d2Smrg CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET); 317888f8a8d2Smrg 317988f8a8d2Smrg close(fd); 318088f8a8d2Smrg r = amdgpu_cs_ctx_free(context_handle); 318188f8a8d2Smrg CU_ASSERT_EQUAL(r, 0); 318288f8a8d2Smrg 318388f8a8d2Smrg amdgpu_compute_dispatch_test(); 318488f8a8d2Smrg amdgpu_gfx_dispatch_test(); 318588f8a8d2Smrg} 3186