basic_tests.c revision 9bd392ad
13f012e29Smrg/* 23f012e29Smrg * Copyright 2014 Advanced Micro Devices, Inc. 33f012e29Smrg * 43f012e29Smrg * Permission is hereby granted, free of charge, to any person obtaining a 53f012e29Smrg * copy of this software and associated documentation files (the "Software"), 63f012e29Smrg * to deal in the Software without restriction, including without limitation 73f012e29Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 83f012e29Smrg * and/or sell copies of the Software, and to permit persons to whom the 93f012e29Smrg * Software is furnished to do so, subject to the following conditions: 103f012e29Smrg * 113f012e29Smrg * The above copyright notice and this permission notice shall be included in 123f012e29Smrg * all copies or substantial portions of the Software. 133f012e29Smrg * 143f012e29Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 153f012e29Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 163f012e29Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 173f012e29Smrg * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 183f012e29Smrg * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 193f012e29Smrg * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 203f012e29Smrg * OTHER DEALINGS IN THE SOFTWARE. 213f012e29Smrg * 223f012e29Smrg*/ 233f012e29Smrg 243f012e29Smrg#include <stdio.h> 253f012e29Smrg#include <stdlib.h> 263f012e29Smrg#include <unistd.h> 2788f8a8d2Smrg#include <sys/types.h> 2888f8a8d2Smrg#ifdef MAJOR_IN_SYSMACROS 2988f8a8d2Smrg#include <sys/sysmacros.h> 3088f8a8d2Smrg#endif 3188f8a8d2Smrg#include <sys/stat.h> 3288f8a8d2Smrg#include <fcntl.h> 339bd392adSmrg#if HAVE_ALLOCA_H 343f012e29Smrg# include <alloca.h> 353f012e29Smrg#endif 3600a23bdaSmrg#include <sys/wait.h> 373f012e29Smrg 383f012e29Smrg#include "CUnit/Basic.h" 393f012e29Smrg 403f012e29Smrg#include "amdgpu_test.h" 413f012e29Smrg#include "amdgpu_drm.h" 427cdc0497Smrg#include "util_math.h" 433f012e29Smrg 443f012e29Smrgstatic amdgpu_device_handle device_handle; 453f012e29Smrgstatic uint32_t major_version; 463f012e29Smrgstatic uint32_t minor_version; 47d8807b2fSmrgstatic uint32_t family_id; 483f012e29Smrg 493f012e29Smrgstatic void amdgpu_query_info_test(void); 503f012e29Smrgstatic void amdgpu_command_submission_gfx(void); 513f012e29Smrgstatic void amdgpu_command_submission_compute(void); 52d8807b2fSmrgstatic void amdgpu_command_submission_multi_fence(void); 533f012e29Smrgstatic void amdgpu_command_submission_sdma(void); 543f012e29Smrgstatic void amdgpu_userptr_test(void); 553f012e29Smrgstatic void amdgpu_semaphore_test(void); 5600a23bdaSmrgstatic void amdgpu_sync_dependency_test(void); 5700a23bdaSmrgstatic void amdgpu_bo_eviction_test(void); 5888f8a8d2Smrgstatic void amdgpu_compute_dispatch_test(void); 5988f8a8d2Smrgstatic void amdgpu_gfx_dispatch_test(void); 605324fb0dSmrgstatic void amdgpu_draw_test(void); 6188f8a8d2Smrgstatic void amdgpu_gpu_reset_test(void); 623f012e29Smrg 633f012e29Smrgstatic void amdgpu_command_submission_write_linear_helper(unsigned ip_type); 643f012e29Smrgstatic void amdgpu_command_submission_const_fill_helper(unsigned ip_type); 653f012e29Smrgstatic void amdgpu_command_submission_copy_linear_helper(unsigned ip_type); 6600a23bdaSmrgstatic void amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle, 6700a23bdaSmrg unsigned ip_type, 6800a23bdaSmrg int instance, int pm4_dw, uint32_t *pm4_src, 6900a23bdaSmrg int res_cnt, amdgpu_bo_handle *resources, 7000a23bdaSmrg struct amdgpu_cs_ib_info *ib_info, 7100a23bdaSmrg struct amdgpu_cs_request *ibs_request); 7200a23bdaSmrg 733f012e29SmrgCU_TestInfo basic_tests[] = { 743f012e29Smrg { "Query Info Test", amdgpu_query_info_test }, 753f012e29Smrg { "Userptr Test", amdgpu_userptr_test }, 7600a23bdaSmrg { "bo eviction Test", amdgpu_bo_eviction_test }, 773f012e29Smrg { "Command submission Test (GFX)", amdgpu_command_submission_gfx }, 783f012e29Smrg { "Command submission Test (Compute)", amdgpu_command_submission_compute }, 79d8807b2fSmrg { "Command submission Test (Multi-Fence)", amdgpu_command_submission_multi_fence }, 803f012e29Smrg { "Command submission Test (SDMA)", amdgpu_command_submission_sdma }, 813f012e29Smrg { "SW semaphore Test", amdgpu_semaphore_test }, 8200a23bdaSmrg { "Sync dependency Test", amdgpu_sync_dependency_test }, 8388f8a8d2Smrg { "Dispatch Test (Compute)", amdgpu_compute_dispatch_test }, 8488f8a8d2Smrg { "Dispatch Test (GFX)", amdgpu_gfx_dispatch_test }, 855324fb0dSmrg { "Draw Test", amdgpu_draw_test }, 8688f8a8d2Smrg { "GPU reset Test", amdgpu_gpu_reset_test }, 873f012e29Smrg CU_TEST_INFO_NULL, 883f012e29Smrg}; 899bd392adSmrg#define BUFFER_SIZE (MAX2(8 * 1024, getpagesize())) 903f012e29Smrg#define SDMA_PKT_HEADER_op_offset 0 913f012e29Smrg#define SDMA_PKT_HEADER_op_mask 0x000000FF 923f012e29Smrg#define SDMA_PKT_HEADER_op_shift 0 933f012e29Smrg#define SDMA_PKT_HEADER_OP(x) (((x) & SDMA_PKT_HEADER_op_mask) << SDMA_PKT_HEADER_op_shift) 943f012e29Smrg#define SDMA_OPCODE_CONSTANT_FILL 11 953f012e29Smrg# define SDMA_CONSTANT_FILL_EXTRA_SIZE(x) ((x) << 14) 963f012e29Smrg /* 0 = byte fill 973f012e29Smrg * 2 = DW fill 983f012e29Smrg */ 993f012e29Smrg#define SDMA_PACKET(op, sub_op, e) ((((e) & 0xFFFF) << 16) | \ 1003f012e29Smrg (((sub_op) & 0xFF) << 8) | \ 1013f012e29Smrg (((op) & 0xFF) << 0)) 1023f012e29Smrg#define SDMA_OPCODE_WRITE 2 1033f012e29Smrg# define SDMA_WRITE_SUB_OPCODE_LINEAR 0 1043f012e29Smrg# define SDMA_WRTIE_SUB_OPCODE_TILED 1 1053f012e29Smrg 1063f012e29Smrg#define SDMA_OPCODE_COPY 1 1073f012e29Smrg# define SDMA_COPY_SUB_OPCODE_LINEAR 0 1083f012e29Smrg 1093f012e29Smrg#define GFX_COMPUTE_NOP 0xffff1000 1103f012e29Smrg#define SDMA_NOP 0x0 1113f012e29Smrg 1123f012e29Smrg/* PM4 */ 1133f012e29Smrg#define PACKET_TYPE0 0 1143f012e29Smrg#define PACKET_TYPE1 1 1153f012e29Smrg#define PACKET_TYPE2 2 1163f012e29Smrg#define PACKET_TYPE3 3 1173f012e29Smrg 1183f012e29Smrg#define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3) 1193f012e29Smrg#define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF) 1203f012e29Smrg#define CP_PACKET0_GET_REG(h) ((h) & 0xFFFF) 1213f012e29Smrg#define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF) 1223f012e29Smrg#define PACKET0(reg, n) ((PACKET_TYPE0 << 30) | \ 1233f012e29Smrg ((reg) & 0xFFFF) | \ 1243f012e29Smrg ((n) & 0x3FFF) << 16) 1253f012e29Smrg#define CP_PACKET2 0x80000000 1263f012e29Smrg#define PACKET2_PAD_SHIFT 0 1273f012e29Smrg#define PACKET2_PAD_MASK (0x3fffffff << 0) 1283f012e29Smrg 1293f012e29Smrg#define PACKET2(v) (CP_PACKET2 | REG_SET(PACKET2_PAD, (v))) 1303f012e29Smrg 1313f012e29Smrg#define PACKET3(op, n) ((PACKET_TYPE3 << 30) | \ 1323f012e29Smrg (((op) & 0xFF) << 8) | \ 1333f012e29Smrg ((n) & 0x3FFF) << 16) 1345324fb0dSmrg#define PACKET3_COMPUTE(op, n) PACKET3(op, n) | (1 << 1) 1353f012e29Smrg 1363f012e29Smrg/* Packet 3 types */ 1373f012e29Smrg#define PACKET3_NOP 0x10 1383f012e29Smrg 1393f012e29Smrg#define PACKET3_WRITE_DATA 0x37 1403f012e29Smrg#define WRITE_DATA_DST_SEL(x) ((x) << 8) 1413f012e29Smrg /* 0 - register 1423f012e29Smrg * 1 - memory (sync - via GRBM) 1433f012e29Smrg * 2 - gl2 1443f012e29Smrg * 3 - gds 1453f012e29Smrg * 4 - reserved 1463f012e29Smrg * 5 - memory (async - direct) 1473f012e29Smrg */ 1483f012e29Smrg#define WR_ONE_ADDR (1 << 16) 1493f012e29Smrg#define WR_CONFIRM (1 << 20) 1503f012e29Smrg#define WRITE_DATA_CACHE_POLICY(x) ((x) << 25) 1513f012e29Smrg /* 0 - LRU 1523f012e29Smrg * 1 - Stream 1533f012e29Smrg */ 1543f012e29Smrg#define WRITE_DATA_ENGINE_SEL(x) ((x) << 30) 1553f012e29Smrg /* 0 - me 1563f012e29Smrg * 1 - pfp 1573f012e29Smrg * 2 - ce 1583f012e29Smrg */ 1593f012e29Smrg 1603f012e29Smrg#define PACKET3_DMA_DATA 0x50 1613f012e29Smrg/* 1. header 1623f012e29Smrg * 2. CONTROL 1633f012e29Smrg * 3. SRC_ADDR_LO or DATA [31:0] 1643f012e29Smrg * 4. SRC_ADDR_HI [31:0] 1653f012e29Smrg * 5. DST_ADDR_LO [31:0] 1663f012e29Smrg * 6. DST_ADDR_HI [7:0] 1673f012e29Smrg * 7. COMMAND [30:21] | BYTE_COUNT [20:0] 1683f012e29Smrg */ 1693f012e29Smrg/* CONTROL */ 1703f012e29Smrg# define PACKET3_DMA_DATA_ENGINE(x) ((x) << 0) 1713f012e29Smrg /* 0 - ME 1723f012e29Smrg * 1 - PFP 1733f012e29Smrg */ 1743f012e29Smrg# define PACKET3_DMA_DATA_SRC_CACHE_POLICY(x) ((x) << 13) 1753f012e29Smrg /* 0 - LRU 1763f012e29Smrg * 1 - Stream 1773f012e29Smrg * 2 - Bypass 1783f012e29Smrg */ 1793f012e29Smrg# define PACKET3_DMA_DATA_SRC_VOLATILE (1 << 15) 1803f012e29Smrg# define PACKET3_DMA_DATA_DST_SEL(x) ((x) << 20) 1813f012e29Smrg /* 0 - DST_ADDR using DAS 1823f012e29Smrg * 1 - GDS 1833f012e29Smrg * 3 - DST_ADDR using L2 1843f012e29Smrg */ 1853f012e29Smrg# define PACKET3_DMA_DATA_DST_CACHE_POLICY(x) ((x) << 25) 1863f012e29Smrg /* 0 - LRU 1873f012e29Smrg * 1 - Stream 1883f012e29Smrg * 2 - Bypass 1893f012e29Smrg */ 1903f012e29Smrg# define PACKET3_DMA_DATA_DST_VOLATILE (1 << 27) 1913f012e29Smrg# define PACKET3_DMA_DATA_SRC_SEL(x) ((x) << 29) 1923f012e29Smrg /* 0 - SRC_ADDR using SAS 1933f012e29Smrg * 1 - GDS 1943f012e29Smrg * 2 - DATA 1953f012e29Smrg * 3 - SRC_ADDR using L2 1963f012e29Smrg */ 1973f012e29Smrg# define PACKET3_DMA_DATA_CP_SYNC (1 << 31) 1983f012e29Smrg/* COMMAND */ 1993f012e29Smrg# define PACKET3_DMA_DATA_DIS_WC (1 << 21) 2003f012e29Smrg# define PACKET3_DMA_DATA_CMD_SRC_SWAP(x) ((x) << 22) 2013f012e29Smrg /* 0 - none 2023f012e29Smrg * 1 - 8 in 16 2033f012e29Smrg * 2 - 8 in 32 2043f012e29Smrg * 3 - 8 in 64 2053f012e29Smrg */ 2063f012e29Smrg# define PACKET3_DMA_DATA_CMD_DST_SWAP(x) ((x) << 24) 2073f012e29Smrg /* 0 - none 2083f012e29Smrg * 1 - 8 in 16 2093f012e29Smrg * 2 - 8 in 32 2103f012e29Smrg * 3 - 8 in 64 2113f012e29Smrg */ 2123f012e29Smrg# define PACKET3_DMA_DATA_CMD_SAS (1 << 26) 2133f012e29Smrg /* 0 - memory 2143f012e29Smrg * 1 - register 2153f012e29Smrg */ 2163f012e29Smrg# define PACKET3_DMA_DATA_CMD_DAS (1 << 27) 2173f012e29Smrg /* 0 - memory 2183f012e29Smrg * 1 - register 2193f012e29Smrg */ 2203f012e29Smrg# define PACKET3_DMA_DATA_CMD_SAIC (1 << 28) 2213f012e29Smrg# define PACKET3_DMA_DATA_CMD_DAIC (1 << 29) 2223f012e29Smrg# define PACKET3_DMA_DATA_CMD_RAW_WAIT (1 << 30) 2233f012e29Smrg 224d8807b2fSmrg#define SDMA_PACKET_SI(op, b, t, s, cnt) ((((op) & 0xF) << 28) | \ 225d8807b2fSmrg (((b) & 0x1) << 26) | \ 226d8807b2fSmrg (((t) & 0x1) << 23) | \ 227d8807b2fSmrg (((s) & 0x1) << 22) | \ 228d8807b2fSmrg (((cnt) & 0xFFFFF) << 0)) 229d8807b2fSmrg#define SDMA_OPCODE_COPY_SI 3 230d8807b2fSmrg#define SDMA_OPCODE_CONSTANT_FILL_SI 13 231d8807b2fSmrg#define SDMA_NOP_SI 0xf 232d8807b2fSmrg#define GFX_COMPUTE_NOP_SI 0x80000000 233d8807b2fSmrg#define PACKET3_DMA_DATA_SI 0x41 234d8807b2fSmrg# define PACKET3_DMA_DATA_SI_ENGINE(x) ((x) << 27) 235d8807b2fSmrg /* 0 - ME 236d8807b2fSmrg * 1 - PFP 237d8807b2fSmrg */ 238d8807b2fSmrg# define PACKET3_DMA_DATA_SI_DST_SEL(x) ((x) << 20) 239d8807b2fSmrg /* 0 - DST_ADDR using DAS 240d8807b2fSmrg * 1 - GDS 241d8807b2fSmrg * 3 - DST_ADDR using L2 242d8807b2fSmrg */ 243d8807b2fSmrg# define PACKET3_DMA_DATA_SI_SRC_SEL(x) ((x) << 29) 244d8807b2fSmrg /* 0 - SRC_ADDR using SAS 245d8807b2fSmrg * 1 - GDS 246d8807b2fSmrg * 2 - DATA 247d8807b2fSmrg * 3 - SRC_ADDR using L2 248d8807b2fSmrg */ 249d8807b2fSmrg# define PACKET3_DMA_DATA_SI_CP_SYNC (1 << 31) 250d8807b2fSmrg 25100a23bdaSmrg 25200a23bdaSmrg#define PKT3_CONTEXT_CONTROL 0x28 25300a23bdaSmrg#define CONTEXT_CONTROL_LOAD_ENABLE(x) (((unsigned)(x) & 0x1) << 31) 25400a23bdaSmrg#define CONTEXT_CONTROL_LOAD_CE_RAM(x) (((unsigned)(x) & 0x1) << 28) 25500a23bdaSmrg#define CONTEXT_CONTROL_SHADOW_ENABLE(x) (((unsigned)(x) & 0x1) << 31) 25600a23bdaSmrg 25700a23bdaSmrg#define PKT3_CLEAR_STATE 0x12 25800a23bdaSmrg 25900a23bdaSmrg#define PKT3_SET_SH_REG 0x76 26000a23bdaSmrg#define PACKET3_SET_SH_REG_START 0x00002c00 26100a23bdaSmrg 26200a23bdaSmrg#define PACKET3_DISPATCH_DIRECT 0x15 2635324fb0dSmrg#define PACKET3_EVENT_WRITE 0x46 2645324fb0dSmrg#define PACKET3_ACQUIRE_MEM 0x58 2655324fb0dSmrg#define PACKET3_SET_CONTEXT_REG 0x69 2665324fb0dSmrg#define PACKET3_SET_UCONFIG_REG 0x79 2675324fb0dSmrg#define PACKET3_DRAW_INDEX_AUTO 0x2D 26800a23bdaSmrg/* gfx 8 */ 26900a23bdaSmrg#define mmCOMPUTE_PGM_LO 0x2e0c 27000a23bdaSmrg#define mmCOMPUTE_PGM_RSRC1 0x2e12 27100a23bdaSmrg#define mmCOMPUTE_TMPRING_SIZE 0x2e18 27200a23bdaSmrg#define mmCOMPUTE_USER_DATA_0 0x2e40 27300a23bdaSmrg#define mmCOMPUTE_USER_DATA_1 0x2e41 27400a23bdaSmrg#define mmCOMPUTE_RESOURCE_LIMITS 0x2e15 27500a23bdaSmrg#define mmCOMPUTE_NUM_THREAD_X 0x2e07 27600a23bdaSmrg 27700a23bdaSmrg 27800a23bdaSmrg 27900a23bdaSmrg#define SWAP_32(num) (((num & 0xff000000) >> 24) | \ 28000a23bdaSmrg ((num & 0x0000ff00) << 8) | \ 28100a23bdaSmrg ((num & 0x00ff0000) >> 8) | \ 28200a23bdaSmrg ((num & 0x000000ff) << 24)) 28300a23bdaSmrg 28400a23bdaSmrg 28500a23bdaSmrg/* Shader code 28600a23bdaSmrg * void main() 28700a23bdaSmrg{ 28800a23bdaSmrg 28900a23bdaSmrg float x = some_input; 29000a23bdaSmrg for (unsigned i = 0; i < 1000000; i++) 29100a23bdaSmrg x = sin(x); 29200a23bdaSmrg 29300a23bdaSmrg u[0] = 42u; 29400a23bdaSmrg} 29500a23bdaSmrg*/ 29600a23bdaSmrg 29700a23bdaSmrgstatic uint32_t shader_bin[] = { 29800a23bdaSmrg SWAP_32(0x800082be), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0x040085bf), 29900a23bdaSmrg SWAP_32(0x02810281), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0xfcff84bf), 30000a23bdaSmrg SWAP_32(0xff0083be), SWAP_32(0x00f00000), SWAP_32(0xc10082be), SWAP_32(0xaa02007e), 30100a23bdaSmrg SWAP_32(0x000070e0), SWAP_32(0x00000080), SWAP_32(0x000081bf) 30200a23bdaSmrg}; 30300a23bdaSmrg 30400a23bdaSmrg#define CODE_OFFSET 512 30500a23bdaSmrg#define DATA_OFFSET 1024 30600a23bdaSmrg 3075324fb0dSmrgenum cs_type { 3085324fb0dSmrg CS_BUFFERCLEAR, 3099bd392adSmrg CS_BUFFERCOPY, 3109bd392adSmrg CS_HANG, 3119bd392adSmrg CS_HANG_SLOW 3125324fb0dSmrg}; 3135324fb0dSmrg 3145324fb0dSmrgstatic const uint32_t bufferclear_cs_shader_gfx9[] = { 3155324fb0dSmrg 0xD1FD0000, 0x04010C08, 0x7E020204, 0x7E040205, 3165324fb0dSmrg 0x7E060206, 0x7E080207, 0xE01C2000, 0x80000100, 3175324fb0dSmrg 0xBF810000 3185324fb0dSmrg}; 3195324fb0dSmrg 3205324fb0dSmrgstatic const uint32_t bufferclear_cs_shader_registers_gfx9[][2] = { 3215324fb0dSmrg {0x2e12, 0x000C0041}, //{ mmCOMPUTE_PGM_RSRC1, 0x000C0041 }, 3225324fb0dSmrg {0x2e13, 0x00000090}, //{ mmCOMPUTE_PGM_RSRC2, 0x00000090 }, 3235324fb0dSmrg {0x2e07, 0x00000040}, //{ mmCOMPUTE_NUM_THREAD_X, 0x00000040 }, 3245324fb0dSmrg {0x2e08, 0x00000001}, //{ mmCOMPUTE_NUM_THREAD_Y, 0x00000001 }, 3255324fb0dSmrg {0x2e09, 0x00000001}, //{ mmCOMPUTE_NUM_THREAD_Z, 0x00000001 } 3265324fb0dSmrg}; 3275324fb0dSmrg 3285324fb0dSmrgstatic const uint32_t bufferclear_cs_shader_registers_num_gfx9 = 5; 3295324fb0dSmrg 3305324fb0dSmrgstatic const uint32_t buffercopy_cs_shader_gfx9[] = { 3315324fb0dSmrg 0xD1FD0000, 0x04010C08, 0xE00C2000, 0x80000100, 3325324fb0dSmrg 0xBF8C0F70, 0xE01C2000, 0x80010100, 0xBF810000 3335324fb0dSmrg}; 3345324fb0dSmrg 3355324fb0dSmrgstatic const uint32_t preamblecache_gfx9[] = { 3365324fb0dSmrg 0xc0026900, 0x81, 0x80000000, 0x40004000, 0xc0026900, 0x8c, 0xaa99aaaa, 0x0, 3375324fb0dSmrg 0xc0026900, 0x90, 0x80000000, 0x40004000, 0xc0026900, 0x94, 0x80000000, 0x40004000, 3385324fb0dSmrg 0xc0026900, 0xb4, 0x0, 0x3f800000, 0xc0016900, 0x103, 0x0, 3395324fb0dSmrg 0xc0016900, 0x208, 0x0, 0xc0016900, 0x290, 0x0, 3405324fb0dSmrg 0xc0016900, 0x2a1, 0x0, 0xc0026900, 0x2ad, 0x0, 0x0, 3415324fb0dSmrg 0xc0016900, 0x2d5, 0x10000, 0xc0016900, 0x2dc, 0x0, 3425324fb0dSmrg 0xc0066900, 0x2de, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xc0026900, 0x2e5, 0x0, 0x0, 3435324fb0dSmrg 0xc0056900, 0x2f9, 0x5, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 34488f8a8d2Smrg 0xc0036900, 0x311, 0x3, 0, 0x100000, 0xc0026900, 0x316, 0x1e, 0x20, 3455324fb0dSmrg 0xc0016900, 0x349, 0x0, 0xc0016900, 0x358, 0x0, 0xc0016900, 0x367, 0x0, 3465324fb0dSmrg 0xc0016900, 0x376, 0x0, 0xc0016900, 0x385, 0x0, 0xc0016900, 0x19, 0x0, 3475324fb0dSmrg 0xc0056900, 0xe8, 0x0, 0x0, 0x0, 0x0, 0x0, 3485324fb0dSmrg 0xc0076900, 0x1e1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 3495324fb0dSmrg 0xc0026900, 0x204, 0x90000, 0x4, 0xc0046900, 0x20c, 0x0, 0x0, 0x0, 0x0, 3505324fb0dSmrg 0xc0016900, 0x2b2, 0x0, 0xc0026900, 0x30e, 0xffffffff, 0xffffffff, 35188f8a8d2Smrg 0xc0016900, 0x314, 0x0, 0xc0016900, 0x2a6, 0, 0xc0016900, 0x210, 0, 35288f8a8d2Smrg 0xc0002f00, 0x1, 0xc0016900, 0x1, 0x1, 3535324fb0dSmrg 0xc0016900, 0x18, 0x2, 0xc0016900, 0x206, 0x300, 0xc0017900, 0x20000243, 0x0, 3545324fb0dSmrg 0xc0017900, 0x248, 0xffffffff, 0xc0017900, 0x249, 0x0, 0xc0017900, 0x24a, 0x0, 3555324fb0dSmrg 0xc0017900, 0x24b, 0x0 3565324fb0dSmrg}; 3575324fb0dSmrg 3585324fb0dSmrgenum ps_type { 3595324fb0dSmrg PS_CONST, 3609bd392adSmrg PS_TEX, 3619bd392adSmrg PS_HANG, 3629bd392adSmrg PS_HANG_SLOW 3635324fb0dSmrg}; 3645324fb0dSmrg 3655324fb0dSmrgstatic const uint32_t ps_const_shader_gfx9[] = { 3665324fb0dSmrg 0x7E000200, 0x7E020201, 0x7E040202, 0x7E060203, 3675324fb0dSmrg 0xD2960000, 0x00020300, 0xD2960001, 0x00020702, 3685324fb0dSmrg 0xC4001C0F, 0x00000100, 0xBF810000 3695324fb0dSmrg}; 3705324fb0dSmrg 3715324fb0dSmrgstatic const uint32_t ps_const_shader_patchinfo_code_size_gfx9 = 6; 3725324fb0dSmrg 3735324fb0dSmrgstatic const uint32_t ps_const_shader_patchinfo_code_gfx9[][10][6] = { 3745324fb0dSmrg {{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001890, 0x00000000 }, 3755324fb0dSmrg { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001801, 0x00000000 }, 3765324fb0dSmrg { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000100 }, 3775324fb0dSmrg { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000300 }, 3785324fb0dSmrg { 0xD2960000, 0x00020300, 0xD2960001, 0x00020702, 0xC4001C0F, 0x00000100 }, 3795324fb0dSmrg { 0xD2950000, 0x00020300, 0xD2950001, 0x00020702, 0xC4001C0F, 0x00000100 }, 3805324fb0dSmrg { 0xD2940000, 0x00020300, 0xD2940001, 0x00020702, 0xC4001C0F, 0x00000100 }, 3815324fb0dSmrg { 0xD2970000, 0x00020300, 0xD2970001, 0x00020702, 0xC4001C0F, 0x00000100 }, 3825324fb0dSmrg { 0xD2980000, 0x00020300, 0xD2980001, 0x00020702, 0xC4001C0F, 0x00000100 }, 3835324fb0dSmrg { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC400180F, 0x03020100 } 3845324fb0dSmrg } 3855324fb0dSmrg}; 3865324fb0dSmrg 3875324fb0dSmrgstatic const uint32_t ps_const_shader_patchinfo_offset_gfx9[] = { 3885324fb0dSmrg 0x00000004 3895324fb0dSmrg}; 3905324fb0dSmrg 3915324fb0dSmrgstatic const uint32_t ps_num_sh_registers_gfx9 = 2; 3925324fb0dSmrg 3935324fb0dSmrgstatic const uint32_t ps_const_sh_registers_gfx9[][2] = { 3945324fb0dSmrg {0x2C0A, 0x000C0040},//{ mmSPI_SHADER_PGM_RSRC1_PS, 0x000C0040 }, 3955324fb0dSmrg {0x2C0B, 0x00000008}, //{ mmSPI_SHADER_PGM_RSRC2_PS, 0x00000008 } 3965324fb0dSmrg}; 3975324fb0dSmrg 3985324fb0dSmrgstatic const uint32_t ps_num_context_registers_gfx9 = 7; 3995324fb0dSmrg 4005324fb0dSmrgstatic const uint32_t ps_const_context_reg_gfx9[][2] = { 4015324fb0dSmrg {0xA1B4, 0x00000002}, //{ mmSPI_PS_INPUT_ADDR, 0x00000002 }, 4025324fb0dSmrg {0xA1B6, 0x00000000}, //{ mmSPI_PS_IN_CONTROL, 0x00000000 }, 4035324fb0dSmrg {0xA08F, 0x0000000F}, //{ mmCB_SHADER_MASK, 0x0000000F }, 4045324fb0dSmrg {0xA203, 0x00000010}, //{ mmDB_SHADER_CONTROL, 0x00000010 }, 4055324fb0dSmrg {0xA1C4, 0x00000000}, //{ mmSPI_SHADER_Z_FORMAT, 0x00000000 }, 4065324fb0dSmrg {0xA1B8, 0x00000000}, //{ mmSPI_BARYC_CNTL, 0x00000000 /* Always 0 for now */}, 4075324fb0dSmrg {0xA1C5, 0x00000004}, //{ mmSPI_SHADER_COL_FORMAT, 0x00000004 } 4085324fb0dSmrg}; 4095324fb0dSmrg 4105324fb0dSmrgstatic const uint32_t ps_tex_shader_gfx9[] = { 4115324fb0dSmrg 0xBEFC000C, 0xBE8E017E, 0xBEFE077E, 0xD4180000, 4125324fb0dSmrg 0xD4190001, 0xD41C0100, 0xD41D0101, 0xF0800F00, 4135324fb0dSmrg 0x00400206, 0xBEFE010E, 0xBF8C0F70, 0xD2960000, 4145324fb0dSmrg 0x00020702, 0xD2960001, 0x00020B04, 0xC4001C0F, 4155324fb0dSmrg 0x00000100, 0xBF810000 4165324fb0dSmrg}; 4175324fb0dSmrg 4185324fb0dSmrgstatic const uint32_t ps_tex_shader_patchinfo_offset_gfx9[] = { 4195324fb0dSmrg 0x0000000B 4205324fb0dSmrg}; 4215324fb0dSmrg 4225324fb0dSmrgstatic const uint32_t ps_tex_shader_patchinfo_code_size_gfx9 = 6; 4235324fb0dSmrg 4245324fb0dSmrgstatic const uint32_t ps_tex_shader_patchinfo_code_gfx9[][10][6] = { 4255324fb0dSmrg {{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001890, 0x00000000 }, 4265324fb0dSmrg { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001801, 0x00000002 }, 4275324fb0dSmrg { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000302 }, 4285324fb0dSmrg { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000502 }, 4295324fb0dSmrg { 0xD2960000, 0x00020702, 0xD2960001, 0x00020B04, 0xC4001C0F, 0x00000100 }, 4305324fb0dSmrg { 0xD2950000, 0x00020702, 0xD2950001, 0x00020B04, 0xC4001C0F, 0x00000100 }, 4315324fb0dSmrg { 0xD2940000, 0x00020702, 0xD2940001, 0x00020B04, 0xC4001C0F, 0x00000100 }, 4325324fb0dSmrg { 0xD2970000, 0x00020702, 0xD2970001, 0x00020B04, 0xC4001C0F, 0x00000100 }, 4335324fb0dSmrg { 0xD2980000, 0x00020702, 0xD2980001, 0x00020B04, 0xC4001C0F, 0x00000100 }, 4345324fb0dSmrg { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC400180F, 0x05040302 } 4355324fb0dSmrg } 4365324fb0dSmrg}; 4375324fb0dSmrg 4385324fb0dSmrgstatic const uint32_t ps_tex_sh_registers_gfx9[][2] = { 4395324fb0dSmrg {0x2C0A, 0x000C0081},//{ mmSPI_SHADER_PGM_RSRC1_PS, 0x000C0081 }, 4405324fb0dSmrg {0x2C0B, 0x00000018}, //{ mmSPI_SHADER_PGM_RSRC2_PS, 0x00000018 } 4415324fb0dSmrg}; 4425324fb0dSmrg 4435324fb0dSmrgstatic const uint32_t ps_tex_context_reg_gfx9[][2] = { 4445324fb0dSmrg {0xA1B4, 0x00000002}, //{ mmSPI_PS_INPUT_ADDR, 0x00000002 }, 4455324fb0dSmrg {0xA1B6, 0x00000001}, //{ mmSPI_PS_IN_CONTROL, 0x00000001 }, 4465324fb0dSmrg {0xA08F, 0x0000000F}, //{ mmCB_SHADER_MASK, 0x0000000F }, 4475324fb0dSmrg {0xA203, 0x00000010}, //{ mmDB_SHADER_CONTROL, 0x00000010 }, 4485324fb0dSmrg {0xA1C4, 0x00000000}, //{ mmSPI_SHADER_Z_FORMAT, 0x00000000 }, 4495324fb0dSmrg {0xA1B8, 0x00000000}, //{ mmSPI_BARYC_CNTL, 0x00000000 /* Always 0 for now */}, 4505324fb0dSmrg {0xA1C5, 0x00000004}, //{ mmSPI_SHADER_COL_FORMAT, 0x00000004 } 4515324fb0dSmrg}; 4525324fb0dSmrg 4535324fb0dSmrgstatic const uint32_t vs_RectPosTexFast_shader_gfx9[] = { 4545324fb0dSmrg 0x7E000B00, 0x020000F3, 0xD042000A, 0x00010100, 4555324fb0dSmrg 0x7E020202, 0x7E040200, 0x020000F3, 0x7E060206, 4565324fb0dSmrg 0x7E080204, 0xD1000001, 0x002A0302, 0x7C840080, 4575324fb0dSmrg 0x7E000200, 0x7E040203, 0x7E0A0201, 0xD1000003, 4585324fb0dSmrg 0x002A0704, 0x7E0C0207, 0x7E0E0205, 0x00000101, 4595324fb0dSmrg 0x00020505, 0x7E040208, 0x7E0A02F2, 0x00060903, 4605324fb0dSmrg 0x00080D07, 0x7E0C0209, 0xC40008CF, 0x05020100, 4615324fb0dSmrg 0xC400020F, 0x05060403, 0xBF810000 4625324fb0dSmrg}; 4635324fb0dSmrg 4645324fb0dSmrgstatic const uint32_t cached_cmd_gfx9[] = { 4655324fb0dSmrg 0xc0016900, 0x0, 0x0, 0xc0026900, 0x3, 0x2a, 0x0, 4665324fb0dSmrg 0xc0046900, 0xa, 0x0, 0x0, 0x0, 0x200020, 4675324fb0dSmrg 0xc0016900, 0x83, 0xffff, 0xc0026900, 0x8e, 0xf, 0xf, 4689bd392adSmrg 0xc0056900, 0x105, 0x0, 0x0, 0x0, 0x0, 0x12, 4695324fb0dSmrg 0xc0026900, 0x10b, 0x0, 0x0, 0xc0016900, 0x1e0, 0x0, 4705324fb0dSmrg 0xc0036900, 0x200, 0x0, 0x10000, 0xcc0011, 4715324fb0dSmrg 0xc0026900, 0x292, 0x20, 0x60201b8, 4725324fb0dSmrg 0xc0026900, 0x2b0, 0x0, 0x0, 0xc0016900, 0x2f8, 0x0 4735324fb0dSmrg}; 47400a23bdaSmrg 4759bd392adSmrgunsigned int memcpy_ps_hang[] = { 4769bd392adSmrg 0xFFFFFFFF, 0xBEFE0A7E, 0xBEFC0304, 0xC0C20100, 4779bd392adSmrg 0xC0800300, 0xC8080000, 0xC80C0100, 0xC8090001, 4789bd392adSmrg 0xC80D0101, 0xBF8C007F, 0xF0800F00, 0x00010002, 4799bd392adSmrg 0xBEFE040C, 0xBF8C0F70, 0xBF800000, 0xBF800000, 4809bd392adSmrg 0xF800180F, 0x03020100, 0xBF810000 4819bd392adSmrg}; 4829bd392adSmrg 4839bd392adSmrgstruct amdgpu_test_shader { 4849bd392adSmrg uint32_t *shader; 4859bd392adSmrg uint32_t header_length; 4869bd392adSmrg uint32_t body_length; 4879bd392adSmrg uint32_t foot_length; 4889bd392adSmrg}; 4899bd392adSmrg 4909bd392adSmrgunsigned int memcpy_cs_hang_slow_ai_codes[] = { 4919bd392adSmrg 0xd1fd0000, 0x04010c08, 0xe00c2000, 0x80000100, 4929bd392adSmrg 0xbf8c0f70, 0xe01c2000, 0x80010100, 0xbf810000 4939bd392adSmrg}; 4949bd392adSmrg 4959bd392adSmrgstruct amdgpu_test_shader memcpy_cs_hang_slow_ai = { 4969bd392adSmrg memcpy_cs_hang_slow_ai_codes, 4979bd392adSmrg 4, 4989bd392adSmrg 3, 4999bd392adSmrg 1 5009bd392adSmrg}; 5019bd392adSmrg 5029bd392adSmrgunsigned int memcpy_cs_hang_slow_rv_codes[] = { 5039bd392adSmrg 0x8e00860c, 0x32000000, 0xe00c2000, 0x80010100, 5049bd392adSmrg 0xbf8c0f70, 0xe01c2000, 0x80020100, 0xbf810000 5059bd392adSmrg}; 5069bd392adSmrg 5079bd392adSmrgstruct amdgpu_test_shader memcpy_cs_hang_slow_rv = { 5089bd392adSmrg memcpy_cs_hang_slow_rv_codes, 5099bd392adSmrg 4, 5109bd392adSmrg 3, 5119bd392adSmrg 1 5129bd392adSmrg}; 5139bd392adSmrg 5149bd392adSmrgunsigned int memcpy_ps_hang_slow_ai_codes[] = { 5159bd392adSmrg 0xbefc000c, 0xbe8e017e, 0xbefe077e, 0xd4080000, 5169bd392adSmrg 0xd4090001, 0xd40c0100, 0xd40d0101, 0xf0800f00, 5179bd392adSmrg 0x00400002, 0xbefe010e, 0xbf8c0f70, 0xbf800000, 5189bd392adSmrg 0xbf800000, 0xbf800000, 0xbf800000, 0xc400180f, 5199bd392adSmrg 0x03020100, 0xbf810000 5209bd392adSmrg}; 5219bd392adSmrg 5229bd392adSmrgstruct amdgpu_test_shader memcpy_ps_hang_slow_ai = { 5239bd392adSmrg memcpy_ps_hang_slow_ai_codes, 5249bd392adSmrg 7, 5259bd392adSmrg 2, 5269bd392adSmrg 9 5279bd392adSmrg}; 5289bd392adSmrg 5297cdc0497Smrgint amdgpu_bo_alloc_and_map_raw(amdgpu_device_handle dev, unsigned size, 5307cdc0497Smrg unsigned alignment, unsigned heap, uint64_t alloc_flags, 5317cdc0497Smrg uint64_t mapping_flags, amdgpu_bo_handle *bo, void **cpu, 5327cdc0497Smrg uint64_t *mc_address, 5337cdc0497Smrg amdgpu_va_handle *va_handle) 5347cdc0497Smrg{ 5357cdc0497Smrg struct amdgpu_bo_alloc_request request = {}; 5367cdc0497Smrg amdgpu_bo_handle buf_handle; 5377cdc0497Smrg amdgpu_va_handle handle; 5387cdc0497Smrg uint64_t vmc_addr; 5397cdc0497Smrg int r; 5407cdc0497Smrg 5417cdc0497Smrg request.alloc_size = size; 5427cdc0497Smrg request.phys_alignment = alignment; 5437cdc0497Smrg request.preferred_heap = heap; 5447cdc0497Smrg request.flags = alloc_flags; 5457cdc0497Smrg 5467cdc0497Smrg r = amdgpu_bo_alloc(dev, &request, &buf_handle); 5477cdc0497Smrg if (r) 5487cdc0497Smrg return r; 5497cdc0497Smrg 5507cdc0497Smrg r = amdgpu_va_range_alloc(dev, 5517cdc0497Smrg amdgpu_gpu_va_range_general, 5527cdc0497Smrg size, alignment, 0, &vmc_addr, 5537cdc0497Smrg &handle, 0); 5547cdc0497Smrg if (r) 5557cdc0497Smrg goto error_va_alloc; 5567cdc0497Smrg 5577cdc0497Smrg r = amdgpu_bo_va_op_raw(dev, buf_handle, 0, ALIGN(size, getpagesize()), vmc_addr, 5587cdc0497Smrg AMDGPU_VM_PAGE_READABLE | 5597cdc0497Smrg AMDGPU_VM_PAGE_WRITEABLE | 5607cdc0497Smrg AMDGPU_VM_PAGE_EXECUTABLE | 5617cdc0497Smrg mapping_flags, 5627cdc0497Smrg AMDGPU_VA_OP_MAP); 5637cdc0497Smrg if (r) 5647cdc0497Smrg goto error_va_map; 5657cdc0497Smrg 5667cdc0497Smrg r = amdgpu_bo_cpu_map(buf_handle, cpu); 5677cdc0497Smrg if (r) 5687cdc0497Smrg goto error_cpu_map; 5697cdc0497Smrg 5707cdc0497Smrg *bo = buf_handle; 5717cdc0497Smrg *mc_address = vmc_addr; 5727cdc0497Smrg *va_handle = handle; 5737cdc0497Smrg 5747cdc0497Smrg return 0; 5757cdc0497Smrg 5767cdc0497Smrg error_cpu_map: 5777cdc0497Smrg amdgpu_bo_cpu_unmap(buf_handle); 5787cdc0497Smrg 5797cdc0497Smrg error_va_map: 5807cdc0497Smrg amdgpu_bo_va_op(buf_handle, 0, size, vmc_addr, 0, AMDGPU_VA_OP_UNMAP); 5817cdc0497Smrg 5827cdc0497Smrg error_va_alloc: 5837cdc0497Smrg amdgpu_bo_free(buf_handle); 5847cdc0497Smrg return r; 5857cdc0497Smrg} 5867cdc0497Smrg 5877cdc0497Smrg 5887cdc0497Smrg 5893f012e29Smrgint suite_basic_tests_init(void) 5903f012e29Smrg{ 591d8807b2fSmrg struct amdgpu_gpu_info gpu_info = {0}; 5923f012e29Smrg int r; 5933f012e29Smrg 5943f012e29Smrg r = amdgpu_device_initialize(drm_amdgpu[0], &major_version, 5953f012e29Smrg &minor_version, &device_handle); 5963f012e29Smrg 597d8807b2fSmrg if (r) { 598037b3c26Smrg if ((r == -EACCES) && (errno == EACCES)) 599037b3c26Smrg printf("\n\nError:%s. " 600037b3c26Smrg "Hint:Try to run this test program as root.", 601037b3c26Smrg strerror(errno)); 6023f012e29Smrg return CUE_SINIT_FAILED; 603037b3c26Smrg } 604d8807b2fSmrg 605d8807b2fSmrg r = amdgpu_query_gpu_info(device_handle, &gpu_info); 606d8807b2fSmrg if (r) 607d8807b2fSmrg return CUE_SINIT_FAILED; 608d8807b2fSmrg 609d8807b2fSmrg family_id = gpu_info.family_id; 610d8807b2fSmrg 611d8807b2fSmrg return CUE_SUCCESS; 6123f012e29Smrg} 6133f012e29Smrg 6143f012e29Smrgint suite_basic_tests_clean(void) 6153f012e29Smrg{ 6163f012e29Smrg int r = amdgpu_device_deinitialize(device_handle); 6173f012e29Smrg 6183f012e29Smrg if (r == 0) 6193f012e29Smrg return CUE_SUCCESS; 6203f012e29Smrg else 6213f012e29Smrg return CUE_SCLEAN_FAILED; 6223f012e29Smrg} 6233f012e29Smrg 6243f012e29Smrgstatic void amdgpu_query_info_test(void) 6253f012e29Smrg{ 6263f012e29Smrg struct amdgpu_gpu_info gpu_info = {0}; 6273f012e29Smrg uint32_t version, feature; 6283f012e29Smrg int r; 6293f012e29Smrg 6303f012e29Smrg r = amdgpu_query_gpu_info(device_handle, &gpu_info); 6313f012e29Smrg CU_ASSERT_EQUAL(r, 0); 6323f012e29Smrg 6333f012e29Smrg r = amdgpu_query_firmware_version(device_handle, AMDGPU_INFO_FW_VCE, 0, 6343f012e29Smrg 0, &version, &feature); 6353f012e29Smrg CU_ASSERT_EQUAL(r, 0); 6363f012e29Smrg} 6373f012e29Smrg 6383f012e29Smrgstatic void amdgpu_command_submission_gfx_separate_ibs(void) 6393f012e29Smrg{ 6403f012e29Smrg amdgpu_context_handle context_handle; 6413f012e29Smrg amdgpu_bo_handle ib_result_handle, ib_result_ce_handle; 6423f012e29Smrg void *ib_result_cpu, *ib_result_ce_cpu; 6433f012e29Smrg uint64_t ib_result_mc_address, ib_result_ce_mc_address; 6443f012e29Smrg struct amdgpu_cs_request ibs_request = {0}; 6453f012e29Smrg struct amdgpu_cs_ib_info ib_info[2]; 6463f012e29Smrg struct amdgpu_cs_fence fence_status = {0}; 6473f012e29Smrg uint32_t *ptr; 6483f012e29Smrg uint32_t expired; 6493f012e29Smrg amdgpu_bo_list_handle bo_list; 6503f012e29Smrg amdgpu_va_handle va_handle, va_handle_ce; 651d8807b2fSmrg int r, i = 0; 6523f012e29Smrg 6533f012e29Smrg r = amdgpu_cs_ctx_create(device_handle, &context_handle); 6543f012e29Smrg CU_ASSERT_EQUAL(r, 0); 6553f012e29Smrg 6563f012e29Smrg r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096, 6573f012e29Smrg AMDGPU_GEM_DOMAIN_GTT, 0, 6583f012e29Smrg &ib_result_handle, &ib_result_cpu, 6593f012e29Smrg &ib_result_mc_address, &va_handle); 6603f012e29Smrg CU_ASSERT_EQUAL(r, 0); 6613f012e29Smrg 6623f012e29Smrg r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096, 6633f012e29Smrg AMDGPU_GEM_DOMAIN_GTT, 0, 6643f012e29Smrg &ib_result_ce_handle, &ib_result_ce_cpu, 6653f012e29Smrg &ib_result_ce_mc_address, &va_handle_ce); 6663f012e29Smrg CU_ASSERT_EQUAL(r, 0); 6673f012e29Smrg 6683f012e29Smrg r = amdgpu_get_bo_list(device_handle, ib_result_handle, 6693f012e29Smrg ib_result_ce_handle, &bo_list); 6703f012e29Smrg CU_ASSERT_EQUAL(r, 0); 6713f012e29Smrg 6723f012e29Smrg memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info)); 6733f012e29Smrg 6743f012e29Smrg /* IT_SET_CE_DE_COUNTERS */ 6753f012e29Smrg ptr = ib_result_ce_cpu; 676d8807b2fSmrg if (family_id != AMDGPU_FAMILY_SI) { 677d8807b2fSmrg ptr[i++] = 0xc0008900; 678d8807b2fSmrg ptr[i++] = 0; 679d8807b2fSmrg } 680d8807b2fSmrg ptr[i++] = 0xc0008400; 681d8807b2fSmrg ptr[i++] = 1; 6823f012e29Smrg ib_info[0].ib_mc_address = ib_result_ce_mc_address; 683d8807b2fSmrg ib_info[0].size = i; 6843f012e29Smrg ib_info[0].flags = AMDGPU_IB_FLAG_CE; 6853f012e29Smrg 6863f012e29Smrg /* IT_WAIT_ON_CE_COUNTER */ 6873f012e29Smrg ptr = ib_result_cpu; 6883f012e29Smrg ptr[0] = 0xc0008600; 6893f012e29Smrg ptr[1] = 0x00000001; 6903f012e29Smrg ib_info[1].ib_mc_address = ib_result_mc_address; 6913f012e29Smrg ib_info[1].size = 2; 6923f012e29Smrg 6933f012e29Smrg ibs_request.ip_type = AMDGPU_HW_IP_GFX; 6943f012e29Smrg ibs_request.number_of_ibs = 2; 6953f012e29Smrg ibs_request.ibs = ib_info; 6963f012e29Smrg ibs_request.resources = bo_list; 6973f012e29Smrg ibs_request.fence_info.handle = NULL; 6983f012e29Smrg 6993f012e29Smrg r = amdgpu_cs_submit(context_handle, 0,&ibs_request, 1); 7003f012e29Smrg 7013f012e29Smrg CU_ASSERT_EQUAL(r, 0); 7023f012e29Smrg 7033f012e29Smrg fence_status.context = context_handle; 7043f012e29Smrg fence_status.ip_type = AMDGPU_HW_IP_GFX; 7053f012e29Smrg fence_status.ip_instance = 0; 7063f012e29Smrg fence_status.fence = ibs_request.seq_no; 7073f012e29Smrg 7083f012e29Smrg r = amdgpu_cs_query_fence_status(&fence_status, 7093f012e29Smrg AMDGPU_TIMEOUT_INFINITE, 7103f012e29Smrg 0, &expired); 7113f012e29Smrg CU_ASSERT_EQUAL(r, 0); 7123f012e29Smrg 7133f012e29Smrg r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle, 7143f012e29Smrg ib_result_mc_address, 4096); 7153f012e29Smrg CU_ASSERT_EQUAL(r, 0); 7163f012e29Smrg 7173f012e29Smrg r = amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce, 7183f012e29Smrg ib_result_ce_mc_address, 4096); 7193f012e29Smrg CU_ASSERT_EQUAL(r, 0); 7203f012e29Smrg 7213f012e29Smrg r = amdgpu_bo_list_destroy(bo_list); 7223f012e29Smrg CU_ASSERT_EQUAL(r, 0); 7233f012e29Smrg 7243f012e29Smrg r = amdgpu_cs_ctx_free(context_handle); 7253f012e29Smrg CU_ASSERT_EQUAL(r, 0); 7263f012e29Smrg 7273f012e29Smrg} 7283f012e29Smrg 7293f012e29Smrgstatic void amdgpu_command_submission_gfx_shared_ib(void) 7303f012e29Smrg{ 7313f012e29Smrg amdgpu_context_handle context_handle; 7323f012e29Smrg amdgpu_bo_handle ib_result_handle; 7333f012e29Smrg void *ib_result_cpu; 7343f012e29Smrg uint64_t ib_result_mc_address; 7353f012e29Smrg struct amdgpu_cs_request ibs_request = {0}; 7363f012e29Smrg struct amdgpu_cs_ib_info ib_info[2]; 7373f012e29Smrg struct amdgpu_cs_fence fence_status = {0}; 7383f012e29Smrg uint32_t *ptr; 7393f012e29Smrg uint32_t expired; 7403f012e29Smrg amdgpu_bo_list_handle bo_list; 7413f012e29Smrg amdgpu_va_handle va_handle; 742d8807b2fSmrg int r, i = 0; 7433f012e29Smrg 7443f012e29Smrg r = amdgpu_cs_ctx_create(device_handle, &context_handle); 7453f012e29Smrg CU_ASSERT_EQUAL(r, 0); 7463f012e29Smrg 7473f012e29Smrg r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096, 7483f012e29Smrg AMDGPU_GEM_DOMAIN_GTT, 0, 7493f012e29Smrg &ib_result_handle, &ib_result_cpu, 7503f012e29Smrg &ib_result_mc_address, &va_handle); 7513f012e29Smrg CU_ASSERT_EQUAL(r, 0); 7523f012e29Smrg 7533f012e29Smrg r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL, 7543f012e29Smrg &bo_list); 7553f012e29Smrg CU_ASSERT_EQUAL(r, 0); 7563f012e29Smrg 7573f012e29Smrg memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info)); 7583f012e29Smrg 7593f012e29Smrg /* IT_SET_CE_DE_COUNTERS */ 7603f012e29Smrg ptr = ib_result_cpu; 761d8807b2fSmrg if (family_id != AMDGPU_FAMILY_SI) { 762d8807b2fSmrg ptr[i++] = 0xc0008900; 763d8807b2fSmrg ptr[i++] = 0; 764d8807b2fSmrg } 765d8807b2fSmrg ptr[i++] = 0xc0008400; 766d8807b2fSmrg ptr[i++] = 1; 7673f012e29Smrg ib_info[0].ib_mc_address = ib_result_mc_address; 768d8807b2fSmrg ib_info[0].size = i; 7693f012e29Smrg ib_info[0].flags = AMDGPU_IB_FLAG_CE; 7703f012e29Smrg 7713f012e29Smrg ptr = (uint32_t *)ib_result_cpu + 4; 7723f012e29Smrg ptr[0] = 0xc0008600; 7733f012e29Smrg ptr[1] = 0x00000001; 7743f012e29Smrg ib_info[1].ib_mc_address = ib_result_mc_address + 16; 7753f012e29Smrg ib_info[1].size = 2; 7763f012e29Smrg 7773f012e29Smrg ibs_request.ip_type = AMDGPU_HW_IP_GFX; 7783f012e29Smrg ibs_request.number_of_ibs = 2; 7793f012e29Smrg ibs_request.ibs = ib_info; 7803f012e29Smrg ibs_request.resources = bo_list; 7813f012e29Smrg ibs_request.fence_info.handle = NULL; 7823f012e29Smrg 7833f012e29Smrg r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1); 7843f012e29Smrg 7853f012e29Smrg CU_ASSERT_EQUAL(r, 0); 7863f012e29Smrg 7873f012e29Smrg fence_status.context = context_handle; 7883f012e29Smrg fence_status.ip_type = AMDGPU_HW_IP_GFX; 7893f012e29Smrg fence_status.ip_instance = 0; 7903f012e29Smrg fence_status.fence = ibs_request.seq_no; 7913f012e29Smrg 7923f012e29Smrg r = amdgpu_cs_query_fence_status(&fence_status, 7933f012e29Smrg AMDGPU_TIMEOUT_INFINITE, 7943f012e29Smrg 0, &expired); 7953f012e29Smrg CU_ASSERT_EQUAL(r, 0); 7963f012e29Smrg 7973f012e29Smrg r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle, 7983f012e29Smrg ib_result_mc_address, 4096); 7993f012e29Smrg CU_ASSERT_EQUAL(r, 0); 8003f012e29Smrg 8013f012e29Smrg r = amdgpu_bo_list_destroy(bo_list); 8023f012e29Smrg CU_ASSERT_EQUAL(r, 0); 8033f012e29Smrg 8043f012e29Smrg r = amdgpu_cs_ctx_free(context_handle); 8053f012e29Smrg CU_ASSERT_EQUAL(r, 0); 8063f012e29Smrg} 8073f012e29Smrg 8083f012e29Smrgstatic void amdgpu_command_submission_gfx_cp_write_data(void) 8093f012e29Smrg{ 8103f012e29Smrg amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_GFX); 8113f012e29Smrg} 8123f012e29Smrg 8133f012e29Smrgstatic void amdgpu_command_submission_gfx_cp_const_fill(void) 8143f012e29Smrg{ 8153f012e29Smrg amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_GFX); 8163f012e29Smrg} 8173f012e29Smrg 8183f012e29Smrgstatic void amdgpu_command_submission_gfx_cp_copy_data(void) 8193f012e29Smrg{ 8203f012e29Smrg amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_GFX); 8213f012e29Smrg} 8223f012e29Smrg 82300a23bdaSmrgstatic void amdgpu_bo_eviction_test(void) 82400a23bdaSmrg{ 82500a23bdaSmrg const int sdma_write_length = 1024; 82600a23bdaSmrg const int pm4_dw = 256; 82700a23bdaSmrg amdgpu_context_handle context_handle; 82800a23bdaSmrg amdgpu_bo_handle bo1, bo2, vram_max[2], gtt_max[2]; 82900a23bdaSmrg amdgpu_bo_handle *resources; 83000a23bdaSmrg uint32_t *pm4; 83100a23bdaSmrg struct amdgpu_cs_ib_info *ib_info; 83200a23bdaSmrg struct amdgpu_cs_request *ibs_request; 83300a23bdaSmrg uint64_t bo1_mc, bo2_mc; 83400a23bdaSmrg volatile unsigned char *bo1_cpu, *bo2_cpu; 83500a23bdaSmrg int i, j, r, loop1, loop2; 83600a23bdaSmrg uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC}; 83700a23bdaSmrg amdgpu_va_handle bo1_va_handle, bo2_va_handle; 83800a23bdaSmrg struct amdgpu_heap_info vram_info, gtt_info; 83900a23bdaSmrg 84000a23bdaSmrg pm4 = calloc(pm4_dw, sizeof(*pm4)); 84100a23bdaSmrg CU_ASSERT_NOT_EQUAL(pm4, NULL); 84200a23bdaSmrg 84300a23bdaSmrg ib_info = calloc(1, sizeof(*ib_info)); 84400a23bdaSmrg CU_ASSERT_NOT_EQUAL(ib_info, NULL); 84500a23bdaSmrg 84600a23bdaSmrg ibs_request = calloc(1, sizeof(*ibs_request)); 84700a23bdaSmrg CU_ASSERT_NOT_EQUAL(ibs_request, NULL); 84800a23bdaSmrg 84900a23bdaSmrg r = amdgpu_cs_ctx_create(device_handle, &context_handle); 85000a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 85100a23bdaSmrg 85200a23bdaSmrg /* prepare resource */ 85300a23bdaSmrg resources = calloc(4, sizeof(amdgpu_bo_handle)); 85400a23bdaSmrg CU_ASSERT_NOT_EQUAL(resources, NULL); 85500a23bdaSmrg 85600a23bdaSmrg r = amdgpu_query_heap_info(device_handle, AMDGPU_GEM_DOMAIN_VRAM, 85700a23bdaSmrg 0, &vram_info); 85800a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 85900a23bdaSmrg 86000a23bdaSmrg r = amdgpu_bo_alloc_wrap(device_handle, vram_info.max_allocation, 4096, 86100a23bdaSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, &vram_max[0]); 86200a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 86300a23bdaSmrg r = amdgpu_bo_alloc_wrap(device_handle, vram_info.max_allocation, 4096, 86400a23bdaSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, &vram_max[1]); 86500a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 86600a23bdaSmrg 86700a23bdaSmrg r = amdgpu_query_heap_info(device_handle, AMDGPU_GEM_DOMAIN_GTT, 86800a23bdaSmrg 0, >t_info); 86900a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 87000a23bdaSmrg 87100a23bdaSmrg r = amdgpu_bo_alloc_wrap(device_handle, gtt_info.max_allocation, 4096, 87200a23bdaSmrg AMDGPU_GEM_DOMAIN_GTT, 0, >t_max[0]); 87300a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 87400a23bdaSmrg r = amdgpu_bo_alloc_wrap(device_handle, gtt_info.max_allocation, 4096, 87500a23bdaSmrg AMDGPU_GEM_DOMAIN_GTT, 0, >t_max[1]); 87600a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 87700a23bdaSmrg 87800a23bdaSmrg 87900a23bdaSmrg 88000a23bdaSmrg loop1 = loop2 = 0; 88100a23bdaSmrg /* run 9 circle to test all mapping combination */ 88200a23bdaSmrg while(loop1 < 2) { 88300a23bdaSmrg while(loop2 < 2) { 88400a23bdaSmrg /* allocate UC bo1for sDMA use */ 88500a23bdaSmrg r = amdgpu_bo_alloc_and_map(device_handle, 88600a23bdaSmrg sdma_write_length, 4096, 88700a23bdaSmrg AMDGPU_GEM_DOMAIN_GTT, 88800a23bdaSmrg gtt_flags[loop1], &bo1, 88900a23bdaSmrg (void**)&bo1_cpu, &bo1_mc, 89000a23bdaSmrg &bo1_va_handle); 89100a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 89200a23bdaSmrg 89300a23bdaSmrg /* set bo1 */ 89400a23bdaSmrg memset((void*)bo1_cpu, 0xaa, sdma_write_length); 89500a23bdaSmrg 89600a23bdaSmrg /* allocate UC bo2 for sDMA use */ 89700a23bdaSmrg r = amdgpu_bo_alloc_and_map(device_handle, 89800a23bdaSmrg sdma_write_length, 4096, 89900a23bdaSmrg AMDGPU_GEM_DOMAIN_GTT, 90000a23bdaSmrg gtt_flags[loop2], &bo2, 90100a23bdaSmrg (void**)&bo2_cpu, &bo2_mc, 90200a23bdaSmrg &bo2_va_handle); 90300a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 90400a23bdaSmrg 90500a23bdaSmrg /* clear bo2 */ 90600a23bdaSmrg memset((void*)bo2_cpu, 0, sdma_write_length); 90700a23bdaSmrg 90800a23bdaSmrg resources[0] = bo1; 90900a23bdaSmrg resources[1] = bo2; 91000a23bdaSmrg resources[2] = vram_max[loop2]; 91100a23bdaSmrg resources[3] = gtt_max[loop2]; 91200a23bdaSmrg 91300a23bdaSmrg /* fulfill PM4: test DMA copy linear */ 91400a23bdaSmrg i = j = 0; 91500a23bdaSmrg if (family_id == AMDGPU_FAMILY_SI) { 91600a23bdaSmrg pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI, 0, 0, 0, 91700a23bdaSmrg sdma_write_length); 91800a23bdaSmrg pm4[i++] = 0xffffffff & bo2_mc; 91900a23bdaSmrg pm4[i++] = 0xffffffff & bo1_mc; 92000a23bdaSmrg pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32; 92100a23bdaSmrg pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32; 92200a23bdaSmrg } else { 92300a23bdaSmrg pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0); 92400a23bdaSmrg if (family_id >= AMDGPU_FAMILY_AI) 92500a23bdaSmrg pm4[i++] = sdma_write_length - 1; 92600a23bdaSmrg else 92700a23bdaSmrg pm4[i++] = sdma_write_length; 92800a23bdaSmrg pm4[i++] = 0; 92900a23bdaSmrg pm4[i++] = 0xffffffff & bo1_mc; 93000a23bdaSmrg pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32; 93100a23bdaSmrg pm4[i++] = 0xffffffff & bo2_mc; 93200a23bdaSmrg pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32; 93300a23bdaSmrg } 93400a23bdaSmrg 93500a23bdaSmrg amdgpu_test_exec_cs_helper(context_handle, 93600a23bdaSmrg AMDGPU_HW_IP_DMA, 0, 93700a23bdaSmrg i, pm4, 93800a23bdaSmrg 4, resources, 93900a23bdaSmrg ib_info, ibs_request); 94000a23bdaSmrg 94100a23bdaSmrg /* verify if SDMA test result meets with expected */ 94200a23bdaSmrg i = 0; 94300a23bdaSmrg while(i < sdma_write_length) { 94400a23bdaSmrg CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa); 94500a23bdaSmrg } 94600a23bdaSmrg r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc, 94700a23bdaSmrg sdma_write_length); 94800a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 94900a23bdaSmrg r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc, 95000a23bdaSmrg sdma_write_length); 95100a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 95200a23bdaSmrg loop2++; 95300a23bdaSmrg } 95400a23bdaSmrg loop2 = 0; 95500a23bdaSmrg loop1++; 95600a23bdaSmrg } 95700a23bdaSmrg amdgpu_bo_free(vram_max[0]); 95800a23bdaSmrg amdgpu_bo_free(vram_max[1]); 95900a23bdaSmrg amdgpu_bo_free(gtt_max[0]); 96000a23bdaSmrg amdgpu_bo_free(gtt_max[1]); 96100a23bdaSmrg /* clean resources */ 96200a23bdaSmrg free(resources); 96300a23bdaSmrg free(ibs_request); 96400a23bdaSmrg free(ib_info); 96500a23bdaSmrg free(pm4); 96600a23bdaSmrg 96700a23bdaSmrg /* end of test */ 96800a23bdaSmrg r = amdgpu_cs_ctx_free(context_handle); 96900a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 97000a23bdaSmrg} 97100a23bdaSmrg 97200a23bdaSmrg 9733f012e29Smrgstatic void amdgpu_command_submission_gfx(void) 9743f012e29Smrg{ 9753f012e29Smrg /* write data using the CP */ 9763f012e29Smrg amdgpu_command_submission_gfx_cp_write_data(); 9773f012e29Smrg /* const fill using the CP */ 9783f012e29Smrg amdgpu_command_submission_gfx_cp_const_fill(); 9793f012e29Smrg /* copy data using the CP */ 9803f012e29Smrg amdgpu_command_submission_gfx_cp_copy_data(); 9813f012e29Smrg /* separate IB buffers for multi-IB submission */ 9823f012e29Smrg amdgpu_command_submission_gfx_separate_ibs(); 9833f012e29Smrg /* shared IB buffer for multi-IB submission */ 9843f012e29Smrg amdgpu_command_submission_gfx_shared_ib(); 9853f012e29Smrg} 9863f012e29Smrg 9873f012e29Smrgstatic void amdgpu_semaphore_test(void) 9883f012e29Smrg{ 9893f012e29Smrg amdgpu_context_handle context_handle[2]; 9903f012e29Smrg amdgpu_semaphore_handle sem; 9913f012e29Smrg amdgpu_bo_handle ib_result_handle[2]; 9923f012e29Smrg void *ib_result_cpu[2]; 9933f012e29Smrg uint64_t ib_result_mc_address[2]; 9943f012e29Smrg struct amdgpu_cs_request ibs_request[2] = {0}; 9953f012e29Smrg struct amdgpu_cs_ib_info ib_info[2] = {0}; 9963f012e29Smrg struct amdgpu_cs_fence fence_status = {0}; 9973f012e29Smrg uint32_t *ptr; 9983f012e29Smrg uint32_t expired; 999d8807b2fSmrg uint32_t sdma_nop, gfx_nop; 10003f012e29Smrg amdgpu_bo_list_handle bo_list[2]; 10013f012e29Smrg amdgpu_va_handle va_handle[2]; 10023f012e29Smrg int r, i; 10033f012e29Smrg 1004d8807b2fSmrg if (family_id == AMDGPU_FAMILY_SI) { 1005d8807b2fSmrg sdma_nop = SDMA_PACKET_SI(SDMA_NOP_SI, 0, 0, 0, 0); 1006d8807b2fSmrg gfx_nop = GFX_COMPUTE_NOP_SI; 1007d8807b2fSmrg } else { 1008d8807b2fSmrg sdma_nop = SDMA_PKT_HEADER_OP(SDMA_NOP); 1009d8807b2fSmrg gfx_nop = GFX_COMPUTE_NOP; 1010d8807b2fSmrg } 1011d8807b2fSmrg 10123f012e29Smrg r = amdgpu_cs_create_semaphore(&sem); 10133f012e29Smrg CU_ASSERT_EQUAL(r, 0); 10143f012e29Smrg for (i = 0; i < 2; i++) { 10153f012e29Smrg r = amdgpu_cs_ctx_create(device_handle, &context_handle[i]); 10163f012e29Smrg CU_ASSERT_EQUAL(r, 0); 10173f012e29Smrg 10183f012e29Smrg r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096, 10193f012e29Smrg AMDGPU_GEM_DOMAIN_GTT, 0, 10203f012e29Smrg &ib_result_handle[i], &ib_result_cpu[i], 10213f012e29Smrg &ib_result_mc_address[i], &va_handle[i]); 10223f012e29Smrg CU_ASSERT_EQUAL(r, 0); 10233f012e29Smrg 10243f012e29Smrg r = amdgpu_get_bo_list(device_handle, ib_result_handle[i], 10253f012e29Smrg NULL, &bo_list[i]); 10263f012e29Smrg CU_ASSERT_EQUAL(r, 0); 10273f012e29Smrg } 10283f012e29Smrg 10293f012e29Smrg /* 1. same context different engine */ 10303f012e29Smrg ptr = ib_result_cpu[0]; 1031d8807b2fSmrg ptr[0] = sdma_nop; 10323f012e29Smrg ib_info[0].ib_mc_address = ib_result_mc_address[0]; 10333f012e29Smrg ib_info[0].size = 1; 10343f012e29Smrg 10353f012e29Smrg ibs_request[0].ip_type = AMDGPU_HW_IP_DMA; 10363f012e29Smrg ibs_request[0].number_of_ibs = 1; 10373f012e29Smrg ibs_request[0].ibs = &ib_info[0]; 10383f012e29Smrg ibs_request[0].resources = bo_list[0]; 10393f012e29Smrg ibs_request[0].fence_info.handle = NULL; 10403f012e29Smrg r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request[0], 1); 10413f012e29Smrg CU_ASSERT_EQUAL(r, 0); 10423f012e29Smrg r = amdgpu_cs_signal_semaphore(context_handle[0], AMDGPU_HW_IP_DMA, 0, 0, sem); 10433f012e29Smrg CU_ASSERT_EQUAL(r, 0); 10443f012e29Smrg 10453f012e29Smrg r = amdgpu_cs_wait_semaphore(context_handle[0], AMDGPU_HW_IP_GFX, 0, 0, sem); 10463f012e29Smrg CU_ASSERT_EQUAL(r, 0); 10473f012e29Smrg ptr = ib_result_cpu[1]; 1048d8807b2fSmrg ptr[0] = gfx_nop; 10493f012e29Smrg ib_info[1].ib_mc_address = ib_result_mc_address[1]; 10503f012e29Smrg ib_info[1].size = 1; 10513f012e29Smrg 10523f012e29Smrg ibs_request[1].ip_type = AMDGPU_HW_IP_GFX; 10533f012e29Smrg ibs_request[1].number_of_ibs = 1; 10543f012e29Smrg ibs_request[1].ibs = &ib_info[1]; 10553f012e29Smrg ibs_request[1].resources = bo_list[1]; 10563f012e29Smrg ibs_request[1].fence_info.handle = NULL; 10573f012e29Smrg 10583f012e29Smrg r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request[1], 1); 10593f012e29Smrg CU_ASSERT_EQUAL(r, 0); 10603f012e29Smrg 10613f012e29Smrg fence_status.context = context_handle[0]; 10623f012e29Smrg fence_status.ip_type = AMDGPU_HW_IP_GFX; 10633f012e29Smrg fence_status.ip_instance = 0; 10643f012e29Smrg fence_status.fence = ibs_request[1].seq_no; 10653f012e29Smrg r = amdgpu_cs_query_fence_status(&fence_status, 10663f012e29Smrg 500000000, 0, &expired); 10673f012e29Smrg CU_ASSERT_EQUAL(r, 0); 10683f012e29Smrg CU_ASSERT_EQUAL(expired, true); 10693f012e29Smrg 10703f012e29Smrg /* 2. same engine different context */ 10713f012e29Smrg ptr = ib_result_cpu[0]; 1072d8807b2fSmrg ptr[0] = gfx_nop; 10733f012e29Smrg ib_info[0].ib_mc_address = ib_result_mc_address[0]; 10743f012e29Smrg ib_info[0].size = 1; 10753f012e29Smrg 10763f012e29Smrg ibs_request[0].ip_type = AMDGPU_HW_IP_GFX; 10773f012e29Smrg ibs_request[0].number_of_ibs = 1; 10783f012e29Smrg ibs_request[0].ibs = &ib_info[0]; 10793f012e29Smrg ibs_request[0].resources = bo_list[0]; 10803f012e29Smrg ibs_request[0].fence_info.handle = NULL; 10813f012e29Smrg r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request[0], 1); 10823f012e29Smrg CU_ASSERT_EQUAL(r, 0); 10833f012e29Smrg r = amdgpu_cs_signal_semaphore(context_handle[0], AMDGPU_HW_IP_GFX, 0, 0, sem); 10843f012e29Smrg CU_ASSERT_EQUAL(r, 0); 10853f012e29Smrg 10863f012e29Smrg r = amdgpu_cs_wait_semaphore(context_handle[1], AMDGPU_HW_IP_GFX, 0, 0, sem); 10873f012e29Smrg CU_ASSERT_EQUAL(r, 0); 10883f012e29Smrg ptr = ib_result_cpu[1]; 1089d8807b2fSmrg ptr[0] = gfx_nop; 10903f012e29Smrg ib_info[1].ib_mc_address = ib_result_mc_address[1]; 10913f012e29Smrg ib_info[1].size = 1; 10923f012e29Smrg 10933f012e29Smrg ibs_request[1].ip_type = AMDGPU_HW_IP_GFX; 10943f012e29Smrg ibs_request[1].number_of_ibs = 1; 10953f012e29Smrg ibs_request[1].ibs = &ib_info[1]; 10963f012e29Smrg ibs_request[1].resources = bo_list[1]; 10973f012e29Smrg ibs_request[1].fence_info.handle = NULL; 10983f012e29Smrg r = amdgpu_cs_submit(context_handle[1], 0,&ibs_request[1], 1); 10993f012e29Smrg 11003f012e29Smrg CU_ASSERT_EQUAL(r, 0); 11013f012e29Smrg 11023f012e29Smrg fence_status.context = context_handle[1]; 11033f012e29Smrg fence_status.ip_type = AMDGPU_HW_IP_GFX; 11043f012e29Smrg fence_status.ip_instance = 0; 11053f012e29Smrg fence_status.fence = ibs_request[1].seq_no; 11063f012e29Smrg r = amdgpu_cs_query_fence_status(&fence_status, 11073f012e29Smrg 500000000, 0, &expired); 11083f012e29Smrg CU_ASSERT_EQUAL(r, 0); 11093f012e29Smrg CU_ASSERT_EQUAL(expired, true); 1110d8807b2fSmrg 11113f012e29Smrg for (i = 0; i < 2; i++) { 11123f012e29Smrg r = amdgpu_bo_unmap_and_free(ib_result_handle[i], va_handle[i], 11133f012e29Smrg ib_result_mc_address[i], 4096); 11143f012e29Smrg CU_ASSERT_EQUAL(r, 0); 11153f012e29Smrg 11163f012e29Smrg r = amdgpu_bo_list_destroy(bo_list[i]); 11173f012e29Smrg CU_ASSERT_EQUAL(r, 0); 11183f012e29Smrg 11193f012e29Smrg r = amdgpu_cs_ctx_free(context_handle[i]); 11203f012e29Smrg CU_ASSERT_EQUAL(r, 0); 11213f012e29Smrg } 11223f012e29Smrg 11233f012e29Smrg r = amdgpu_cs_destroy_semaphore(sem); 11243f012e29Smrg CU_ASSERT_EQUAL(r, 0); 11253f012e29Smrg} 11263f012e29Smrg 11273f012e29Smrgstatic void amdgpu_command_submission_compute_nop(void) 11283f012e29Smrg{ 11293f012e29Smrg amdgpu_context_handle context_handle; 11303f012e29Smrg amdgpu_bo_handle ib_result_handle; 11313f012e29Smrg void *ib_result_cpu; 11323f012e29Smrg uint64_t ib_result_mc_address; 11333f012e29Smrg struct amdgpu_cs_request ibs_request; 11343f012e29Smrg struct amdgpu_cs_ib_info ib_info; 11353f012e29Smrg struct amdgpu_cs_fence fence_status; 11363f012e29Smrg uint32_t *ptr; 11373f012e29Smrg uint32_t expired; 113800a23bdaSmrg int r, instance; 11393f012e29Smrg amdgpu_bo_list_handle bo_list; 11403f012e29Smrg amdgpu_va_handle va_handle; 1141d8807b2fSmrg struct drm_amdgpu_info_hw_ip info; 1142d8807b2fSmrg 1143d8807b2fSmrg r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_COMPUTE, 0, &info); 1144d8807b2fSmrg CU_ASSERT_EQUAL(r, 0); 11453f012e29Smrg 11463f012e29Smrg r = amdgpu_cs_ctx_create(device_handle, &context_handle); 11473f012e29Smrg CU_ASSERT_EQUAL(r, 0); 11483f012e29Smrg 1149d8807b2fSmrg for (instance = 0; (1 << instance) & info.available_rings; instance++) { 11503f012e29Smrg r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096, 11513f012e29Smrg AMDGPU_GEM_DOMAIN_GTT, 0, 11523f012e29Smrg &ib_result_handle, &ib_result_cpu, 11533f012e29Smrg &ib_result_mc_address, &va_handle); 11543f012e29Smrg CU_ASSERT_EQUAL(r, 0); 11553f012e29Smrg 11563f012e29Smrg r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL, 11573f012e29Smrg &bo_list); 11583f012e29Smrg CU_ASSERT_EQUAL(r, 0); 11593f012e29Smrg 11603f012e29Smrg ptr = ib_result_cpu; 1161d8807b2fSmrg memset(ptr, 0, 16); 1162d8807b2fSmrg ptr[0]=PACKET3(PACKET3_NOP, 14); 11633f012e29Smrg 11643f012e29Smrg memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info)); 11653f012e29Smrg ib_info.ib_mc_address = ib_result_mc_address; 11663f012e29Smrg ib_info.size = 16; 11673f012e29Smrg 11683f012e29Smrg memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request)); 11693f012e29Smrg ibs_request.ip_type = AMDGPU_HW_IP_COMPUTE; 11703f012e29Smrg ibs_request.ring = instance; 11713f012e29Smrg ibs_request.number_of_ibs = 1; 11723f012e29Smrg ibs_request.ibs = &ib_info; 11733f012e29Smrg ibs_request.resources = bo_list; 11743f012e29Smrg ibs_request.fence_info.handle = NULL; 11753f012e29Smrg 11763f012e29Smrg memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence)); 11773f012e29Smrg r = amdgpu_cs_submit(context_handle, 0,&ibs_request, 1); 11783f012e29Smrg CU_ASSERT_EQUAL(r, 0); 11793f012e29Smrg 11803f012e29Smrg fence_status.context = context_handle; 11813f012e29Smrg fence_status.ip_type = AMDGPU_HW_IP_COMPUTE; 11823f012e29Smrg fence_status.ip_instance = 0; 11833f012e29Smrg fence_status.ring = instance; 11843f012e29Smrg fence_status.fence = ibs_request.seq_no; 11853f012e29Smrg 11863f012e29Smrg r = amdgpu_cs_query_fence_status(&fence_status, 11873f012e29Smrg AMDGPU_TIMEOUT_INFINITE, 11883f012e29Smrg 0, &expired); 11893f012e29Smrg CU_ASSERT_EQUAL(r, 0); 11903f012e29Smrg 11913f012e29Smrg r = amdgpu_bo_list_destroy(bo_list); 11923f012e29Smrg CU_ASSERT_EQUAL(r, 0); 11933f012e29Smrg 11943f012e29Smrg r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle, 11953f012e29Smrg ib_result_mc_address, 4096); 11963f012e29Smrg CU_ASSERT_EQUAL(r, 0); 11973f012e29Smrg } 11983f012e29Smrg 11993f012e29Smrg r = amdgpu_cs_ctx_free(context_handle); 12003f012e29Smrg CU_ASSERT_EQUAL(r, 0); 12013f012e29Smrg} 12023f012e29Smrg 12033f012e29Smrgstatic void amdgpu_command_submission_compute_cp_write_data(void) 12043f012e29Smrg{ 12053f012e29Smrg amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_COMPUTE); 12063f012e29Smrg} 12073f012e29Smrg 12083f012e29Smrgstatic void amdgpu_command_submission_compute_cp_const_fill(void) 12093f012e29Smrg{ 12103f012e29Smrg amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_COMPUTE); 12113f012e29Smrg} 12123f012e29Smrg 12133f012e29Smrgstatic void amdgpu_command_submission_compute_cp_copy_data(void) 12143f012e29Smrg{ 12153f012e29Smrg amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_COMPUTE); 12163f012e29Smrg} 12173f012e29Smrg 12183f012e29Smrgstatic void amdgpu_command_submission_compute(void) 12193f012e29Smrg{ 12203f012e29Smrg /* write data using the CP */ 12213f012e29Smrg amdgpu_command_submission_compute_cp_write_data(); 12223f012e29Smrg /* const fill using the CP */ 12233f012e29Smrg amdgpu_command_submission_compute_cp_const_fill(); 12243f012e29Smrg /* copy data using the CP */ 12253f012e29Smrg amdgpu_command_submission_compute_cp_copy_data(); 12263f012e29Smrg /* nop test */ 12273f012e29Smrg amdgpu_command_submission_compute_nop(); 12283f012e29Smrg} 12293f012e29Smrg 12303f012e29Smrg/* 12313f012e29Smrg * caller need create/release: 12323f012e29Smrg * pm4_src, resources, ib_info, and ibs_request 12333f012e29Smrg * submit command stream described in ibs_request and wait for this IB accomplished 12343f012e29Smrg */ 12353f012e29Smrgstatic void amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle, 12363f012e29Smrg unsigned ip_type, 12373f012e29Smrg int instance, int pm4_dw, uint32_t *pm4_src, 12383f012e29Smrg int res_cnt, amdgpu_bo_handle *resources, 12393f012e29Smrg struct amdgpu_cs_ib_info *ib_info, 12403f012e29Smrg struct amdgpu_cs_request *ibs_request) 12413f012e29Smrg{ 12423f012e29Smrg int r; 12433f012e29Smrg uint32_t expired; 12443f012e29Smrg uint32_t *ring_ptr; 12453f012e29Smrg amdgpu_bo_handle ib_result_handle; 12463f012e29Smrg void *ib_result_cpu; 12473f012e29Smrg uint64_t ib_result_mc_address; 12483f012e29Smrg struct amdgpu_cs_fence fence_status = {0}; 12493f012e29Smrg amdgpu_bo_handle *all_res = alloca(sizeof(resources[0]) * (res_cnt + 1)); 12503f012e29Smrg amdgpu_va_handle va_handle; 12513f012e29Smrg 12523f012e29Smrg /* prepare CS */ 12533f012e29Smrg CU_ASSERT_NOT_EQUAL(pm4_src, NULL); 12543f012e29Smrg CU_ASSERT_NOT_EQUAL(resources, NULL); 12553f012e29Smrg CU_ASSERT_NOT_EQUAL(ib_info, NULL); 12563f012e29Smrg CU_ASSERT_NOT_EQUAL(ibs_request, NULL); 12573f012e29Smrg CU_ASSERT_TRUE(pm4_dw <= 1024); 12583f012e29Smrg 12593f012e29Smrg /* allocate IB */ 12603f012e29Smrg r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096, 12613f012e29Smrg AMDGPU_GEM_DOMAIN_GTT, 0, 12623f012e29Smrg &ib_result_handle, &ib_result_cpu, 12633f012e29Smrg &ib_result_mc_address, &va_handle); 12643f012e29Smrg CU_ASSERT_EQUAL(r, 0); 12653f012e29Smrg 12663f012e29Smrg /* copy PM4 packet to ring from caller */ 12673f012e29Smrg ring_ptr = ib_result_cpu; 12683f012e29Smrg memcpy(ring_ptr, pm4_src, pm4_dw * sizeof(*pm4_src)); 12693f012e29Smrg 12703f012e29Smrg ib_info->ib_mc_address = ib_result_mc_address; 12713f012e29Smrg ib_info->size = pm4_dw; 12723f012e29Smrg 12733f012e29Smrg ibs_request->ip_type = ip_type; 12743f012e29Smrg ibs_request->ring = instance; 12753f012e29Smrg ibs_request->number_of_ibs = 1; 12763f012e29Smrg ibs_request->ibs = ib_info; 12773f012e29Smrg ibs_request->fence_info.handle = NULL; 12783f012e29Smrg 12793f012e29Smrg memcpy(all_res, resources, sizeof(resources[0]) * res_cnt); 12803f012e29Smrg all_res[res_cnt] = ib_result_handle; 12813f012e29Smrg 12823f012e29Smrg r = amdgpu_bo_list_create(device_handle, res_cnt+1, all_res, 12833f012e29Smrg NULL, &ibs_request->resources); 12843f012e29Smrg CU_ASSERT_EQUAL(r, 0); 12853f012e29Smrg 12863f012e29Smrg CU_ASSERT_NOT_EQUAL(ibs_request, NULL); 12873f012e29Smrg 12883f012e29Smrg /* submit CS */ 12893f012e29Smrg r = amdgpu_cs_submit(context_handle, 0, ibs_request, 1); 12903f012e29Smrg CU_ASSERT_EQUAL(r, 0); 12913f012e29Smrg 12923f012e29Smrg r = amdgpu_bo_list_destroy(ibs_request->resources); 12933f012e29Smrg CU_ASSERT_EQUAL(r, 0); 12943f012e29Smrg 12953f012e29Smrg fence_status.ip_type = ip_type; 12963f012e29Smrg fence_status.ip_instance = 0; 12973f012e29Smrg fence_status.ring = ibs_request->ring; 12983f012e29Smrg fence_status.context = context_handle; 12993f012e29Smrg fence_status.fence = ibs_request->seq_no; 13003f012e29Smrg 13013f012e29Smrg /* wait for IB accomplished */ 13023f012e29Smrg r = amdgpu_cs_query_fence_status(&fence_status, 13033f012e29Smrg AMDGPU_TIMEOUT_INFINITE, 13043f012e29Smrg 0, &expired); 13053f012e29Smrg CU_ASSERT_EQUAL(r, 0); 13063f012e29Smrg CU_ASSERT_EQUAL(expired, true); 13073f012e29Smrg 13083f012e29Smrg r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle, 13093f012e29Smrg ib_result_mc_address, 4096); 13103f012e29Smrg CU_ASSERT_EQUAL(r, 0); 13113f012e29Smrg} 13123f012e29Smrg 13133f012e29Smrgstatic void amdgpu_command_submission_write_linear_helper(unsigned ip_type) 13143f012e29Smrg{ 13153f012e29Smrg const int sdma_write_length = 128; 13163f012e29Smrg const int pm4_dw = 256; 13173f012e29Smrg amdgpu_context_handle context_handle; 13183f012e29Smrg amdgpu_bo_handle bo; 13193f012e29Smrg amdgpu_bo_handle *resources; 13203f012e29Smrg uint32_t *pm4; 13213f012e29Smrg struct amdgpu_cs_ib_info *ib_info; 13223f012e29Smrg struct amdgpu_cs_request *ibs_request; 13233f012e29Smrg uint64_t bo_mc; 13243f012e29Smrg volatile uint32_t *bo_cpu; 132500a23bdaSmrg int i, j, r, loop, ring_id; 13263f012e29Smrg uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC}; 13273f012e29Smrg amdgpu_va_handle va_handle; 132800a23bdaSmrg struct drm_amdgpu_info_hw_ip hw_ip_info; 13293f012e29Smrg 13303f012e29Smrg pm4 = calloc(pm4_dw, sizeof(*pm4)); 13313f012e29Smrg CU_ASSERT_NOT_EQUAL(pm4, NULL); 13323f012e29Smrg 13333f012e29Smrg ib_info = calloc(1, sizeof(*ib_info)); 13343f012e29Smrg CU_ASSERT_NOT_EQUAL(ib_info, NULL); 13353f012e29Smrg 13363f012e29Smrg ibs_request = calloc(1, sizeof(*ibs_request)); 13373f012e29Smrg CU_ASSERT_NOT_EQUAL(ibs_request, NULL); 13383f012e29Smrg 133900a23bdaSmrg r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info); 134000a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 134100a23bdaSmrg 13423f012e29Smrg r = amdgpu_cs_ctx_create(device_handle, &context_handle); 13433f012e29Smrg CU_ASSERT_EQUAL(r, 0); 13443f012e29Smrg 13453f012e29Smrg /* prepare resource */ 13463f012e29Smrg resources = calloc(1, sizeof(amdgpu_bo_handle)); 13473f012e29Smrg CU_ASSERT_NOT_EQUAL(resources, NULL); 13483f012e29Smrg 134900a23bdaSmrg for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) { 135000a23bdaSmrg loop = 0; 135100a23bdaSmrg while(loop < 2) { 135200a23bdaSmrg /* allocate UC bo for sDMA use */ 135300a23bdaSmrg r = amdgpu_bo_alloc_and_map(device_handle, 135400a23bdaSmrg sdma_write_length * sizeof(uint32_t), 135500a23bdaSmrg 4096, AMDGPU_GEM_DOMAIN_GTT, 135600a23bdaSmrg gtt_flags[loop], &bo, (void**)&bo_cpu, 135700a23bdaSmrg &bo_mc, &va_handle); 135800a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 13593f012e29Smrg 136000a23bdaSmrg /* clear bo */ 136100a23bdaSmrg memset((void*)bo_cpu, 0, sdma_write_length * sizeof(uint32_t)); 13623f012e29Smrg 136300a23bdaSmrg resources[0] = bo; 13643f012e29Smrg 136500a23bdaSmrg /* fulfill PM4: test DMA write-linear */ 136600a23bdaSmrg i = j = 0; 136700a23bdaSmrg if (ip_type == AMDGPU_HW_IP_DMA) { 136800a23bdaSmrg if (family_id == AMDGPU_FAMILY_SI) 136900a23bdaSmrg pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0, 137000a23bdaSmrg sdma_write_length); 137100a23bdaSmrg else 137200a23bdaSmrg pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE, 137300a23bdaSmrg SDMA_WRITE_SUB_OPCODE_LINEAR, 0); 137400a23bdaSmrg pm4[i++] = 0xffffffff & bo_mc; 137500a23bdaSmrg pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32; 137600a23bdaSmrg if (family_id >= AMDGPU_FAMILY_AI) 137700a23bdaSmrg pm4[i++] = sdma_write_length - 1; 137800a23bdaSmrg else if (family_id != AMDGPU_FAMILY_SI) 137900a23bdaSmrg pm4[i++] = sdma_write_length; 138000a23bdaSmrg while(j++ < sdma_write_length) 138100a23bdaSmrg pm4[i++] = 0xdeadbeaf; 138200a23bdaSmrg } else if ((ip_type == AMDGPU_HW_IP_GFX) || 138300a23bdaSmrg (ip_type == AMDGPU_HW_IP_COMPUTE)) { 138400a23bdaSmrg pm4[i++] = PACKET3(PACKET3_WRITE_DATA, 2 + sdma_write_length); 138500a23bdaSmrg pm4[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM; 138600a23bdaSmrg pm4[i++] = 0xfffffffc & bo_mc; 138700a23bdaSmrg pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32; 138800a23bdaSmrg while(j++ < sdma_write_length) 138900a23bdaSmrg pm4[i++] = 0xdeadbeaf; 139000a23bdaSmrg } 13913f012e29Smrg 139200a23bdaSmrg amdgpu_test_exec_cs_helper(context_handle, 139300a23bdaSmrg ip_type, ring_id, 139400a23bdaSmrg i, pm4, 139500a23bdaSmrg 1, resources, 139600a23bdaSmrg ib_info, ibs_request); 13973f012e29Smrg 139800a23bdaSmrg /* verify if SDMA test result meets with expected */ 139900a23bdaSmrg i = 0; 140000a23bdaSmrg while(i < sdma_write_length) { 140100a23bdaSmrg CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf); 140200a23bdaSmrg } 14033f012e29Smrg 140400a23bdaSmrg r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc, 140500a23bdaSmrg sdma_write_length * sizeof(uint32_t)); 140600a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 140700a23bdaSmrg loop++; 14083f012e29Smrg } 14093f012e29Smrg } 14103f012e29Smrg /* clean resources */ 14113f012e29Smrg free(resources); 14123f012e29Smrg free(ibs_request); 14133f012e29Smrg free(ib_info); 14143f012e29Smrg free(pm4); 14153f012e29Smrg 14163f012e29Smrg /* end of test */ 14173f012e29Smrg r = amdgpu_cs_ctx_free(context_handle); 14183f012e29Smrg CU_ASSERT_EQUAL(r, 0); 14193f012e29Smrg} 14203f012e29Smrg 14213f012e29Smrgstatic void amdgpu_command_submission_sdma_write_linear(void) 14223f012e29Smrg{ 14233f012e29Smrg amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_DMA); 14243f012e29Smrg} 14253f012e29Smrg 14263f012e29Smrgstatic void amdgpu_command_submission_const_fill_helper(unsigned ip_type) 14273f012e29Smrg{ 14283f012e29Smrg const int sdma_write_length = 1024 * 1024; 14293f012e29Smrg const int pm4_dw = 256; 14303f012e29Smrg amdgpu_context_handle context_handle; 14313f012e29Smrg amdgpu_bo_handle bo; 14323f012e29Smrg amdgpu_bo_handle *resources; 14333f012e29Smrg uint32_t *pm4; 14343f012e29Smrg struct amdgpu_cs_ib_info *ib_info; 14353f012e29Smrg struct amdgpu_cs_request *ibs_request; 14363f012e29Smrg uint64_t bo_mc; 14373f012e29Smrg volatile uint32_t *bo_cpu; 143800a23bdaSmrg int i, j, r, loop, ring_id; 14393f012e29Smrg uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC}; 14403f012e29Smrg amdgpu_va_handle va_handle; 144100a23bdaSmrg struct drm_amdgpu_info_hw_ip hw_ip_info; 14423f012e29Smrg 14433f012e29Smrg pm4 = calloc(pm4_dw, sizeof(*pm4)); 14443f012e29Smrg CU_ASSERT_NOT_EQUAL(pm4, NULL); 14453f012e29Smrg 14463f012e29Smrg ib_info = calloc(1, sizeof(*ib_info)); 14473f012e29Smrg CU_ASSERT_NOT_EQUAL(ib_info, NULL); 14483f012e29Smrg 14493f012e29Smrg ibs_request = calloc(1, sizeof(*ibs_request)); 14503f012e29Smrg CU_ASSERT_NOT_EQUAL(ibs_request, NULL); 14513f012e29Smrg 145200a23bdaSmrg r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info); 145300a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 145400a23bdaSmrg 14553f012e29Smrg r = amdgpu_cs_ctx_create(device_handle, &context_handle); 14563f012e29Smrg CU_ASSERT_EQUAL(r, 0); 14573f012e29Smrg 14583f012e29Smrg /* prepare resource */ 14593f012e29Smrg resources = calloc(1, sizeof(amdgpu_bo_handle)); 14603f012e29Smrg CU_ASSERT_NOT_EQUAL(resources, NULL); 14613f012e29Smrg 146200a23bdaSmrg for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) { 146300a23bdaSmrg loop = 0; 146400a23bdaSmrg while(loop < 2) { 146500a23bdaSmrg /* allocate UC bo for sDMA use */ 146600a23bdaSmrg r = amdgpu_bo_alloc_and_map(device_handle, 146700a23bdaSmrg sdma_write_length, 4096, 146800a23bdaSmrg AMDGPU_GEM_DOMAIN_GTT, 146900a23bdaSmrg gtt_flags[loop], &bo, (void**)&bo_cpu, 147000a23bdaSmrg &bo_mc, &va_handle); 147100a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 14723f012e29Smrg 147300a23bdaSmrg /* clear bo */ 147400a23bdaSmrg memset((void*)bo_cpu, 0, sdma_write_length); 14753f012e29Smrg 147600a23bdaSmrg resources[0] = bo; 14773f012e29Smrg 147800a23bdaSmrg /* fulfill PM4: test DMA const fill */ 147900a23bdaSmrg i = j = 0; 148000a23bdaSmrg if (ip_type == AMDGPU_HW_IP_DMA) { 148100a23bdaSmrg if (family_id == AMDGPU_FAMILY_SI) { 148200a23bdaSmrg pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_CONSTANT_FILL_SI, 148300a23bdaSmrg 0, 0, 0, 148400a23bdaSmrg sdma_write_length / 4); 148500a23bdaSmrg pm4[i++] = 0xfffffffc & bo_mc; 148600a23bdaSmrg pm4[i++] = 0xdeadbeaf; 148700a23bdaSmrg pm4[i++] = (0xffffffff00000000 & bo_mc) >> 16; 148800a23bdaSmrg } else { 148900a23bdaSmrg pm4[i++] = SDMA_PACKET(SDMA_OPCODE_CONSTANT_FILL, 0, 149000a23bdaSmrg SDMA_CONSTANT_FILL_EXTRA_SIZE(2)); 149100a23bdaSmrg pm4[i++] = 0xffffffff & bo_mc; 149200a23bdaSmrg pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32; 149300a23bdaSmrg pm4[i++] = 0xdeadbeaf; 149400a23bdaSmrg if (family_id >= AMDGPU_FAMILY_AI) 149500a23bdaSmrg pm4[i++] = sdma_write_length - 1; 149600a23bdaSmrg else 149700a23bdaSmrg pm4[i++] = sdma_write_length; 149800a23bdaSmrg } 149900a23bdaSmrg } else if ((ip_type == AMDGPU_HW_IP_GFX) || 150000a23bdaSmrg (ip_type == AMDGPU_HW_IP_COMPUTE)) { 150100a23bdaSmrg if (family_id == AMDGPU_FAMILY_SI) { 150200a23bdaSmrg pm4[i++] = PACKET3(PACKET3_DMA_DATA_SI, 4); 150300a23bdaSmrg pm4[i++] = 0xdeadbeaf; 150400a23bdaSmrg pm4[i++] = PACKET3_DMA_DATA_SI_ENGINE(0) | 150500a23bdaSmrg PACKET3_DMA_DATA_SI_DST_SEL(0) | 150600a23bdaSmrg PACKET3_DMA_DATA_SI_SRC_SEL(2) | 150700a23bdaSmrg PACKET3_DMA_DATA_SI_CP_SYNC; 150800a23bdaSmrg pm4[i++] = 0xffffffff & bo_mc; 150900a23bdaSmrg pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32; 1510d8807b2fSmrg pm4[i++] = sdma_write_length; 151100a23bdaSmrg } else { 151200a23bdaSmrg pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5); 151300a23bdaSmrg pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) | 151400a23bdaSmrg PACKET3_DMA_DATA_DST_SEL(0) | 151500a23bdaSmrg PACKET3_DMA_DATA_SRC_SEL(2) | 151600a23bdaSmrg PACKET3_DMA_DATA_CP_SYNC; 151700a23bdaSmrg pm4[i++] = 0xdeadbeaf; 151800a23bdaSmrg pm4[i++] = 0; 151900a23bdaSmrg pm4[i++] = 0xfffffffc & bo_mc; 152000a23bdaSmrg pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32; 152100a23bdaSmrg pm4[i++] = sdma_write_length; 152200a23bdaSmrg } 1523d8807b2fSmrg } 15243f012e29Smrg 152500a23bdaSmrg amdgpu_test_exec_cs_helper(context_handle, 152600a23bdaSmrg ip_type, ring_id, 152700a23bdaSmrg i, pm4, 152800a23bdaSmrg 1, resources, 152900a23bdaSmrg ib_info, ibs_request); 15303f012e29Smrg 153100a23bdaSmrg /* verify if SDMA test result meets with expected */ 153200a23bdaSmrg i = 0; 153300a23bdaSmrg while(i < (sdma_write_length / 4)) { 153400a23bdaSmrg CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf); 153500a23bdaSmrg } 15363f012e29Smrg 153700a23bdaSmrg r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc, 153800a23bdaSmrg sdma_write_length); 153900a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 154000a23bdaSmrg loop++; 154100a23bdaSmrg } 15423f012e29Smrg } 15433f012e29Smrg /* clean resources */ 15443f012e29Smrg free(resources); 15453f012e29Smrg free(ibs_request); 15463f012e29Smrg free(ib_info); 15473f012e29Smrg free(pm4); 15483f012e29Smrg 15493f012e29Smrg /* end of test */ 15503f012e29Smrg r = amdgpu_cs_ctx_free(context_handle); 15513f012e29Smrg CU_ASSERT_EQUAL(r, 0); 15523f012e29Smrg} 15533f012e29Smrg 15543f012e29Smrgstatic void amdgpu_command_submission_sdma_const_fill(void) 15553f012e29Smrg{ 15563f012e29Smrg amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_DMA); 15573f012e29Smrg} 15583f012e29Smrg 15593f012e29Smrgstatic void amdgpu_command_submission_copy_linear_helper(unsigned ip_type) 15603f012e29Smrg{ 15613f012e29Smrg const int sdma_write_length = 1024; 15623f012e29Smrg const int pm4_dw = 256; 15633f012e29Smrg amdgpu_context_handle context_handle; 15643f012e29Smrg amdgpu_bo_handle bo1, bo2; 15653f012e29Smrg amdgpu_bo_handle *resources; 15663f012e29Smrg uint32_t *pm4; 15673f012e29Smrg struct amdgpu_cs_ib_info *ib_info; 15683f012e29Smrg struct amdgpu_cs_request *ibs_request; 15693f012e29Smrg uint64_t bo1_mc, bo2_mc; 15703f012e29Smrg volatile unsigned char *bo1_cpu, *bo2_cpu; 157100a23bdaSmrg int i, j, r, loop1, loop2, ring_id; 15723f012e29Smrg uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC}; 15733f012e29Smrg amdgpu_va_handle bo1_va_handle, bo2_va_handle; 157400a23bdaSmrg struct drm_amdgpu_info_hw_ip hw_ip_info; 15753f012e29Smrg 15763f012e29Smrg pm4 = calloc(pm4_dw, sizeof(*pm4)); 15773f012e29Smrg CU_ASSERT_NOT_EQUAL(pm4, NULL); 15783f012e29Smrg 15793f012e29Smrg ib_info = calloc(1, sizeof(*ib_info)); 15803f012e29Smrg CU_ASSERT_NOT_EQUAL(ib_info, NULL); 15813f012e29Smrg 15823f012e29Smrg ibs_request = calloc(1, sizeof(*ibs_request)); 15833f012e29Smrg CU_ASSERT_NOT_EQUAL(ibs_request, NULL); 15843f012e29Smrg 158500a23bdaSmrg r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info); 158600a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 158700a23bdaSmrg 15883f012e29Smrg r = amdgpu_cs_ctx_create(device_handle, &context_handle); 15893f012e29Smrg CU_ASSERT_EQUAL(r, 0); 15903f012e29Smrg 15913f012e29Smrg /* prepare resource */ 15923f012e29Smrg resources = calloc(2, sizeof(amdgpu_bo_handle)); 15933f012e29Smrg CU_ASSERT_NOT_EQUAL(resources, NULL); 15943f012e29Smrg 159500a23bdaSmrg for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) { 159600a23bdaSmrg loop1 = loop2 = 0; 159700a23bdaSmrg /* run 9 circle to test all mapping combination */ 159800a23bdaSmrg while(loop1 < 2) { 159900a23bdaSmrg while(loop2 < 2) { 160000a23bdaSmrg /* allocate UC bo1for sDMA use */ 160100a23bdaSmrg r = amdgpu_bo_alloc_and_map(device_handle, 160200a23bdaSmrg sdma_write_length, 4096, 160300a23bdaSmrg AMDGPU_GEM_DOMAIN_GTT, 160400a23bdaSmrg gtt_flags[loop1], &bo1, 160500a23bdaSmrg (void**)&bo1_cpu, &bo1_mc, 160600a23bdaSmrg &bo1_va_handle); 160700a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 160800a23bdaSmrg 160900a23bdaSmrg /* set bo1 */ 161000a23bdaSmrg memset((void*)bo1_cpu, 0xaa, sdma_write_length); 161100a23bdaSmrg 161200a23bdaSmrg /* allocate UC bo2 for sDMA use */ 161300a23bdaSmrg r = amdgpu_bo_alloc_and_map(device_handle, 161400a23bdaSmrg sdma_write_length, 4096, 161500a23bdaSmrg AMDGPU_GEM_DOMAIN_GTT, 161600a23bdaSmrg gtt_flags[loop2], &bo2, 161700a23bdaSmrg (void**)&bo2_cpu, &bo2_mc, 161800a23bdaSmrg &bo2_va_handle); 161900a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 162000a23bdaSmrg 162100a23bdaSmrg /* clear bo2 */ 162200a23bdaSmrg memset((void*)bo2_cpu, 0, sdma_write_length); 162300a23bdaSmrg 162400a23bdaSmrg resources[0] = bo1; 162500a23bdaSmrg resources[1] = bo2; 162600a23bdaSmrg 162700a23bdaSmrg /* fulfill PM4: test DMA copy linear */ 162800a23bdaSmrg i = j = 0; 162900a23bdaSmrg if (ip_type == AMDGPU_HW_IP_DMA) { 163000a23bdaSmrg if (family_id == AMDGPU_FAMILY_SI) { 163100a23bdaSmrg pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI, 163200a23bdaSmrg 0, 0, 0, 163300a23bdaSmrg sdma_write_length); 163400a23bdaSmrg pm4[i++] = 0xffffffff & bo2_mc; 163500a23bdaSmrg pm4[i++] = 0xffffffff & bo1_mc; 163600a23bdaSmrg pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32; 163700a23bdaSmrg pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32; 163800a23bdaSmrg } else { 163900a23bdaSmrg pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY, 164000a23bdaSmrg SDMA_COPY_SUB_OPCODE_LINEAR, 164100a23bdaSmrg 0); 164200a23bdaSmrg if (family_id >= AMDGPU_FAMILY_AI) 164300a23bdaSmrg pm4[i++] = sdma_write_length - 1; 164400a23bdaSmrg else 164500a23bdaSmrg pm4[i++] = sdma_write_length; 164600a23bdaSmrg pm4[i++] = 0; 164700a23bdaSmrg pm4[i++] = 0xffffffff & bo1_mc; 164800a23bdaSmrg pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32; 164900a23bdaSmrg pm4[i++] = 0xffffffff & bo2_mc; 165000a23bdaSmrg pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32; 165100a23bdaSmrg } 165200a23bdaSmrg } else if ((ip_type == AMDGPU_HW_IP_GFX) || 165300a23bdaSmrg (ip_type == AMDGPU_HW_IP_COMPUTE)) { 165400a23bdaSmrg if (family_id == AMDGPU_FAMILY_SI) { 165500a23bdaSmrg pm4[i++] = PACKET3(PACKET3_DMA_DATA_SI, 4); 165600a23bdaSmrg pm4[i++] = 0xfffffffc & bo1_mc; 165700a23bdaSmrg pm4[i++] = PACKET3_DMA_DATA_SI_ENGINE(0) | 165800a23bdaSmrg PACKET3_DMA_DATA_SI_DST_SEL(0) | 165900a23bdaSmrg PACKET3_DMA_DATA_SI_SRC_SEL(0) | 166000a23bdaSmrg PACKET3_DMA_DATA_SI_CP_SYNC | 166100a23bdaSmrg (0xffff00000000 & bo1_mc) >> 32; 166200a23bdaSmrg pm4[i++] = 0xfffffffc & bo2_mc; 166300a23bdaSmrg pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32; 1664d8807b2fSmrg pm4[i++] = sdma_write_length; 166500a23bdaSmrg } else { 166600a23bdaSmrg pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5); 166700a23bdaSmrg pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) | 166800a23bdaSmrg PACKET3_DMA_DATA_DST_SEL(0) | 166900a23bdaSmrg PACKET3_DMA_DATA_SRC_SEL(0) | 167000a23bdaSmrg PACKET3_DMA_DATA_CP_SYNC; 167100a23bdaSmrg pm4[i++] = 0xfffffffc & bo1_mc; 167200a23bdaSmrg pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32; 167300a23bdaSmrg pm4[i++] = 0xfffffffc & bo2_mc; 167400a23bdaSmrg pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32; 167500a23bdaSmrg pm4[i++] = sdma_write_length; 167600a23bdaSmrg } 1677d8807b2fSmrg } 16783f012e29Smrg 167900a23bdaSmrg amdgpu_test_exec_cs_helper(context_handle, 168000a23bdaSmrg ip_type, ring_id, 168100a23bdaSmrg i, pm4, 168200a23bdaSmrg 2, resources, 168300a23bdaSmrg ib_info, ibs_request); 16843f012e29Smrg 168500a23bdaSmrg /* verify if SDMA test result meets with expected */ 168600a23bdaSmrg i = 0; 168700a23bdaSmrg while(i < sdma_write_length) { 168800a23bdaSmrg CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa); 168900a23bdaSmrg } 169000a23bdaSmrg r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc, 169100a23bdaSmrg sdma_write_length); 169200a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 169300a23bdaSmrg r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc, 169400a23bdaSmrg sdma_write_length); 169500a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 169600a23bdaSmrg loop2++; 16973f012e29Smrg } 169800a23bdaSmrg loop1++; 16993f012e29Smrg } 17003f012e29Smrg } 17013f012e29Smrg /* clean resources */ 17023f012e29Smrg free(resources); 17033f012e29Smrg free(ibs_request); 17043f012e29Smrg free(ib_info); 17053f012e29Smrg free(pm4); 17063f012e29Smrg 17073f012e29Smrg /* end of test */ 17083f012e29Smrg r = amdgpu_cs_ctx_free(context_handle); 17093f012e29Smrg CU_ASSERT_EQUAL(r, 0); 17103f012e29Smrg} 17113f012e29Smrg 17123f012e29Smrgstatic void amdgpu_command_submission_sdma_copy_linear(void) 17133f012e29Smrg{ 17143f012e29Smrg amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_DMA); 17153f012e29Smrg} 17163f012e29Smrg 17173f012e29Smrgstatic void amdgpu_command_submission_sdma(void) 17183f012e29Smrg{ 17193f012e29Smrg amdgpu_command_submission_sdma_write_linear(); 17203f012e29Smrg amdgpu_command_submission_sdma_const_fill(); 17213f012e29Smrg amdgpu_command_submission_sdma_copy_linear(); 17223f012e29Smrg} 17233f012e29Smrg 1724d8807b2fSmrgstatic void amdgpu_command_submission_multi_fence_wait_all(bool wait_all) 1725d8807b2fSmrg{ 1726d8807b2fSmrg amdgpu_context_handle context_handle; 1727d8807b2fSmrg amdgpu_bo_handle ib_result_handle, ib_result_ce_handle; 1728d8807b2fSmrg void *ib_result_cpu, *ib_result_ce_cpu; 1729d8807b2fSmrg uint64_t ib_result_mc_address, ib_result_ce_mc_address; 1730d8807b2fSmrg struct amdgpu_cs_request ibs_request[2] = {0}; 1731d8807b2fSmrg struct amdgpu_cs_ib_info ib_info[2]; 1732d8807b2fSmrg struct amdgpu_cs_fence fence_status[2] = {0}; 1733d8807b2fSmrg uint32_t *ptr; 1734d8807b2fSmrg uint32_t expired; 1735d8807b2fSmrg amdgpu_bo_list_handle bo_list; 1736d8807b2fSmrg amdgpu_va_handle va_handle, va_handle_ce; 1737d8807b2fSmrg int r; 1738d8807b2fSmrg int i = 0, ib_cs_num = 2; 1739d8807b2fSmrg 1740d8807b2fSmrg r = amdgpu_cs_ctx_create(device_handle, &context_handle); 1741d8807b2fSmrg CU_ASSERT_EQUAL(r, 0); 1742d8807b2fSmrg 1743d8807b2fSmrg r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096, 1744d8807b2fSmrg AMDGPU_GEM_DOMAIN_GTT, 0, 1745d8807b2fSmrg &ib_result_handle, &ib_result_cpu, 1746d8807b2fSmrg &ib_result_mc_address, &va_handle); 1747d8807b2fSmrg CU_ASSERT_EQUAL(r, 0); 1748d8807b2fSmrg 1749d8807b2fSmrg r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096, 1750d8807b2fSmrg AMDGPU_GEM_DOMAIN_GTT, 0, 1751d8807b2fSmrg &ib_result_ce_handle, &ib_result_ce_cpu, 1752d8807b2fSmrg &ib_result_ce_mc_address, &va_handle_ce); 1753d8807b2fSmrg CU_ASSERT_EQUAL(r, 0); 1754d8807b2fSmrg 1755d8807b2fSmrg r = amdgpu_get_bo_list(device_handle, ib_result_handle, 1756d8807b2fSmrg ib_result_ce_handle, &bo_list); 1757d8807b2fSmrg CU_ASSERT_EQUAL(r, 0); 1758d8807b2fSmrg 1759d8807b2fSmrg memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info)); 1760d8807b2fSmrg 1761d8807b2fSmrg /* IT_SET_CE_DE_COUNTERS */ 1762d8807b2fSmrg ptr = ib_result_ce_cpu; 1763d8807b2fSmrg if (family_id != AMDGPU_FAMILY_SI) { 1764d8807b2fSmrg ptr[i++] = 0xc0008900; 1765d8807b2fSmrg ptr[i++] = 0; 1766d8807b2fSmrg } 1767d8807b2fSmrg ptr[i++] = 0xc0008400; 1768d8807b2fSmrg ptr[i++] = 1; 1769d8807b2fSmrg ib_info[0].ib_mc_address = ib_result_ce_mc_address; 1770d8807b2fSmrg ib_info[0].size = i; 1771d8807b2fSmrg ib_info[0].flags = AMDGPU_IB_FLAG_CE; 1772d8807b2fSmrg 1773d8807b2fSmrg /* IT_WAIT_ON_CE_COUNTER */ 1774d8807b2fSmrg ptr = ib_result_cpu; 1775d8807b2fSmrg ptr[0] = 0xc0008600; 1776d8807b2fSmrg ptr[1] = 0x00000001; 1777d8807b2fSmrg ib_info[1].ib_mc_address = ib_result_mc_address; 1778d8807b2fSmrg ib_info[1].size = 2; 1779d8807b2fSmrg 1780d8807b2fSmrg for (i = 0; i < ib_cs_num; i++) { 1781d8807b2fSmrg ibs_request[i].ip_type = AMDGPU_HW_IP_GFX; 1782d8807b2fSmrg ibs_request[i].number_of_ibs = 2; 1783d8807b2fSmrg ibs_request[i].ibs = ib_info; 1784d8807b2fSmrg ibs_request[i].resources = bo_list; 1785d8807b2fSmrg ibs_request[i].fence_info.handle = NULL; 1786d8807b2fSmrg } 1787d8807b2fSmrg 1788d8807b2fSmrg r = amdgpu_cs_submit(context_handle, 0,ibs_request, ib_cs_num); 1789d8807b2fSmrg 1790d8807b2fSmrg CU_ASSERT_EQUAL(r, 0); 1791d8807b2fSmrg 1792d8807b2fSmrg for (i = 0; i < ib_cs_num; i++) { 1793d8807b2fSmrg fence_status[i].context = context_handle; 1794d8807b2fSmrg fence_status[i].ip_type = AMDGPU_HW_IP_GFX; 1795d8807b2fSmrg fence_status[i].fence = ibs_request[i].seq_no; 1796d8807b2fSmrg } 1797d8807b2fSmrg 1798d8807b2fSmrg r = amdgpu_cs_wait_fences(fence_status, ib_cs_num, wait_all, 1799d8807b2fSmrg AMDGPU_TIMEOUT_INFINITE, 1800d8807b2fSmrg &expired, NULL); 1801d8807b2fSmrg CU_ASSERT_EQUAL(r, 0); 1802d8807b2fSmrg 1803d8807b2fSmrg r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle, 1804d8807b2fSmrg ib_result_mc_address, 4096); 1805d8807b2fSmrg CU_ASSERT_EQUAL(r, 0); 1806d8807b2fSmrg 1807d8807b2fSmrg r = amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce, 1808d8807b2fSmrg ib_result_ce_mc_address, 4096); 1809d8807b2fSmrg CU_ASSERT_EQUAL(r, 0); 1810d8807b2fSmrg 1811d8807b2fSmrg r = amdgpu_bo_list_destroy(bo_list); 1812d8807b2fSmrg CU_ASSERT_EQUAL(r, 0); 1813d8807b2fSmrg 1814d8807b2fSmrg r = amdgpu_cs_ctx_free(context_handle); 1815d8807b2fSmrg CU_ASSERT_EQUAL(r, 0); 1816d8807b2fSmrg} 1817d8807b2fSmrg 1818d8807b2fSmrgstatic void amdgpu_command_submission_multi_fence(void) 1819d8807b2fSmrg{ 1820d8807b2fSmrg amdgpu_command_submission_multi_fence_wait_all(true); 1821d8807b2fSmrg amdgpu_command_submission_multi_fence_wait_all(false); 1822d8807b2fSmrg} 1823d8807b2fSmrg 18243f012e29Smrgstatic void amdgpu_userptr_test(void) 18253f012e29Smrg{ 18263f012e29Smrg int i, r, j; 18273f012e29Smrg uint32_t *pm4 = NULL; 18283f012e29Smrg uint64_t bo_mc; 18293f012e29Smrg void *ptr = NULL; 18303f012e29Smrg int pm4_dw = 256; 18313f012e29Smrg int sdma_write_length = 4; 18323f012e29Smrg amdgpu_bo_handle handle; 18333f012e29Smrg amdgpu_context_handle context_handle; 18343f012e29Smrg struct amdgpu_cs_ib_info *ib_info; 18353f012e29Smrg struct amdgpu_cs_request *ibs_request; 18363f012e29Smrg amdgpu_bo_handle buf_handle; 18373f012e29Smrg amdgpu_va_handle va_handle; 18383f012e29Smrg 18393f012e29Smrg pm4 = calloc(pm4_dw, sizeof(*pm4)); 18403f012e29Smrg CU_ASSERT_NOT_EQUAL(pm4, NULL); 18413f012e29Smrg 18423f012e29Smrg ib_info = calloc(1, sizeof(*ib_info)); 18433f012e29Smrg CU_ASSERT_NOT_EQUAL(ib_info, NULL); 18443f012e29Smrg 18453f012e29Smrg ibs_request = calloc(1, sizeof(*ibs_request)); 18463f012e29Smrg CU_ASSERT_NOT_EQUAL(ibs_request, NULL); 18473f012e29Smrg 18483f012e29Smrg r = amdgpu_cs_ctx_create(device_handle, &context_handle); 18493f012e29Smrg CU_ASSERT_EQUAL(r, 0); 18503f012e29Smrg 18513f012e29Smrg posix_memalign(&ptr, sysconf(_SC_PAGE_SIZE), BUFFER_SIZE); 18523f012e29Smrg CU_ASSERT_NOT_EQUAL(ptr, NULL); 18533f012e29Smrg memset(ptr, 0, BUFFER_SIZE); 18543f012e29Smrg 18553f012e29Smrg r = amdgpu_create_bo_from_user_mem(device_handle, 18563f012e29Smrg ptr, BUFFER_SIZE, &buf_handle); 18573f012e29Smrg CU_ASSERT_EQUAL(r, 0); 18583f012e29Smrg 18593f012e29Smrg r = amdgpu_va_range_alloc(device_handle, 18603f012e29Smrg amdgpu_gpu_va_range_general, 18613f012e29Smrg BUFFER_SIZE, 1, 0, &bo_mc, 18623f012e29Smrg &va_handle, 0); 18633f012e29Smrg CU_ASSERT_EQUAL(r, 0); 18643f012e29Smrg 18653f012e29Smrg r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_MAP); 18663f012e29Smrg CU_ASSERT_EQUAL(r, 0); 18673f012e29Smrg 18683f012e29Smrg handle = buf_handle; 18693f012e29Smrg 18703f012e29Smrg j = i = 0; 1871d8807b2fSmrg 1872d8807b2fSmrg if (family_id == AMDGPU_FAMILY_SI) 1873d8807b2fSmrg pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0, 1874d8807b2fSmrg sdma_write_length); 1875d8807b2fSmrg else 1876d8807b2fSmrg pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE, 1877d8807b2fSmrg SDMA_WRITE_SUB_OPCODE_LINEAR, 0); 18783f012e29Smrg pm4[i++] = 0xffffffff & bo_mc; 18793f012e29Smrg pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32; 1880d8807b2fSmrg if (family_id >= AMDGPU_FAMILY_AI) 1881d8807b2fSmrg pm4[i++] = sdma_write_length - 1; 1882d8807b2fSmrg else if (family_id != AMDGPU_FAMILY_SI) 1883d8807b2fSmrg pm4[i++] = sdma_write_length; 18843f012e29Smrg 18853f012e29Smrg while (j++ < sdma_write_length) 18863f012e29Smrg pm4[i++] = 0xdeadbeaf; 18873f012e29Smrg 188800a23bdaSmrg if (!fork()) { 188900a23bdaSmrg pm4[0] = 0x0; 189000a23bdaSmrg exit(0); 189100a23bdaSmrg } 189200a23bdaSmrg 18933f012e29Smrg amdgpu_test_exec_cs_helper(context_handle, 18943f012e29Smrg AMDGPU_HW_IP_DMA, 0, 18953f012e29Smrg i, pm4, 18963f012e29Smrg 1, &handle, 18973f012e29Smrg ib_info, ibs_request); 18983f012e29Smrg i = 0; 18993f012e29Smrg while (i < sdma_write_length) { 19003f012e29Smrg CU_ASSERT_EQUAL(((int*)ptr)[i++], 0xdeadbeaf); 19013f012e29Smrg } 19023f012e29Smrg free(ibs_request); 19033f012e29Smrg free(ib_info); 19043f012e29Smrg free(pm4); 19053f012e29Smrg 19063f012e29Smrg r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_UNMAP); 19073f012e29Smrg CU_ASSERT_EQUAL(r, 0); 19083f012e29Smrg r = amdgpu_va_range_free(va_handle); 19093f012e29Smrg CU_ASSERT_EQUAL(r, 0); 19103f012e29Smrg r = amdgpu_bo_free(buf_handle); 19113f012e29Smrg CU_ASSERT_EQUAL(r, 0); 19123f012e29Smrg free(ptr); 19133f012e29Smrg 19143f012e29Smrg r = amdgpu_cs_ctx_free(context_handle); 19153f012e29Smrg CU_ASSERT_EQUAL(r, 0); 191600a23bdaSmrg 191700a23bdaSmrg wait(NULL); 191800a23bdaSmrg} 191900a23bdaSmrg 192000a23bdaSmrgstatic void amdgpu_sync_dependency_test(void) 192100a23bdaSmrg{ 192200a23bdaSmrg amdgpu_context_handle context_handle[2]; 192300a23bdaSmrg amdgpu_bo_handle ib_result_handle; 192400a23bdaSmrg void *ib_result_cpu; 192500a23bdaSmrg uint64_t ib_result_mc_address; 192600a23bdaSmrg struct amdgpu_cs_request ibs_request; 192700a23bdaSmrg struct amdgpu_cs_ib_info ib_info; 192800a23bdaSmrg struct amdgpu_cs_fence fence_status; 192900a23bdaSmrg uint32_t expired; 193000a23bdaSmrg int i, j, r; 193100a23bdaSmrg amdgpu_bo_list_handle bo_list; 193200a23bdaSmrg amdgpu_va_handle va_handle; 193300a23bdaSmrg static uint32_t *ptr; 193400a23bdaSmrg uint64_t seq_no; 193500a23bdaSmrg 193600a23bdaSmrg r = amdgpu_cs_ctx_create(device_handle, &context_handle[0]); 193700a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 193800a23bdaSmrg r = amdgpu_cs_ctx_create(device_handle, &context_handle[1]); 193900a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 194000a23bdaSmrg 194100a23bdaSmrg r = amdgpu_bo_alloc_and_map(device_handle, 8192, 4096, 194200a23bdaSmrg AMDGPU_GEM_DOMAIN_GTT, 0, 194300a23bdaSmrg &ib_result_handle, &ib_result_cpu, 194400a23bdaSmrg &ib_result_mc_address, &va_handle); 194500a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 194600a23bdaSmrg 194700a23bdaSmrg r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL, 194800a23bdaSmrg &bo_list); 194900a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 195000a23bdaSmrg 195100a23bdaSmrg ptr = ib_result_cpu; 195200a23bdaSmrg i = 0; 195300a23bdaSmrg 195400a23bdaSmrg memcpy(ptr + CODE_OFFSET , shader_bin, sizeof(shader_bin)); 195500a23bdaSmrg 195600a23bdaSmrg /* Dispatch minimal init config and verify it's executed */ 195700a23bdaSmrg ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1); 195800a23bdaSmrg ptr[i++] = 0x80000000; 195900a23bdaSmrg ptr[i++] = 0x80000000; 196000a23bdaSmrg 196100a23bdaSmrg ptr[i++] = PACKET3(PKT3_CLEAR_STATE, 0); 196200a23bdaSmrg ptr[i++] = 0x80000000; 196300a23bdaSmrg 196400a23bdaSmrg 196500a23bdaSmrg /* Program compute regs */ 196600a23bdaSmrg ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2); 196700a23bdaSmrg ptr[i++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START; 196800a23bdaSmrg ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 8; 196900a23bdaSmrg ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 40; 197000a23bdaSmrg 197100a23bdaSmrg 197200a23bdaSmrg ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2); 197300a23bdaSmrg ptr[i++] = mmCOMPUTE_PGM_RSRC1 - PACKET3_SET_SH_REG_START; 197400a23bdaSmrg /* 197500a23bdaSmrg * 002c0040 COMPUTE_PGM_RSRC1 <- VGPRS = 0 197600a23bdaSmrg SGPRS = 1 197700a23bdaSmrg PRIORITY = 0 197800a23bdaSmrg FLOAT_MODE = 192 (0xc0) 197900a23bdaSmrg PRIV = 0 198000a23bdaSmrg DX10_CLAMP = 1 198100a23bdaSmrg DEBUG_MODE = 0 198200a23bdaSmrg IEEE_MODE = 0 198300a23bdaSmrg BULKY = 0 198400a23bdaSmrg CDBG_USER = 0 198500a23bdaSmrg * 198600a23bdaSmrg */ 198700a23bdaSmrg ptr[i++] = 0x002c0040; 198800a23bdaSmrg 198900a23bdaSmrg 199000a23bdaSmrg /* 199100a23bdaSmrg * 00000010 COMPUTE_PGM_RSRC2 <- SCRATCH_EN = 0 199200a23bdaSmrg USER_SGPR = 8 199300a23bdaSmrg TRAP_PRESENT = 0 199400a23bdaSmrg TGID_X_EN = 0 199500a23bdaSmrg TGID_Y_EN = 0 199600a23bdaSmrg TGID_Z_EN = 0 199700a23bdaSmrg TG_SIZE_EN = 0 199800a23bdaSmrg TIDIG_COMP_CNT = 0 199900a23bdaSmrg EXCP_EN_MSB = 0 200000a23bdaSmrg LDS_SIZE = 0 200100a23bdaSmrg EXCP_EN = 0 200200a23bdaSmrg * 200300a23bdaSmrg */ 200400a23bdaSmrg ptr[i++] = 0x00000010; 200500a23bdaSmrg 200600a23bdaSmrg 200700a23bdaSmrg/* 200800a23bdaSmrg * 00000100 COMPUTE_TMPRING_SIZE <- WAVES = 256 (0x100) 200900a23bdaSmrg WAVESIZE = 0 201000a23bdaSmrg * 201100a23bdaSmrg */ 201200a23bdaSmrg ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1); 201300a23bdaSmrg ptr[i++] = mmCOMPUTE_TMPRING_SIZE - PACKET3_SET_SH_REG_START; 201400a23bdaSmrg ptr[i++] = 0x00000100; 201500a23bdaSmrg 201600a23bdaSmrg ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2); 201700a23bdaSmrg ptr[i++] = mmCOMPUTE_USER_DATA_0 - PACKET3_SET_SH_REG_START; 201800a23bdaSmrg ptr[i++] = 0xffffffff & (ib_result_mc_address + DATA_OFFSET * 4); 201900a23bdaSmrg ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32; 202000a23bdaSmrg 202100a23bdaSmrg ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1); 202200a23bdaSmrg ptr[i++] = mmCOMPUTE_RESOURCE_LIMITS - PACKET3_SET_SH_REG_START; 202300a23bdaSmrg ptr[i++] = 0; 202400a23bdaSmrg 202500a23bdaSmrg ptr[i++] = PACKET3(PKT3_SET_SH_REG, 3); 202600a23bdaSmrg ptr[i++] = mmCOMPUTE_NUM_THREAD_X - PACKET3_SET_SH_REG_START; 202700a23bdaSmrg ptr[i++] = 1; 202800a23bdaSmrg ptr[i++] = 1; 202900a23bdaSmrg ptr[i++] = 1; 203000a23bdaSmrg 203100a23bdaSmrg 203200a23bdaSmrg /* Dispatch */ 203300a23bdaSmrg ptr[i++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 203400a23bdaSmrg ptr[i++] = 1; 203500a23bdaSmrg ptr[i++] = 1; 203600a23bdaSmrg ptr[i++] = 1; 203700a23bdaSmrg ptr[i++] = 0x00000045; /* DISPATCH DIRECT field */ 203800a23bdaSmrg 203900a23bdaSmrg 204000a23bdaSmrg while (i & 7) 204100a23bdaSmrg ptr[i++] = 0xffff1000; /* type3 nop packet */ 204200a23bdaSmrg 204300a23bdaSmrg memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info)); 204400a23bdaSmrg ib_info.ib_mc_address = ib_result_mc_address; 204500a23bdaSmrg ib_info.size = i; 204600a23bdaSmrg 204700a23bdaSmrg memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request)); 204800a23bdaSmrg ibs_request.ip_type = AMDGPU_HW_IP_GFX; 204900a23bdaSmrg ibs_request.ring = 0; 205000a23bdaSmrg ibs_request.number_of_ibs = 1; 205100a23bdaSmrg ibs_request.ibs = &ib_info; 205200a23bdaSmrg ibs_request.resources = bo_list; 205300a23bdaSmrg ibs_request.fence_info.handle = NULL; 205400a23bdaSmrg 205500a23bdaSmrg r = amdgpu_cs_submit(context_handle[1], 0,&ibs_request, 1); 205600a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 205700a23bdaSmrg seq_no = ibs_request.seq_no; 205800a23bdaSmrg 205900a23bdaSmrg 206000a23bdaSmrg 206100a23bdaSmrg /* Prepare second command with dependency on the first */ 206200a23bdaSmrg j = i; 206300a23bdaSmrg ptr[i++] = PACKET3(PACKET3_WRITE_DATA, 3); 206400a23bdaSmrg ptr[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM; 206500a23bdaSmrg ptr[i++] = 0xfffffffc & (ib_result_mc_address + DATA_OFFSET * 4); 206600a23bdaSmrg ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32; 206700a23bdaSmrg ptr[i++] = 99; 206800a23bdaSmrg 206900a23bdaSmrg while (i & 7) 207000a23bdaSmrg ptr[i++] = 0xffff1000; /* type3 nop packet */ 207100a23bdaSmrg 207200a23bdaSmrg memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info)); 207300a23bdaSmrg ib_info.ib_mc_address = ib_result_mc_address + j * 4; 207400a23bdaSmrg ib_info.size = i - j; 207500a23bdaSmrg 207600a23bdaSmrg memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request)); 207700a23bdaSmrg ibs_request.ip_type = AMDGPU_HW_IP_GFX; 207800a23bdaSmrg ibs_request.ring = 0; 207900a23bdaSmrg ibs_request.number_of_ibs = 1; 208000a23bdaSmrg ibs_request.ibs = &ib_info; 208100a23bdaSmrg ibs_request.resources = bo_list; 208200a23bdaSmrg ibs_request.fence_info.handle = NULL; 208300a23bdaSmrg 208400a23bdaSmrg ibs_request.number_of_dependencies = 1; 208500a23bdaSmrg 208600a23bdaSmrg ibs_request.dependencies = calloc(1, sizeof(*ibs_request.dependencies)); 208700a23bdaSmrg ibs_request.dependencies[0].context = context_handle[1]; 208800a23bdaSmrg ibs_request.dependencies[0].ip_instance = 0; 208900a23bdaSmrg ibs_request.dependencies[0].ring = 0; 209000a23bdaSmrg ibs_request.dependencies[0].fence = seq_no; 209100a23bdaSmrg 209200a23bdaSmrg 209300a23bdaSmrg r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request, 1); 209400a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 209500a23bdaSmrg 209600a23bdaSmrg 209700a23bdaSmrg memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence)); 209800a23bdaSmrg fence_status.context = context_handle[0]; 209900a23bdaSmrg fence_status.ip_type = AMDGPU_HW_IP_GFX; 210000a23bdaSmrg fence_status.ip_instance = 0; 210100a23bdaSmrg fence_status.ring = 0; 210200a23bdaSmrg fence_status.fence = ibs_request.seq_no; 210300a23bdaSmrg 210400a23bdaSmrg r = amdgpu_cs_query_fence_status(&fence_status, 210500a23bdaSmrg AMDGPU_TIMEOUT_INFINITE,0, &expired); 210600a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 210700a23bdaSmrg 210800a23bdaSmrg /* Expect the second command to wait for shader to complete */ 210900a23bdaSmrg CU_ASSERT_EQUAL(ptr[DATA_OFFSET], 99); 211000a23bdaSmrg 211100a23bdaSmrg r = amdgpu_bo_list_destroy(bo_list); 211200a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 211300a23bdaSmrg 211400a23bdaSmrg r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle, 211500a23bdaSmrg ib_result_mc_address, 4096); 211600a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 211700a23bdaSmrg 211800a23bdaSmrg r = amdgpu_cs_ctx_free(context_handle[0]); 211900a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 212000a23bdaSmrg r = amdgpu_cs_ctx_free(context_handle[1]); 212100a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 212200a23bdaSmrg 212300a23bdaSmrg free(ibs_request.dependencies); 21243f012e29Smrg} 21255324fb0dSmrg 21269bd392adSmrgstatic int amdgpu_dispatch_load_cs_shader_hang_slow(uint32_t *ptr, int family) 21279bd392adSmrg{ 21289bd392adSmrg struct amdgpu_test_shader *shader; 21299bd392adSmrg int i, loop = 0x10000; 21309bd392adSmrg 21319bd392adSmrg switch (family) { 21329bd392adSmrg case AMDGPU_FAMILY_AI: 21339bd392adSmrg shader = &memcpy_cs_hang_slow_ai; 21349bd392adSmrg break; 21359bd392adSmrg case AMDGPU_FAMILY_RV: 21369bd392adSmrg shader = &memcpy_cs_hang_slow_rv; 21379bd392adSmrg break; 21389bd392adSmrg default: 21399bd392adSmrg return -1; 21409bd392adSmrg break; 21419bd392adSmrg } 21429bd392adSmrg 21439bd392adSmrg memcpy(ptr, shader->shader, shader->header_length * sizeof(uint32_t)); 21449bd392adSmrg 21459bd392adSmrg for (i = 0; i < loop; i++) 21469bd392adSmrg memcpy(ptr + shader->header_length + shader->body_length * i, 21479bd392adSmrg shader->shader + shader->header_length, 21489bd392adSmrg shader->body_length * sizeof(uint32_t)); 21499bd392adSmrg 21509bd392adSmrg memcpy(ptr + shader->header_length + shader->body_length * loop, 21519bd392adSmrg shader->shader + shader->header_length + shader->body_length, 21529bd392adSmrg shader->foot_length * sizeof(uint32_t)); 21539bd392adSmrg 21549bd392adSmrg return 0; 21559bd392adSmrg} 21569bd392adSmrg 21575324fb0dSmrgstatic int amdgpu_dispatch_load_cs_shader(uint8_t *ptr, 21585324fb0dSmrg int cs_type) 21595324fb0dSmrg{ 21605324fb0dSmrg uint32_t shader_size; 21615324fb0dSmrg const uint32_t *shader; 21625324fb0dSmrg 21635324fb0dSmrg switch (cs_type) { 21645324fb0dSmrg case CS_BUFFERCLEAR: 21655324fb0dSmrg shader = bufferclear_cs_shader_gfx9; 21665324fb0dSmrg shader_size = sizeof(bufferclear_cs_shader_gfx9); 21675324fb0dSmrg break; 21685324fb0dSmrg case CS_BUFFERCOPY: 21695324fb0dSmrg shader = buffercopy_cs_shader_gfx9; 21705324fb0dSmrg shader_size = sizeof(buffercopy_cs_shader_gfx9); 21715324fb0dSmrg break; 21729bd392adSmrg case CS_HANG: 21739bd392adSmrg shader = memcpy_ps_hang; 21749bd392adSmrg shader_size = sizeof(memcpy_ps_hang); 21759bd392adSmrg break; 21765324fb0dSmrg default: 21775324fb0dSmrg return -1; 21785324fb0dSmrg break; 21795324fb0dSmrg } 21805324fb0dSmrg 21815324fb0dSmrg memcpy(ptr, shader, shader_size); 21825324fb0dSmrg return 0; 21835324fb0dSmrg} 21845324fb0dSmrg 21855324fb0dSmrgstatic int amdgpu_dispatch_init(uint32_t *ptr, uint32_t ip_type) 21865324fb0dSmrg{ 21875324fb0dSmrg int i = 0; 21885324fb0dSmrg 21895324fb0dSmrg /* Write context control and load shadowing register if necessary */ 21905324fb0dSmrg if (ip_type == AMDGPU_HW_IP_GFX) { 21915324fb0dSmrg ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1); 21925324fb0dSmrg ptr[i++] = 0x80000000; 21935324fb0dSmrg ptr[i++] = 0x80000000; 21945324fb0dSmrg } 21955324fb0dSmrg 21965324fb0dSmrg /* Issue commands to set default compute state. */ 21975324fb0dSmrg /* clear mmCOMPUTE_START_Z - mmCOMPUTE_START_X */ 21985324fb0dSmrg ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 3); 21995324fb0dSmrg ptr[i++] = 0x204; 22005324fb0dSmrg i += 3; 220188f8a8d2Smrg 22025324fb0dSmrg /* clear mmCOMPUTE_TMPRING_SIZE */ 22035324fb0dSmrg ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1); 22045324fb0dSmrg ptr[i++] = 0x218; 22055324fb0dSmrg ptr[i++] = 0; 22065324fb0dSmrg 22075324fb0dSmrg return i; 22085324fb0dSmrg} 22095324fb0dSmrg 22105324fb0dSmrgstatic int amdgpu_dispatch_write_cumask(uint32_t *ptr) 22115324fb0dSmrg{ 22125324fb0dSmrg int i = 0; 22135324fb0dSmrg 22145324fb0dSmrg /* Issue commands to set cu mask used in current dispatch */ 22155324fb0dSmrg /* set mmCOMPUTE_STATIC_THREAD_MGMT_SE1 - mmCOMPUTE_STATIC_THREAD_MGMT_SE0 */ 22165324fb0dSmrg ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2); 22175324fb0dSmrg ptr[i++] = 0x216; 22185324fb0dSmrg ptr[i++] = 0xffffffff; 22195324fb0dSmrg ptr[i++] = 0xffffffff; 22205324fb0dSmrg /* set mmCOMPUTE_STATIC_THREAD_MGMT_SE3 - mmCOMPUTE_STATIC_THREAD_MGMT_SE2 */ 22215324fb0dSmrg ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2); 22225324fb0dSmrg ptr[i++] = 0x219; 22235324fb0dSmrg ptr[i++] = 0xffffffff; 22245324fb0dSmrg ptr[i++] = 0xffffffff; 22255324fb0dSmrg 22265324fb0dSmrg return i; 22275324fb0dSmrg} 22285324fb0dSmrg 22295324fb0dSmrgstatic int amdgpu_dispatch_write2hw(uint32_t *ptr, uint64_t shader_addr) 22305324fb0dSmrg{ 22315324fb0dSmrg int i, j; 22325324fb0dSmrg 22335324fb0dSmrg i = 0; 22345324fb0dSmrg 22355324fb0dSmrg /* Writes shader state to HW */ 22365324fb0dSmrg /* set mmCOMPUTE_PGM_HI - mmCOMPUTE_PGM_LO */ 22375324fb0dSmrg ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2); 22385324fb0dSmrg ptr[i++] = 0x20c; 22395324fb0dSmrg ptr[i++] = (shader_addr >> 8); 22405324fb0dSmrg ptr[i++] = (shader_addr >> 40); 22415324fb0dSmrg /* write sh regs*/ 22425324fb0dSmrg for (j = 0; j < bufferclear_cs_shader_registers_num_gfx9; j++) { 22435324fb0dSmrg ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1); 22445324fb0dSmrg /* - Gfx9ShRegBase */ 22455324fb0dSmrg ptr[i++] = bufferclear_cs_shader_registers_gfx9[j][0] - 0x2c00; 22465324fb0dSmrg ptr[i++] = bufferclear_cs_shader_registers_gfx9[j][1]; 22475324fb0dSmrg } 22485324fb0dSmrg 22495324fb0dSmrg return i; 22505324fb0dSmrg} 22515324fb0dSmrg 22525324fb0dSmrgstatic void amdgpu_memset_dispatch_test(amdgpu_device_handle device_handle, 22535324fb0dSmrg uint32_t ip_type, 22545324fb0dSmrg uint32_t ring) 22555324fb0dSmrg{ 22565324fb0dSmrg amdgpu_context_handle context_handle; 22575324fb0dSmrg amdgpu_bo_handle bo_dst, bo_shader, bo_cmd, resources[3]; 22585324fb0dSmrg volatile unsigned char *ptr_dst; 22595324fb0dSmrg void *ptr_shader; 22605324fb0dSmrg uint32_t *ptr_cmd; 22615324fb0dSmrg uint64_t mc_address_dst, mc_address_shader, mc_address_cmd; 22625324fb0dSmrg amdgpu_va_handle va_dst, va_shader, va_cmd; 22635324fb0dSmrg int i, r; 22645324fb0dSmrg int bo_dst_size = 16384; 22655324fb0dSmrg int bo_shader_size = 4096; 22665324fb0dSmrg int bo_cmd_size = 4096; 22675324fb0dSmrg struct amdgpu_cs_request ibs_request = {0}; 22685324fb0dSmrg struct amdgpu_cs_ib_info ib_info= {0}; 22695324fb0dSmrg amdgpu_bo_list_handle bo_list; 22705324fb0dSmrg struct amdgpu_cs_fence fence_status = {0}; 22715324fb0dSmrg uint32_t expired; 22725324fb0dSmrg 22735324fb0dSmrg r = amdgpu_cs_ctx_create(device_handle, &context_handle); 22745324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 22755324fb0dSmrg 22765324fb0dSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096, 22775324fb0dSmrg AMDGPU_GEM_DOMAIN_GTT, 0, 22785324fb0dSmrg &bo_cmd, (void **)&ptr_cmd, 22795324fb0dSmrg &mc_address_cmd, &va_cmd); 22805324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 22815324fb0dSmrg memset(ptr_cmd, 0, bo_cmd_size); 22825324fb0dSmrg 22835324fb0dSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096, 22845324fb0dSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, 22855324fb0dSmrg &bo_shader, &ptr_shader, 22865324fb0dSmrg &mc_address_shader, &va_shader); 22875324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 228888f8a8d2Smrg memset(ptr_shader, 0, bo_shader_size); 22895324fb0dSmrg 22905324fb0dSmrg r = amdgpu_dispatch_load_cs_shader(ptr_shader, CS_BUFFERCLEAR); 22915324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 22925324fb0dSmrg 22935324fb0dSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096, 22945324fb0dSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, 22955324fb0dSmrg &bo_dst, (void **)&ptr_dst, 22965324fb0dSmrg &mc_address_dst, &va_dst); 22975324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 22985324fb0dSmrg 22995324fb0dSmrg i = 0; 23005324fb0dSmrg i += amdgpu_dispatch_init(ptr_cmd + i, ip_type); 23015324fb0dSmrg 23025324fb0dSmrg /* Issue commands to set cu mask used in current dispatch */ 23035324fb0dSmrg i += amdgpu_dispatch_write_cumask(ptr_cmd + i); 23045324fb0dSmrg 23055324fb0dSmrg /* Writes shader state to HW */ 23065324fb0dSmrg i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader); 23075324fb0dSmrg 23085324fb0dSmrg /* Write constant data */ 23095324fb0dSmrg /* Writes the UAV constant data to the SGPRs. */ 23105324fb0dSmrg ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4); 23115324fb0dSmrg ptr_cmd[i++] = 0x240; 23125324fb0dSmrg ptr_cmd[i++] = mc_address_dst; 23135324fb0dSmrg ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000; 23145324fb0dSmrg ptr_cmd[i++] = 0x400; 23155324fb0dSmrg ptr_cmd[i++] = 0x74fac; 23165324fb0dSmrg 23175324fb0dSmrg /* Sets a range of pixel shader constants */ 23185324fb0dSmrg ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4); 23195324fb0dSmrg ptr_cmd[i++] = 0x244; 23205324fb0dSmrg ptr_cmd[i++] = 0x22222222; 23215324fb0dSmrg ptr_cmd[i++] = 0x22222222; 23225324fb0dSmrg ptr_cmd[i++] = 0x22222222; 23235324fb0dSmrg ptr_cmd[i++] = 0x22222222; 23245324fb0dSmrg 232588f8a8d2Smrg /* clear mmCOMPUTE_RESOURCE_LIMITS */ 232688f8a8d2Smrg ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1); 232788f8a8d2Smrg ptr_cmd[i++] = 0x215; 232888f8a8d2Smrg ptr_cmd[i++] = 0; 232988f8a8d2Smrg 23305324fb0dSmrg /* dispatch direct command */ 23315324fb0dSmrg ptr_cmd[i++] = PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3); 23325324fb0dSmrg ptr_cmd[i++] = 0x10; 23335324fb0dSmrg ptr_cmd[i++] = 1; 23345324fb0dSmrg ptr_cmd[i++] = 1; 23355324fb0dSmrg ptr_cmd[i++] = 1; 23365324fb0dSmrg 23375324fb0dSmrg while (i & 7) 23385324fb0dSmrg ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */ 23395324fb0dSmrg 23405324fb0dSmrg resources[0] = bo_dst; 23415324fb0dSmrg resources[1] = bo_shader; 23425324fb0dSmrg resources[2] = bo_cmd; 23435324fb0dSmrg r = amdgpu_bo_list_create(device_handle, 3, resources, NULL, &bo_list); 23445324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 23455324fb0dSmrg 23465324fb0dSmrg ib_info.ib_mc_address = mc_address_cmd; 23475324fb0dSmrg ib_info.size = i; 23485324fb0dSmrg ibs_request.ip_type = ip_type; 23495324fb0dSmrg ibs_request.ring = ring; 23505324fb0dSmrg ibs_request.resources = bo_list; 23515324fb0dSmrg ibs_request.number_of_ibs = 1; 23525324fb0dSmrg ibs_request.ibs = &ib_info; 23535324fb0dSmrg ibs_request.fence_info.handle = NULL; 23545324fb0dSmrg 23555324fb0dSmrg /* submit CS */ 23565324fb0dSmrg r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1); 23575324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 23585324fb0dSmrg 23595324fb0dSmrg r = amdgpu_bo_list_destroy(bo_list); 23605324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 23615324fb0dSmrg 23625324fb0dSmrg fence_status.ip_type = ip_type; 23635324fb0dSmrg fence_status.ip_instance = 0; 23645324fb0dSmrg fence_status.ring = ring; 23655324fb0dSmrg fence_status.context = context_handle; 23665324fb0dSmrg fence_status.fence = ibs_request.seq_no; 23675324fb0dSmrg 23685324fb0dSmrg /* wait for IB accomplished */ 23695324fb0dSmrg r = amdgpu_cs_query_fence_status(&fence_status, 23705324fb0dSmrg AMDGPU_TIMEOUT_INFINITE, 23715324fb0dSmrg 0, &expired); 23725324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 23735324fb0dSmrg CU_ASSERT_EQUAL(expired, true); 23745324fb0dSmrg 23755324fb0dSmrg /* verify if memset test result meets with expected */ 23765324fb0dSmrg i = 0; 23775324fb0dSmrg while(i < bo_dst_size) { 23785324fb0dSmrg CU_ASSERT_EQUAL(ptr_dst[i++], 0x22); 23795324fb0dSmrg } 23805324fb0dSmrg 23815324fb0dSmrg r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size); 23825324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 23835324fb0dSmrg 23845324fb0dSmrg r = amdgpu_bo_unmap_and_free(bo_shader, va_shader, mc_address_shader, bo_shader_size); 23855324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 23865324fb0dSmrg 23875324fb0dSmrg r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size); 23885324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 23895324fb0dSmrg 23905324fb0dSmrg r = amdgpu_cs_ctx_free(context_handle); 23915324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 23925324fb0dSmrg} 23935324fb0dSmrg 23945324fb0dSmrgstatic void amdgpu_memcpy_dispatch_test(amdgpu_device_handle device_handle, 23955324fb0dSmrg uint32_t ip_type, 23969bd392adSmrg uint32_t ring, 23979bd392adSmrg int hang) 23985324fb0dSmrg{ 23995324fb0dSmrg amdgpu_context_handle context_handle; 24005324fb0dSmrg amdgpu_bo_handle bo_src, bo_dst, bo_shader, bo_cmd, resources[4]; 24015324fb0dSmrg volatile unsigned char *ptr_dst; 24025324fb0dSmrg void *ptr_shader; 24035324fb0dSmrg unsigned char *ptr_src; 24045324fb0dSmrg uint32_t *ptr_cmd; 24055324fb0dSmrg uint64_t mc_address_src, mc_address_dst, mc_address_shader, mc_address_cmd; 24065324fb0dSmrg amdgpu_va_handle va_src, va_dst, va_shader, va_cmd; 24075324fb0dSmrg int i, r; 24085324fb0dSmrg int bo_dst_size = 16384; 24095324fb0dSmrg int bo_shader_size = 4096; 24105324fb0dSmrg int bo_cmd_size = 4096; 24115324fb0dSmrg struct amdgpu_cs_request ibs_request = {0}; 24125324fb0dSmrg struct amdgpu_cs_ib_info ib_info= {0}; 24139bd392adSmrg uint32_t expired, hang_state, hangs; 24149bd392adSmrg enum cs_type cs_type; 24155324fb0dSmrg amdgpu_bo_list_handle bo_list; 24165324fb0dSmrg struct amdgpu_cs_fence fence_status = {0}; 24175324fb0dSmrg 24185324fb0dSmrg r = amdgpu_cs_ctx_create(device_handle, &context_handle); 24195324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 24205324fb0dSmrg 24215324fb0dSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096, 24225324fb0dSmrg AMDGPU_GEM_DOMAIN_GTT, 0, 24235324fb0dSmrg &bo_cmd, (void **)&ptr_cmd, 24245324fb0dSmrg &mc_address_cmd, &va_cmd); 24255324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 24265324fb0dSmrg memset(ptr_cmd, 0, bo_cmd_size); 24275324fb0dSmrg 24285324fb0dSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096, 24295324fb0dSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, 24305324fb0dSmrg &bo_shader, &ptr_shader, 24315324fb0dSmrg &mc_address_shader, &va_shader); 24325324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 243388f8a8d2Smrg memset(ptr_shader, 0, bo_shader_size); 24345324fb0dSmrg 24359bd392adSmrg cs_type = hang ? CS_HANG : CS_BUFFERCOPY; 24369bd392adSmrg r = amdgpu_dispatch_load_cs_shader(ptr_shader, cs_type); 24375324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 24385324fb0dSmrg 24395324fb0dSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096, 24405324fb0dSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, 24415324fb0dSmrg &bo_src, (void **)&ptr_src, 24425324fb0dSmrg &mc_address_src, &va_src); 24435324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 24445324fb0dSmrg 24455324fb0dSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096, 24465324fb0dSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, 24475324fb0dSmrg &bo_dst, (void **)&ptr_dst, 24485324fb0dSmrg &mc_address_dst, &va_dst); 24495324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 24505324fb0dSmrg 24515324fb0dSmrg memset(ptr_src, 0x55, bo_dst_size); 24525324fb0dSmrg 24535324fb0dSmrg i = 0; 24545324fb0dSmrg i += amdgpu_dispatch_init(ptr_cmd + i, ip_type); 24555324fb0dSmrg 24565324fb0dSmrg /* Issue commands to set cu mask used in current dispatch */ 24575324fb0dSmrg i += amdgpu_dispatch_write_cumask(ptr_cmd + i); 24585324fb0dSmrg 24595324fb0dSmrg /* Writes shader state to HW */ 24605324fb0dSmrg i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader); 24615324fb0dSmrg 24625324fb0dSmrg /* Write constant data */ 24635324fb0dSmrg /* Writes the texture resource constants data to the SGPRs */ 24645324fb0dSmrg ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4); 24655324fb0dSmrg ptr_cmd[i++] = 0x240; 24665324fb0dSmrg ptr_cmd[i++] = mc_address_src; 24675324fb0dSmrg ptr_cmd[i++] = (mc_address_src >> 32) | 0x100000; 24685324fb0dSmrg ptr_cmd[i++] = 0x400; 24695324fb0dSmrg ptr_cmd[i++] = 0x74fac; 24705324fb0dSmrg 24715324fb0dSmrg /* Writes the UAV constant data to the SGPRs. */ 24725324fb0dSmrg ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4); 24735324fb0dSmrg ptr_cmd[i++] = 0x244; 24745324fb0dSmrg ptr_cmd[i++] = mc_address_dst; 24755324fb0dSmrg ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000; 24765324fb0dSmrg ptr_cmd[i++] = 0x400; 24775324fb0dSmrg ptr_cmd[i++] = 0x74fac; 24785324fb0dSmrg 247988f8a8d2Smrg /* clear mmCOMPUTE_RESOURCE_LIMITS */ 248088f8a8d2Smrg ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1); 248188f8a8d2Smrg ptr_cmd[i++] = 0x215; 248288f8a8d2Smrg ptr_cmd[i++] = 0; 248388f8a8d2Smrg 24845324fb0dSmrg /* dispatch direct command */ 24855324fb0dSmrg ptr_cmd[i++] = PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3); 24865324fb0dSmrg ptr_cmd[i++] = 0x10; 24875324fb0dSmrg ptr_cmd[i++] = 1; 24885324fb0dSmrg ptr_cmd[i++] = 1; 24895324fb0dSmrg ptr_cmd[i++] = 1; 24905324fb0dSmrg 24915324fb0dSmrg while (i & 7) 24925324fb0dSmrg ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */ 24935324fb0dSmrg 24945324fb0dSmrg resources[0] = bo_shader; 24955324fb0dSmrg resources[1] = bo_src; 24965324fb0dSmrg resources[2] = bo_dst; 24975324fb0dSmrg resources[3] = bo_cmd; 24985324fb0dSmrg r = amdgpu_bo_list_create(device_handle, 4, resources, NULL, &bo_list); 24995324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 25005324fb0dSmrg 25015324fb0dSmrg ib_info.ib_mc_address = mc_address_cmd; 25025324fb0dSmrg ib_info.size = i; 25035324fb0dSmrg ibs_request.ip_type = ip_type; 25045324fb0dSmrg ibs_request.ring = ring; 25055324fb0dSmrg ibs_request.resources = bo_list; 25065324fb0dSmrg ibs_request.number_of_ibs = 1; 25075324fb0dSmrg ibs_request.ibs = &ib_info; 25085324fb0dSmrg ibs_request.fence_info.handle = NULL; 25095324fb0dSmrg r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1); 25105324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 25115324fb0dSmrg 25125324fb0dSmrg fence_status.ip_type = ip_type; 25135324fb0dSmrg fence_status.ip_instance = 0; 25145324fb0dSmrg fence_status.ring = ring; 25155324fb0dSmrg fence_status.context = context_handle; 25165324fb0dSmrg fence_status.fence = ibs_request.seq_no; 25175324fb0dSmrg 25185324fb0dSmrg /* wait for IB accomplished */ 25195324fb0dSmrg r = amdgpu_cs_query_fence_status(&fence_status, 25205324fb0dSmrg AMDGPU_TIMEOUT_INFINITE, 25215324fb0dSmrg 0, &expired); 25225324fb0dSmrg 25239bd392adSmrg if (!hang) { 25249bd392adSmrg CU_ASSERT_EQUAL(r, 0); 25259bd392adSmrg CU_ASSERT_EQUAL(expired, true); 25269bd392adSmrg 25279bd392adSmrg /* verify if memcpy test result meets with expected */ 25289bd392adSmrg i = 0; 25299bd392adSmrg while(i < bo_dst_size) { 25309bd392adSmrg CU_ASSERT_EQUAL(ptr_dst[i], ptr_src[i]); 25319bd392adSmrg i++; 25329bd392adSmrg } 25339bd392adSmrg } else { 25349bd392adSmrg r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs); 25359bd392adSmrg CU_ASSERT_EQUAL(r, 0); 25369bd392adSmrg CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET); 25375324fb0dSmrg } 25385324fb0dSmrg 25395324fb0dSmrg r = amdgpu_bo_list_destroy(bo_list); 25405324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 25415324fb0dSmrg 25425324fb0dSmrg r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_dst_size); 25435324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 25445324fb0dSmrg r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size); 25455324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 25465324fb0dSmrg 25475324fb0dSmrg r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size); 25485324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 25495324fb0dSmrg 25505324fb0dSmrg r = amdgpu_bo_unmap_and_free(bo_shader, va_shader, mc_address_shader, bo_shader_size); 25515324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 25525324fb0dSmrg 25535324fb0dSmrg r = amdgpu_cs_ctx_free(context_handle); 25545324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 25555324fb0dSmrg} 255688f8a8d2Smrg 255788f8a8d2Smrgstatic void amdgpu_compute_dispatch_test(void) 25585324fb0dSmrg{ 25595324fb0dSmrg int r; 25605324fb0dSmrg struct drm_amdgpu_info_hw_ip info; 25615324fb0dSmrg uint32_t ring_id; 25625324fb0dSmrg 25635324fb0dSmrg r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_COMPUTE, 0, &info); 25645324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 256588f8a8d2Smrg if (!info.available_rings) 256688f8a8d2Smrg printf("SKIP ... as there's no compute ring\n"); 25675324fb0dSmrg 25685324fb0dSmrg for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) { 25695324fb0dSmrg amdgpu_memset_dispatch_test(device_handle, AMDGPU_HW_IP_COMPUTE, ring_id); 25709bd392adSmrg amdgpu_memcpy_dispatch_test(device_handle, AMDGPU_HW_IP_COMPUTE, ring_id, 0); 25715324fb0dSmrg } 257288f8a8d2Smrg} 257388f8a8d2Smrg 257488f8a8d2Smrgstatic void amdgpu_gfx_dispatch_test(void) 257588f8a8d2Smrg{ 257688f8a8d2Smrg int r; 257788f8a8d2Smrg struct drm_amdgpu_info_hw_ip info; 257888f8a8d2Smrg uint32_t ring_id; 25795324fb0dSmrg 25805324fb0dSmrg r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info); 25815324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 258288f8a8d2Smrg if (!info.available_rings) 258388f8a8d2Smrg printf("SKIP ... as there's no graphics ring\n"); 25845324fb0dSmrg 25855324fb0dSmrg for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) { 25865324fb0dSmrg amdgpu_memset_dispatch_test(device_handle, AMDGPU_HW_IP_GFX, ring_id); 25879bd392adSmrg amdgpu_memcpy_dispatch_test(device_handle, AMDGPU_HW_IP_GFX, ring_id, 0); 25889bd392adSmrg } 25899bd392adSmrg} 25909bd392adSmrg 25919bd392adSmrgvoid amdgpu_dispatch_hang_helper(amdgpu_device_handle device_handle, uint32_t ip_type) 25929bd392adSmrg{ 25939bd392adSmrg int r; 25949bd392adSmrg struct drm_amdgpu_info_hw_ip info; 25959bd392adSmrg uint32_t ring_id; 25969bd392adSmrg 25979bd392adSmrg r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &info); 25989bd392adSmrg CU_ASSERT_EQUAL(r, 0); 25999bd392adSmrg if (!info.available_rings) 26009bd392adSmrg printf("SKIP ... as there's no ring for ip %d\n", ip_type); 26019bd392adSmrg 26029bd392adSmrg for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) { 26039bd392adSmrg amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 0); 26049bd392adSmrg amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 1); 26059bd392adSmrg amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 0); 26069bd392adSmrg } 26079bd392adSmrg} 26089bd392adSmrg 26099bd392adSmrgstatic void amdgpu_memcpy_dispatch_hang_slow_test(amdgpu_device_handle device_handle, 26109bd392adSmrg uint32_t ip_type, uint32_t ring) 26119bd392adSmrg{ 26129bd392adSmrg amdgpu_context_handle context_handle; 26139bd392adSmrg amdgpu_bo_handle bo_src, bo_dst, bo_shader, bo_cmd, resources[4]; 26149bd392adSmrg volatile unsigned char *ptr_dst; 26159bd392adSmrg void *ptr_shader; 26169bd392adSmrg unsigned char *ptr_src; 26179bd392adSmrg uint32_t *ptr_cmd; 26189bd392adSmrg uint64_t mc_address_src, mc_address_dst, mc_address_shader, mc_address_cmd; 26199bd392adSmrg amdgpu_va_handle va_src, va_dst, va_shader, va_cmd; 26209bd392adSmrg int i, r; 26219bd392adSmrg int bo_dst_size = 0x4000000; 26229bd392adSmrg int bo_shader_size = 0x400000; 26239bd392adSmrg int bo_cmd_size = 4096; 26249bd392adSmrg struct amdgpu_cs_request ibs_request = {0}; 26259bd392adSmrg struct amdgpu_cs_ib_info ib_info= {0}; 26269bd392adSmrg uint32_t hang_state, hangs, expired; 26279bd392adSmrg struct amdgpu_gpu_info gpu_info = {0}; 26289bd392adSmrg amdgpu_bo_list_handle bo_list; 26299bd392adSmrg struct amdgpu_cs_fence fence_status = {0}; 26309bd392adSmrg 26319bd392adSmrg r = amdgpu_query_gpu_info(device_handle, &gpu_info); 26329bd392adSmrg CU_ASSERT_EQUAL(r, 0); 26339bd392adSmrg 26349bd392adSmrg r = amdgpu_cs_ctx_create(device_handle, &context_handle); 26359bd392adSmrg CU_ASSERT_EQUAL(r, 0); 26369bd392adSmrg 26379bd392adSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096, 26389bd392adSmrg AMDGPU_GEM_DOMAIN_GTT, 0, 26399bd392adSmrg &bo_cmd, (void **)&ptr_cmd, 26409bd392adSmrg &mc_address_cmd, &va_cmd); 26419bd392adSmrg CU_ASSERT_EQUAL(r, 0); 26429bd392adSmrg memset(ptr_cmd, 0, bo_cmd_size); 26439bd392adSmrg 26449bd392adSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096, 26459bd392adSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, 26469bd392adSmrg &bo_shader, &ptr_shader, 26479bd392adSmrg &mc_address_shader, &va_shader); 26489bd392adSmrg CU_ASSERT_EQUAL(r, 0); 26499bd392adSmrg memset(ptr_shader, 0, bo_shader_size); 26509bd392adSmrg 26519bd392adSmrg r = amdgpu_dispatch_load_cs_shader_hang_slow(ptr_shader, gpu_info.family_id); 26529bd392adSmrg CU_ASSERT_EQUAL(r, 0); 26539bd392adSmrg 26549bd392adSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096, 26559bd392adSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, 26569bd392adSmrg &bo_src, (void **)&ptr_src, 26579bd392adSmrg &mc_address_src, &va_src); 26589bd392adSmrg CU_ASSERT_EQUAL(r, 0); 26599bd392adSmrg 26609bd392adSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096, 26619bd392adSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, 26629bd392adSmrg &bo_dst, (void **)&ptr_dst, 26639bd392adSmrg &mc_address_dst, &va_dst); 26649bd392adSmrg CU_ASSERT_EQUAL(r, 0); 26659bd392adSmrg 26669bd392adSmrg memset(ptr_src, 0x55, bo_dst_size); 26679bd392adSmrg 26689bd392adSmrg i = 0; 26699bd392adSmrg i += amdgpu_dispatch_init(ptr_cmd + i, ip_type); 26709bd392adSmrg 26719bd392adSmrg /* Issue commands to set cu mask used in current dispatch */ 26729bd392adSmrg i += amdgpu_dispatch_write_cumask(ptr_cmd + i); 26739bd392adSmrg 26749bd392adSmrg /* Writes shader state to HW */ 26759bd392adSmrg i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader); 26769bd392adSmrg 26779bd392adSmrg /* Write constant data */ 26789bd392adSmrg /* Writes the texture resource constants data to the SGPRs */ 26799bd392adSmrg ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4); 26809bd392adSmrg ptr_cmd[i++] = 0x240; 26819bd392adSmrg ptr_cmd[i++] = mc_address_src; 26829bd392adSmrg ptr_cmd[i++] = (mc_address_src >> 32) | 0x100000; 26839bd392adSmrg ptr_cmd[i++] = 0x400000; 26849bd392adSmrg ptr_cmd[i++] = 0x74fac; 26859bd392adSmrg 26869bd392adSmrg /* Writes the UAV constant data to the SGPRs. */ 26879bd392adSmrg ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4); 26889bd392adSmrg ptr_cmd[i++] = 0x244; 26899bd392adSmrg ptr_cmd[i++] = mc_address_dst; 26909bd392adSmrg ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000; 26919bd392adSmrg ptr_cmd[i++] = 0x400000; 26929bd392adSmrg ptr_cmd[i++] = 0x74fac; 26939bd392adSmrg 26949bd392adSmrg /* clear mmCOMPUTE_RESOURCE_LIMITS */ 26959bd392adSmrg ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1); 26969bd392adSmrg ptr_cmd[i++] = 0x215; 26979bd392adSmrg ptr_cmd[i++] = 0; 26989bd392adSmrg 26999bd392adSmrg /* dispatch direct command */ 27009bd392adSmrg ptr_cmd[i++] = PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3); 27019bd392adSmrg ptr_cmd[i++] = 0x10000; 27029bd392adSmrg ptr_cmd[i++] = 1; 27039bd392adSmrg ptr_cmd[i++] = 1; 27049bd392adSmrg ptr_cmd[i++] = 1; 27059bd392adSmrg 27069bd392adSmrg while (i & 7) 27079bd392adSmrg ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */ 27089bd392adSmrg 27099bd392adSmrg resources[0] = bo_shader; 27109bd392adSmrg resources[1] = bo_src; 27119bd392adSmrg resources[2] = bo_dst; 27129bd392adSmrg resources[3] = bo_cmd; 27139bd392adSmrg r = amdgpu_bo_list_create(device_handle, 4, resources, NULL, &bo_list); 27149bd392adSmrg CU_ASSERT_EQUAL(r, 0); 27159bd392adSmrg 27169bd392adSmrg ib_info.ib_mc_address = mc_address_cmd; 27179bd392adSmrg ib_info.size = i; 27189bd392adSmrg ibs_request.ip_type = ip_type; 27199bd392adSmrg ibs_request.ring = ring; 27209bd392adSmrg ibs_request.resources = bo_list; 27219bd392adSmrg ibs_request.number_of_ibs = 1; 27229bd392adSmrg ibs_request.ibs = &ib_info; 27239bd392adSmrg ibs_request.fence_info.handle = NULL; 27249bd392adSmrg r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1); 27259bd392adSmrg CU_ASSERT_EQUAL(r, 0); 27269bd392adSmrg 27279bd392adSmrg fence_status.ip_type = ip_type; 27289bd392adSmrg fence_status.ip_instance = 0; 27299bd392adSmrg fence_status.ring = ring; 27309bd392adSmrg fence_status.context = context_handle; 27319bd392adSmrg fence_status.fence = ibs_request.seq_no; 27329bd392adSmrg 27339bd392adSmrg /* wait for IB accomplished */ 27349bd392adSmrg r = amdgpu_cs_query_fence_status(&fence_status, 27359bd392adSmrg AMDGPU_TIMEOUT_INFINITE, 27369bd392adSmrg 0, &expired); 27379bd392adSmrg 27389bd392adSmrg r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs); 27399bd392adSmrg CU_ASSERT_EQUAL(r, 0); 27409bd392adSmrg CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET); 27419bd392adSmrg 27429bd392adSmrg r = amdgpu_bo_list_destroy(bo_list); 27439bd392adSmrg CU_ASSERT_EQUAL(r, 0); 27449bd392adSmrg 27459bd392adSmrg r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_dst_size); 27469bd392adSmrg CU_ASSERT_EQUAL(r, 0); 27479bd392adSmrg r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size); 27489bd392adSmrg CU_ASSERT_EQUAL(r, 0); 27499bd392adSmrg 27509bd392adSmrg r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size); 27519bd392adSmrg CU_ASSERT_EQUAL(r, 0); 27529bd392adSmrg 27539bd392adSmrg r = amdgpu_bo_unmap_and_free(bo_shader, va_shader, mc_address_shader, bo_shader_size); 27549bd392adSmrg CU_ASSERT_EQUAL(r, 0); 27559bd392adSmrg 27569bd392adSmrg r = amdgpu_cs_ctx_free(context_handle); 27579bd392adSmrg CU_ASSERT_EQUAL(r, 0); 27589bd392adSmrg} 27599bd392adSmrg 27609bd392adSmrgvoid amdgpu_dispatch_hang_slow_helper(amdgpu_device_handle device_handle, uint32_t ip_type) 27619bd392adSmrg{ 27629bd392adSmrg int r; 27639bd392adSmrg struct drm_amdgpu_info_hw_ip info; 27649bd392adSmrg uint32_t ring_id; 27659bd392adSmrg 27669bd392adSmrg r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &info); 27679bd392adSmrg CU_ASSERT_EQUAL(r, 0); 27689bd392adSmrg if (!info.available_rings) 27699bd392adSmrg printf("SKIP ... as there's no ring for ip %d\n", ip_type); 27709bd392adSmrg 27719bd392adSmrg for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) { 27729bd392adSmrg amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 0); 27739bd392adSmrg amdgpu_memcpy_dispatch_hang_slow_test(device_handle, ip_type, ring_id); 27749bd392adSmrg amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 0); 27759bd392adSmrg } 27769bd392adSmrg} 27779bd392adSmrg 27789bd392adSmrgstatic int amdgpu_draw_load_ps_shader_hang_slow(uint32_t *ptr, int family) 27799bd392adSmrg{ 27809bd392adSmrg struct amdgpu_test_shader *shader; 27819bd392adSmrg int i, loop = 0x40000; 27829bd392adSmrg 27839bd392adSmrg switch (family) { 27849bd392adSmrg case AMDGPU_FAMILY_AI: 27859bd392adSmrg case AMDGPU_FAMILY_RV: 27869bd392adSmrg shader = &memcpy_ps_hang_slow_ai; 27879bd392adSmrg break; 27889bd392adSmrg default: 27899bd392adSmrg return -1; 27909bd392adSmrg break; 27915324fb0dSmrg } 27929bd392adSmrg 27939bd392adSmrg memcpy(ptr, shader->shader, shader->header_length * sizeof(uint32_t)); 27949bd392adSmrg 27959bd392adSmrg for (i = 0; i < loop; i++) 27969bd392adSmrg memcpy(ptr + shader->header_length + shader->body_length * i, 27979bd392adSmrg shader->shader + shader->header_length, 27989bd392adSmrg shader->body_length * sizeof(uint32_t)); 27999bd392adSmrg 28009bd392adSmrg memcpy(ptr + shader->header_length + shader->body_length * loop, 28019bd392adSmrg shader->shader + shader->header_length + shader->body_length, 28029bd392adSmrg shader->foot_length * sizeof(uint32_t)); 28039bd392adSmrg 28049bd392adSmrg return 0; 28055324fb0dSmrg} 28065324fb0dSmrg 28075324fb0dSmrgstatic int amdgpu_draw_load_ps_shader(uint8_t *ptr, int ps_type) 28085324fb0dSmrg{ 28095324fb0dSmrg int i; 28105324fb0dSmrg uint32_t shader_offset= 256; 28115324fb0dSmrg uint32_t mem_offset, patch_code_offset; 28125324fb0dSmrg uint32_t shader_size, patchinfo_code_size; 28135324fb0dSmrg const uint32_t *shader; 28145324fb0dSmrg const uint32_t *patchinfo_code; 28155324fb0dSmrg const uint32_t *patchcode_offset; 28165324fb0dSmrg 28175324fb0dSmrg switch (ps_type) { 28185324fb0dSmrg case PS_CONST: 28195324fb0dSmrg shader = ps_const_shader_gfx9; 28205324fb0dSmrg shader_size = sizeof(ps_const_shader_gfx9); 28215324fb0dSmrg patchinfo_code = (const uint32_t *)ps_const_shader_patchinfo_code_gfx9; 28225324fb0dSmrg patchinfo_code_size = ps_const_shader_patchinfo_code_size_gfx9; 28235324fb0dSmrg patchcode_offset = ps_const_shader_patchinfo_offset_gfx9; 28245324fb0dSmrg break; 28255324fb0dSmrg case PS_TEX: 28265324fb0dSmrg shader = ps_tex_shader_gfx9; 28275324fb0dSmrg shader_size = sizeof(ps_tex_shader_gfx9); 28285324fb0dSmrg patchinfo_code = (const uint32_t *)ps_tex_shader_patchinfo_code_gfx9; 28295324fb0dSmrg patchinfo_code_size = ps_tex_shader_patchinfo_code_size_gfx9; 28305324fb0dSmrg patchcode_offset = ps_tex_shader_patchinfo_offset_gfx9; 28315324fb0dSmrg break; 28329bd392adSmrg case PS_HANG: 28339bd392adSmrg shader = memcpy_ps_hang; 28349bd392adSmrg shader_size = sizeof(memcpy_ps_hang); 28359bd392adSmrg 28369bd392adSmrg memcpy(ptr, shader, shader_size); 28379bd392adSmrg return 0; 28385324fb0dSmrg default: 28395324fb0dSmrg return -1; 28405324fb0dSmrg break; 28415324fb0dSmrg } 28425324fb0dSmrg 28435324fb0dSmrg /* write main shader program */ 28445324fb0dSmrg for (i = 0 ; i < 10; i++) { 28455324fb0dSmrg mem_offset = i * shader_offset; 28465324fb0dSmrg memcpy(ptr + mem_offset, shader, shader_size); 28475324fb0dSmrg } 28485324fb0dSmrg 28495324fb0dSmrg /* overwrite patch codes */ 28505324fb0dSmrg for (i = 0 ; i < 10; i++) { 28515324fb0dSmrg mem_offset = i * shader_offset + patchcode_offset[0] * sizeof(uint32_t); 28525324fb0dSmrg patch_code_offset = i * patchinfo_code_size; 28535324fb0dSmrg memcpy(ptr + mem_offset, 28545324fb0dSmrg patchinfo_code + patch_code_offset, 28555324fb0dSmrg patchinfo_code_size * sizeof(uint32_t)); 28565324fb0dSmrg } 28575324fb0dSmrg 28585324fb0dSmrg return 0; 28595324fb0dSmrg} 28605324fb0dSmrg 28615324fb0dSmrg/* load RectPosTexFast_VS */ 28625324fb0dSmrgstatic int amdgpu_draw_load_vs_shader(uint8_t *ptr) 28635324fb0dSmrg{ 28645324fb0dSmrg const uint32_t *shader; 28655324fb0dSmrg uint32_t shader_size; 28665324fb0dSmrg 28675324fb0dSmrg shader = vs_RectPosTexFast_shader_gfx9; 28685324fb0dSmrg shader_size = sizeof(vs_RectPosTexFast_shader_gfx9); 28695324fb0dSmrg 28705324fb0dSmrg memcpy(ptr, shader, shader_size); 28715324fb0dSmrg 28725324fb0dSmrg return 0; 28735324fb0dSmrg} 28745324fb0dSmrg 28755324fb0dSmrgstatic int amdgpu_draw_init(uint32_t *ptr) 28765324fb0dSmrg{ 28775324fb0dSmrg int i = 0; 28785324fb0dSmrg const uint32_t *preamblecache_ptr; 28795324fb0dSmrg uint32_t preamblecache_size; 28805324fb0dSmrg 28815324fb0dSmrg /* Write context control and load shadowing register if necessary */ 28825324fb0dSmrg ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1); 28835324fb0dSmrg ptr[i++] = 0x80000000; 28845324fb0dSmrg ptr[i++] = 0x80000000; 28855324fb0dSmrg 28865324fb0dSmrg preamblecache_ptr = preamblecache_gfx9; 28875324fb0dSmrg preamblecache_size = sizeof(preamblecache_gfx9); 28885324fb0dSmrg 28895324fb0dSmrg memcpy(ptr + i, preamblecache_ptr, preamblecache_size); 28905324fb0dSmrg return i + preamblecache_size/sizeof(uint32_t); 28915324fb0dSmrg} 28925324fb0dSmrg 28935324fb0dSmrgstatic int amdgpu_draw_setup_and_write_drawblt_surf_info(uint32_t *ptr, 28949bd392adSmrg uint64_t dst_addr, 28959bd392adSmrg int hang_slow) 28965324fb0dSmrg{ 28975324fb0dSmrg int i = 0; 28985324fb0dSmrg 28995324fb0dSmrg /* setup color buffer */ 29005324fb0dSmrg /* offset reg 29015324fb0dSmrg 0xA318 CB_COLOR0_BASE 29025324fb0dSmrg 0xA319 CB_COLOR0_BASE_EXT 29035324fb0dSmrg 0xA31A CB_COLOR0_ATTRIB2 29045324fb0dSmrg 0xA31B CB_COLOR0_VIEW 29055324fb0dSmrg 0xA31C CB_COLOR0_INFO 29065324fb0dSmrg 0xA31D CB_COLOR0_ATTRIB 29075324fb0dSmrg 0xA31E CB_COLOR0_DCC_CONTROL 29085324fb0dSmrg 0xA31F CB_COLOR0_CMASK 29095324fb0dSmrg 0xA320 CB_COLOR0_CMASK_BASE_EXT 29105324fb0dSmrg 0xA321 CB_COLOR0_FMASK 29115324fb0dSmrg 0xA322 CB_COLOR0_FMASK_BASE_EXT 29125324fb0dSmrg 0xA323 CB_COLOR0_CLEAR_WORD0 29135324fb0dSmrg 0xA324 CB_COLOR0_CLEAR_WORD1 29145324fb0dSmrg 0xA325 CB_COLOR0_DCC_BASE 29155324fb0dSmrg 0xA326 CB_COLOR0_DCC_BASE_EXT */ 29165324fb0dSmrg ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 15); 29175324fb0dSmrg ptr[i++] = 0x318; 29185324fb0dSmrg ptr[i++] = dst_addr >> 8; 29195324fb0dSmrg ptr[i++] = dst_addr >> 40; 29209bd392adSmrg ptr[i++] = hang_slow ? 0x1ffc7ff : 0x7c01f; 29215324fb0dSmrg ptr[i++] = 0; 29225324fb0dSmrg ptr[i++] = 0x50438; 29235324fb0dSmrg ptr[i++] = 0x10140000; 29245324fb0dSmrg i += 9; 29255324fb0dSmrg 29265324fb0dSmrg /* mmCB_MRT0_EPITCH */ 29275324fb0dSmrg ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 29285324fb0dSmrg ptr[i++] = 0x1e8; 29299bd392adSmrg ptr[i++] = hang_slow ? 0x7ff : 0x1f; 29305324fb0dSmrg 29315324fb0dSmrg /* 0xA32B CB_COLOR1_BASE */ 29325324fb0dSmrg ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 29335324fb0dSmrg ptr[i++] = 0x32b; 29345324fb0dSmrg ptr[i++] = 0; 29355324fb0dSmrg 29365324fb0dSmrg /* 0xA33A CB_COLOR1_BASE */ 29375324fb0dSmrg ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 29385324fb0dSmrg ptr[i++] = 0x33a; 29395324fb0dSmrg ptr[i++] = 0; 29405324fb0dSmrg 29415324fb0dSmrg /* SPI_SHADER_COL_FORMAT */ 29425324fb0dSmrg ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 29435324fb0dSmrg ptr[i++] = 0x1c5; 29445324fb0dSmrg ptr[i++] = 9; 29455324fb0dSmrg 29465324fb0dSmrg /* Setup depth buffer */ 29475324fb0dSmrg /* mmDB_Z_INFO */ 29485324fb0dSmrg ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2); 29495324fb0dSmrg ptr[i++] = 0xe; 29505324fb0dSmrg i += 2; 29515324fb0dSmrg 29525324fb0dSmrg return i; 29535324fb0dSmrg} 29545324fb0dSmrg 29559bd392adSmrgstatic int amdgpu_draw_setup_and_write_drawblt_state(uint32_t *ptr, int hang_slow) 29565324fb0dSmrg{ 29575324fb0dSmrg int i = 0; 29585324fb0dSmrg const uint32_t *cached_cmd_ptr; 29595324fb0dSmrg uint32_t cached_cmd_size; 29605324fb0dSmrg 29615324fb0dSmrg /* mmPA_SC_TILE_STEERING_OVERRIDE */ 29625324fb0dSmrg ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 29635324fb0dSmrg ptr[i++] = 0xd7; 29645324fb0dSmrg ptr[i++] = 0; 29655324fb0dSmrg 29665324fb0dSmrg ptr[i++] = 0xffff1000; 29675324fb0dSmrg ptr[i++] = 0xc0021000; 29685324fb0dSmrg 29695324fb0dSmrg ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 29705324fb0dSmrg ptr[i++] = 0xd7; 29715324fb0dSmrg ptr[i++] = 1; 29725324fb0dSmrg 29735324fb0dSmrg /* mmPA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0 */ 29745324fb0dSmrg ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 16); 29755324fb0dSmrg ptr[i++] = 0x2fe; 29765324fb0dSmrg i += 16; 29775324fb0dSmrg 29785324fb0dSmrg /* mmPA_SC_CENTROID_PRIORITY_0 */ 29795324fb0dSmrg ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2); 29805324fb0dSmrg ptr[i++] = 0x2f5; 29815324fb0dSmrg i += 2; 29825324fb0dSmrg 29835324fb0dSmrg cached_cmd_ptr = cached_cmd_gfx9; 29845324fb0dSmrg cached_cmd_size = sizeof(cached_cmd_gfx9); 29855324fb0dSmrg 29865324fb0dSmrg memcpy(ptr + i, cached_cmd_ptr, cached_cmd_size); 29879bd392adSmrg if (hang_slow) 29889bd392adSmrg *(ptr + i + 12) = 0x8000800; 29895324fb0dSmrg i += cached_cmd_size/sizeof(uint32_t); 29905324fb0dSmrg 29915324fb0dSmrg return i; 29925324fb0dSmrg} 29935324fb0dSmrg 29945324fb0dSmrgstatic int amdgpu_draw_vs_RectPosTexFast_write2hw(uint32_t *ptr, 29955324fb0dSmrg int ps_type, 29969bd392adSmrg uint64_t shader_addr, 29979bd392adSmrg int hang_slow) 29985324fb0dSmrg{ 29995324fb0dSmrg int i = 0; 30005324fb0dSmrg 30015324fb0dSmrg /* mmPA_CL_VS_OUT_CNTL */ 30025324fb0dSmrg ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 30035324fb0dSmrg ptr[i++] = 0x207; 30045324fb0dSmrg ptr[i++] = 0; 30055324fb0dSmrg 30065324fb0dSmrg /* mmSPI_SHADER_PGM_RSRC3_VS */ 30075324fb0dSmrg ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1); 30085324fb0dSmrg ptr[i++] = 0x46; 30095324fb0dSmrg ptr[i++] = 0xffff; 30105324fb0dSmrg 30115324fb0dSmrg /* mmSPI_SHADER_PGM_LO_VS...mmSPI_SHADER_PGM_HI_VS */ 30125324fb0dSmrg ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2); 30135324fb0dSmrg ptr[i++] = 0x48; 30145324fb0dSmrg ptr[i++] = shader_addr >> 8; 30155324fb0dSmrg ptr[i++] = shader_addr >> 40; 30165324fb0dSmrg 30175324fb0dSmrg /* mmSPI_SHADER_PGM_RSRC1_VS */ 30185324fb0dSmrg ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1); 30195324fb0dSmrg ptr[i++] = 0x4a; 30205324fb0dSmrg ptr[i++] = 0xc0081; 30215324fb0dSmrg /* mmSPI_SHADER_PGM_RSRC2_VS */ 30225324fb0dSmrg ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1); 30235324fb0dSmrg ptr[i++] = 0x4b; 30245324fb0dSmrg ptr[i++] = 0x18; 30255324fb0dSmrg 30265324fb0dSmrg /* mmSPI_VS_OUT_CONFIG */ 30275324fb0dSmrg ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 30285324fb0dSmrg ptr[i++] = 0x1b1; 30295324fb0dSmrg ptr[i++] = 2; 30305324fb0dSmrg 30315324fb0dSmrg /* mmSPI_SHADER_POS_FORMAT */ 30325324fb0dSmrg ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 30335324fb0dSmrg ptr[i++] = 0x1c3; 30345324fb0dSmrg ptr[i++] = 4; 30355324fb0dSmrg 30365324fb0dSmrg ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4); 30375324fb0dSmrg ptr[i++] = 0x4c; 30385324fb0dSmrg i += 2; 30399bd392adSmrg ptr[i++] = hang_slow ? 0x45000000 : 0x42000000; 30409bd392adSmrg ptr[i++] = hang_slow ? 0x45000000 : 0x42000000; 30415324fb0dSmrg 30425324fb0dSmrg ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4); 30435324fb0dSmrg ptr[i++] = 0x50; 30445324fb0dSmrg i += 2; 30455324fb0dSmrg if (ps_type == PS_CONST) { 30465324fb0dSmrg i += 2; 30475324fb0dSmrg } else if (ps_type == PS_TEX) { 30485324fb0dSmrg ptr[i++] = 0x3f800000; 30495324fb0dSmrg ptr[i++] = 0x3f800000; 30505324fb0dSmrg } 30515324fb0dSmrg 30525324fb0dSmrg ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4); 30535324fb0dSmrg ptr[i++] = 0x54; 30545324fb0dSmrg i += 4; 30555324fb0dSmrg 30565324fb0dSmrg return i; 30575324fb0dSmrg} 30585324fb0dSmrg 30595324fb0dSmrgstatic int amdgpu_draw_ps_write2hw(uint32_t *ptr, 30605324fb0dSmrg int ps_type, 30615324fb0dSmrg uint64_t shader_addr) 30625324fb0dSmrg{ 30635324fb0dSmrg int i, j; 30645324fb0dSmrg const uint32_t *sh_registers; 30655324fb0dSmrg const uint32_t *context_registers; 30665324fb0dSmrg uint32_t num_sh_reg, num_context_reg; 30675324fb0dSmrg 30685324fb0dSmrg if (ps_type == PS_CONST) { 30695324fb0dSmrg sh_registers = (const uint32_t *)ps_const_sh_registers_gfx9; 30705324fb0dSmrg context_registers = (const uint32_t *)ps_const_context_reg_gfx9; 30715324fb0dSmrg num_sh_reg = ps_num_sh_registers_gfx9; 30725324fb0dSmrg num_context_reg = ps_num_context_registers_gfx9; 30735324fb0dSmrg } else if (ps_type == PS_TEX) { 30745324fb0dSmrg sh_registers = (const uint32_t *)ps_tex_sh_registers_gfx9; 30755324fb0dSmrg context_registers = (const uint32_t *)ps_tex_context_reg_gfx9; 30765324fb0dSmrg num_sh_reg = ps_num_sh_registers_gfx9; 30775324fb0dSmrg num_context_reg = ps_num_context_registers_gfx9; 30785324fb0dSmrg } 30795324fb0dSmrg 30805324fb0dSmrg i = 0; 30815324fb0dSmrg 30825324fb0dSmrg /* 0x2c07 SPI_SHADER_PGM_RSRC3_PS 30835324fb0dSmrg 0x2c08 SPI_SHADER_PGM_LO_PS 30845324fb0dSmrg 0x2c09 SPI_SHADER_PGM_HI_PS */ 30855324fb0dSmrg shader_addr += 256 * 9; 30865324fb0dSmrg ptr[i++] = PACKET3(PKT3_SET_SH_REG, 3); 30875324fb0dSmrg ptr[i++] = 0x7; 30885324fb0dSmrg ptr[i++] = 0xffff; 30895324fb0dSmrg ptr[i++] = shader_addr >> 8; 30905324fb0dSmrg ptr[i++] = shader_addr >> 40; 30915324fb0dSmrg 30925324fb0dSmrg for (j = 0; j < num_sh_reg; j++) { 30935324fb0dSmrg ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1); 30945324fb0dSmrg ptr[i++] = sh_registers[j * 2] - 0x2c00; 30955324fb0dSmrg ptr[i++] = sh_registers[j * 2 + 1]; 30965324fb0dSmrg } 30975324fb0dSmrg 30985324fb0dSmrg for (j = 0; j < num_context_reg; j++) { 30995324fb0dSmrg if (context_registers[j * 2] != 0xA1C5) { 31005324fb0dSmrg ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 31015324fb0dSmrg ptr[i++] = context_registers[j * 2] - 0xa000; 31025324fb0dSmrg ptr[i++] = context_registers[j * 2 + 1]; 31035324fb0dSmrg } 31045324fb0dSmrg 31055324fb0dSmrg if (context_registers[j * 2] == 0xA1B4) { 31065324fb0dSmrg ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 31075324fb0dSmrg ptr[i++] = 0x1b3; 31085324fb0dSmrg ptr[i++] = 2; 31095324fb0dSmrg } 31105324fb0dSmrg } 31115324fb0dSmrg 31125324fb0dSmrg return i; 31135324fb0dSmrg} 31145324fb0dSmrg 31155324fb0dSmrgstatic int amdgpu_draw_draw(uint32_t *ptr) 31165324fb0dSmrg{ 31175324fb0dSmrg int i = 0; 31185324fb0dSmrg 31195324fb0dSmrg /* mmIA_MULTI_VGT_PARAM */ 31205324fb0dSmrg ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1); 31215324fb0dSmrg ptr[i++] = 0x40000258; 31225324fb0dSmrg ptr[i++] = 0xd00ff; 31235324fb0dSmrg 31245324fb0dSmrg /* mmVGT_PRIMITIVE_TYPE */ 31255324fb0dSmrg ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1); 31265324fb0dSmrg ptr[i++] = 0x10000242; 31275324fb0dSmrg ptr[i++] = 0x11; 31285324fb0dSmrg 31295324fb0dSmrg ptr[i++] = PACKET3(PACKET3_DRAW_INDEX_AUTO, 1); 31305324fb0dSmrg ptr[i++] = 3; 31315324fb0dSmrg ptr[i++] = 2; 31325324fb0dSmrg 31335324fb0dSmrg return i; 31345324fb0dSmrg} 31355324fb0dSmrg 31365324fb0dSmrgvoid amdgpu_memset_draw(amdgpu_device_handle device_handle, 31375324fb0dSmrg amdgpu_bo_handle bo_shader_ps, 31385324fb0dSmrg amdgpu_bo_handle bo_shader_vs, 31395324fb0dSmrg uint64_t mc_address_shader_ps, 31405324fb0dSmrg uint64_t mc_address_shader_vs, 31415324fb0dSmrg uint32_t ring_id) 31425324fb0dSmrg{ 31435324fb0dSmrg amdgpu_context_handle context_handle; 31445324fb0dSmrg amdgpu_bo_handle bo_dst, bo_cmd, resources[4]; 31455324fb0dSmrg volatile unsigned char *ptr_dst; 31465324fb0dSmrg uint32_t *ptr_cmd; 31475324fb0dSmrg uint64_t mc_address_dst, mc_address_cmd; 31485324fb0dSmrg amdgpu_va_handle va_dst, va_cmd; 31495324fb0dSmrg int i, r; 31505324fb0dSmrg int bo_dst_size = 16384; 31515324fb0dSmrg int bo_cmd_size = 4096; 31525324fb0dSmrg struct amdgpu_cs_request ibs_request = {0}; 31535324fb0dSmrg struct amdgpu_cs_ib_info ib_info = {0}; 31545324fb0dSmrg struct amdgpu_cs_fence fence_status = {0}; 31555324fb0dSmrg uint32_t expired; 31565324fb0dSmrg amdgpu_bo_list_handle bo_list; 31575324fb0dSmrg 31585324fb0dSmrg r = amdgpu_cs_ctx_create(device_handle, &context_handle); 31595324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 31605324fb0dSmrg 31615324fb0dSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096, 31625324fb0dSmrg AMDGPU_GEM_DOMAIN_GTT, 0, 31635324fb0dSmrg &bo_cmd, (void **)&ptr_cmd, 31645324fb0dSmrg &mc_address_cmd, &va_cmd); 31655324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 31665324fb0dSmrg memset(ptr_cmd, 0, bo_cmd_size); 31675324fb0dSmrg 31685324fb0dSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096, 31695324fb0dSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, 31705324fb0dSmrg &bo_dst, (void **)&ptr_dst, 31715324fb0dSmrg &mc_address_dst, &va_dst); 31725324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 31735324fb0dSmrg 31745324fb0dSmrg i = 0; 31755324fb0dSmrg i += amdgpu_draw_init(ptr_cmd + i); 31765324fb0dSmrg 31779bd392adSmrg i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst, 0); 31785324fb0dSmrg 31799bd392adSmrg i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i, 0); 31805324fb0dSmrg 31819bd392adSmrg i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_CONST, mc_address_shader_vs, 0); 31825324fb0dSmrg 31835324fb0dSmrg i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_CONST, mc_address_shader_ps); 31845324fb0dSmrg 31855324fb0dSmrg ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4); 31865324fb0dSmrg ptr_cmd[i++] = 0xc; 31875324fb0dSmrg ptr_cmd[i++] = 0x33333333; 31885324fb0dSmrg ptr_cmd[i++] = 0x33333333; 31895324fb0dSmrg ptr_cmd[i++] = 0x33333333; 31905324fb0dSmrg ptr_cmd[i++] = 0x33333333; 31915324fb0dSmrg 31925324fb0dSmrg i += amdgpu_draw_draw(ptr_cmd + i); 31935324fb0dSmrg 31945324fb0dSmrg while (i & 7) 31955324fb0dSmrg ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */ 31965324fb0dSmrg 31975324fb0dSmrg resources[0] = bo_dst; 31985324fb0dSmrg resources[1] = bo_shader_ps; 31995324fb0dSmrg resources[2] = bo_shader_vs; 32005324fb0dSmrg resources[3] = bo_cmd; 32019bd392adSmrg r = amdgpu_bo_list_create(device_handle, 4, resources, NULL, &bo_list); 32025324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 32035324fb0dSmrg 32045324fb0dSmrg ib_info.ib_mc_address = mc_address_cmd; 32055324fb0dSmrg ib_info.size = i; 32065324fb0dSmrg ibs_request.ip_type = AMDGPU_HW_IP_GFX; 32075324fb0dSmrg ibs_request.ring = ring_id; 32085324fb0dSmrg ibs_request.resources = bo_list; 32095324fb0dSmrg ibs_request.number_of_ibs = 1; 32105324fb0dSmrg ibs_request.ibs = &ib_info; 32115324fb0dSmrg ibs_request.fence_info.handle = NULL; 32125324fb0dSmrg 32135324fb0dSmrg /* submit CS */ 32145324fb0dSmrg r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1); 32155324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 32165324fb0dSmrg 32175324fb0dSmrg r = amdgpu_bo_list_destroy(bo_list); 32185324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 32195324fb0dSmrg 32205324fb0dSmrg fence_status.ip_type = AMDGPU_HW_IP_GFX; 32215324fb0dSmrg fence_status.ip_instance = 0; 32225324fb0dSmrg fence_status.ring = ring_id; 32235324fb0dSmrg fence_status.context = context_handle; 32245324fb0dSmrg fence_status.fence = ibs_request.seq_no; 32255324fb0dSmrg 32265324fb0dSmrg /* wait for IB accomplished */ 32275324fb0dSmrg r = amdgpu_cs_query_fence_status(&fence_status, 32285324fb0dSmrg AMDGPU_TIMEOUT_INFINITE, 32295324fb0dSmrg 0, &expired); 32305324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 32315324fb0dSmrg CU_ASSERT_EQUAL(expired, true); 32325324fb0dSmrg 32335324fb0dSmrg /* verify if memset test result meets with expected */ 32345324fb0dSmrg i = 0; 32355324fb0dSmrg while(i < bo_dst_size) { 32365324fb0dSmrg CU_ASSERT_EQUAL(ptr_dst[i++], 0x33); 32375324fb0dSmrg } 32385324fb0dSmrg 32395324fb0dSmrg r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size); 32405324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 32415324fb0dSmrg 32425324fb0dSmrg r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size); 32435324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 32445324fb0dSmrg 32455324fb0dSmrg r = amdgpu_cs_ctx_free(context_handle); 32465324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 32475324fb0dSmrg} 32485324fb0dSmrg 32495324fb0dSmrgstatic void amdgpu_memset_draw_test(amdgpu_device_handle device_handle, 32505324fb0dSmrg uint32_t ring) 32515324fb0dSmrg{ 32525324fb0dSmrg amdgpu_bo_handle bo_shader_ps, bo_shader_vs; 32535324fb0dSmrg void *ptr_shader_ps; 32545324fb0dSmrg void *ptr_shader_vs; 32555324fb0dSmrg uint64_t mc_address_shader_ps, mc_address_shader_vs; 32565324fb0dSmrg amdgpu_va_handle va_shader_ps, va_shader_vs; 32575324fb0dSmrg int r; 32585324fb0dSmrg int bo_shader_size = 4096; 32595324fb0dSmrg 32605324fb0dSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096, 32615324fb0dSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, 32625324fb0dSmrg &bo_shader_ps, &ptr_shader_ps, 32635324fb0dSmrg &mc_address_shader_ps, &va_shader_ps); 32645324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 326588f8a8d2Smrg memset(ptr_shader_ps, 0, bo_shader_size); 32665324fb0dSmrg 32675324fb0dSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096, 32685324fb0dSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, 32695324fb0dSmrg &bo_shader_vs, &ptr_shader_vs, 32705324fb0dSmrg &mc_address_shader_vs, &va_shader_vs); 32715324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 327288f8a8d2Smrg memset(ptr_shader_vs, 0, bo_shader_size); 32735324fb0dSmrg 32745324fb0dSmrg r = amdgpu_draw_load_ps_shader(ptr_shader_ps, PS_CONST); 32755324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 32765324fb0dSmrg 32775324fb0dSmrg r = amdgpu_draw_load_vs_shader(ptr_shader_vs); 32785324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 32795324fb0dSmrg 32805324fb0dSmrg amdgpu_memset_draw(device_handle, bo_shader_ps, bo_shader_vs, 32815324fb0dSmrg mc_address_shader_ps, mc_address_shader_vs, ring); 32825324fb0dSmrg 32835324fb0dSmrg r = amdgpu_bo_unmap_and_free(bo_shader_ps, va_shader_ps, mc_address_shader_ps, bo_shader_size); 32845324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 32855324fb0dSmrg 32865324fb0dSmrg r = amdgpu_bo_unmap_and_free(bo_shader_vs, va_shader_vs, mc_address_shader_vs, bo_shader_size); 32875324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 32885324fb0dSmrg} 32895324fb0dSmrg 32905324fb0dSmrgstatic void amdgpu_memcpy_draw(amdgpu_device_handle device_handle, 32915324fb0dSmrg amdgpu_bo_handle bo_shader_ps, 32925324fb0dSmrg amdgpu_bo_handle bo_shader_vs, 32935324fb0dSmrg uint64_t mc_address_shader_ps, 32945324fb0dSmrg uint64_t mc_address_shader_vs, 32959bd392adSmrg uint32_t ring, int hang) 32965324fb0dSmrg{ 32975324fb0dSmrg amdgpu_context_handle context_handle; 32985324fb0dSmrg amdgpu_bo_handle bo_dst, bo_src, bo_cmd, resources[5]; 32995324fb0dSmrg volatile unsigned char *ptr_dst; 33005324fb0dSmrg unsigned char *ptr_src; 33015324fb0dSmrg uint32_t *ptr_cmd; 33025324fb0dSmrg uint64_t mc_address_dst, mc_address_src, mc_address_cmd; 33035324fb0dSmrg amdgpu_va_handle va_dst, va_src, va_cmd; 33045324fb0dSmrg int i, r; 33055324fb0dSmrg int bo_size = 16384; 33065324fb0dSmrg int bo_cmd_size = 4096; 33075324fb0dSmrg struct amdgpu_cs_request ibs_request = {0}; 33085324fb0dSmrg struct amdgpu_cs_ib_info ib_info= {0}; 33099bd392adSmrg uint32_t hang_state, hangs; 33109bd392adSmrg uint32_t expired; 33115324fb0dSmrg amdgpu_bo_list_handle bo_list; 33125324fb0dSmrg struct amdgpu_cs_fence fence_status = {0}; 33135324fb0dSmrg 33145324fb0dSmrg r = amdgpu_cs_ctx_create(device_handle, &context_handle); 33155324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 33165324fb0dSmrg 33175324fb0dSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096, 33185324fb0dSmrg AMDGPU_GEM_DOMAIN_GTT, 0, 33195324fb0dSmrg &bo_cmd, (void **)&ptr_cmd, 33205324fb0dSmrg &mc_address_cmd, &va_cmd); 33215324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 33225324fb0dSmrg memset(ptr_cmd, 0, bo_cmd_size); 33235324fb0dSmrg 33245324fb0dSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096, 33255324fb0dSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, 33265324fb0dSmrg &bo_src, (void **)&ptr_src, 33275324fb0dSmrg &mc_address_src, &va_src); 33285324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 33295324fb0dSmrg 33305324fb0dSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096, 33315324fb0dSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, 33325324fb0dSmrg &bo_dst, (void **)&ptr_dst, 33335324fb0dSmrg &mc_address_dst, &va_dst); 33345324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 33355324fb0dSmrg 33365324fb0dSmrg memset(ptr_src, 0x55, bo_size); 33375324fb0dSmrg 33385324fb0dSmrg i = 0; 33395324fb0dSmrg i += amdgpu_draw_init(ptr_cmd + i); 33405324fb0dSmrg 33419bd392adSmrg i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst, 0); 33425324fb0dSmrg 33439bd392adSmrg i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i, 0); 33445324fb0dSmrg 33459bd392adSmrg i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_vs, 0); 33465324fb0dSmrg 33475324fb0dSmrg i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_ps); 33485324fb0dSmrg 33495324fb0dSmrg ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 8); 33505324fb0dSmrg ptr_cmd[i++] = 0xc; 33515324fb0dSmrg ptr_cmd[i++] = mc_address_src >> 8; 33525324fb0dSmrg ptr_cmd[i++] = mc_address_src >> 40 | 0x10e00000; 33535324fb0dSmrg ptr_cmd[i++] = 0x7c01f; 33545324fb0dSmrg ptr_cmd[i++] = 0x90500fac; 33555324fb0dSmrg ptr_cmd[i++] = 0x3e000; 33565324fb0dSmrg i += 3; 33575324fb0dSmrg 33585324fb0dSmrg ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4); 33595324fb0dSmrg ptr_cmd[i++] = 0x14; 33605324fb0dSmrg ptr_cmd[i++] = 0x92; 33615324fb0dSmrg i += 3; 33625324fb0dSmrg 336388f8a8d2Smrg ptr_cmd[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 33645324fb0dSmrg ptr_cmd[i++] = 0x191; 33655324fb0dSmrg ptr_cmd[i++] = 0; 33665324fb0dSmrg 33675324fb0dSmrg i += amdgpu_draw_draw(ptr_cmd + i); 33685324fb0dSmrg 33695324fb0dSmrg while (i & 7) 33705324fb0dSmrg ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */ 33715324fb0dSmrg 33725324fb0dSmrg resources[0] = bo_dst; 33735324fb0dSmrg resources[1] = bo_src; 33745324fb0dSmrg resources[2] = bo_shader_ps; 33755324fb0dSmrg resources[3] = bo_shader_vs; 33765324fb0dSmrg resources[4] = bo_cmd; 33775324fb0dSmrg r = amdgpu_bo_list_create(device_handle, 5, resources, NULL, &bo_list); 33785324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 33795324fb0dSmrg 33805324fb0dSmrg ib_info.ib_mc_address = mc_address_cmd; 33815324fb0dSmrg ib_info.size = i; 33825324fb0dSmrg ibs_request.ip_type = AMDGPU_HW_IP_GFX; 33835324fb0dSmrg ibs_request.ring = ring; 33845324fb0dSmrg ibs_request.resources = bo_list; 33855324fb0dSmrg ibs_request.number_of_ibs = 1; 33865324fb0dSmrg ibs_request.ibs = &ib_info; 33875324fb0dSmrg ibs_request.fence_info.handle = NULL; 33885324fb0dSmrg r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1); 33895324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 33905324fb0dSmrg 33915324fb0dSmrg fence_status.ip_type = AMDGPU_HW_IP_GFX; 33925324fb0dSmrg fence_status.ip_instance = 0; 33935324fb0dSmrg fence_status.ring = ring; 33945324fb0dSmrg fence_status.context = context_handle; 33955324fb0dSmrg fence_status.fence = ibs_request.seq_no; 33965324fb0dSmrg 33975324fb0dSmrg /* wait for IB accomplished */ 33985324fb0dSmrg r = amdgpu_cs_query_fence_status(&fence_status, 33995324fb0dSmrg AMDGPU_TIMEOUT_INFINITE, 34005324fb0dSmrg 0, &expired); 34019bd392adSmrg if (!hang) { 34029bd392adSmrg CU_ASSERT_EQUAL(r, 0); 34039bd392adSmrg CU_ASSERT_EQUAL(expired, true); 34045324fb0dSmrg 34059bd392adSmrg /* verify if memcpy test result meets with expected */ 34069bd392adSmrg i = 0; 34079bd392adSmrg while(i < bo_size) { 34089bd392adSmrg CU_ASSERT_EQUAL(ptr_dst[i], ptr_src[i]); 34099bd392adSmrg i++; 34109bd392adSmrg } 34119bd392adSmrg } else { 34129bd392adSmrg r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs); 34139bd392adSmrg CU_ASSERT_EQUAL(r, 0); 34149bd392adSmrg CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET); 34155324fb0dSmrg } 34165324fb0dSmrg 34175324fb0dSmrg r = amdgpu_bo_list_destroy(bo_list); 34185324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 34195324fb0dSmrg 34205324fb0dSmrg r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_size); 34215324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 34225324fb0dSmrg r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_size); 34235324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 34245324fb0dSmrg 34255324fb0dSmrg r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size); 34265324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 34275324fb0dSmrg 34285324fb0dSmrg r = amdgpu_cs_ctx_free(context_handle); 34295324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 34305324fb0dSmrg} 34315324fb0dSmrg 34329bd392adSmrgvoid amdgpu_memcpy_draw_test(amdgpu_device_handle device_handle, uint32_t ring, 34339bd392adSmrg int hang) 34345324fb0dSmrg{ 34355324fb0dSmrg amdgpu_bo_handle bo_shader_ps, bo_shader_vs; 34365324fb0dSmrg void *ptr_shader_ps; 34375324fb0dSmrg void *ptr_shader_vs; 34385324fb0dSmrg uint64_t mc_address_shader_ps, mc_address_shader_vs; 34395324fb0dSmrg amdgpu_va_handle va_shader_ps, va_shader_vs; 34405324fb0dSmrg int bo_shader_size = 4096; 34419bd392adSmrg enum ps_type ps_type = hang ? PS_HANG : PS_TEX; 34425324fb0dSmrg int r; 34435324fb0dSmrg 34445324fb0dSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096, 34455324fb0dSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, 34465324fb0dSmrg &bo_shader_ps, &ptr_shader_ps, 34475324fb0dSmrg &mc_address_shader_ps, &va_shader_ps); 34485324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 344988f8a8d2Smrg memset(ptr_shader_ps, 0, bo_shader_size); 34505324fb0dSmrg 34515324fb0dSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096, 34525324fb0dSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, 34535324fb0dSmrg &bo_shader_vs, &ptr_shader_vs, 34545324fb0dSmrg &mc_address_shader_vs, &va_shader_vs); 34555324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 345688f8a8d2Smrg memset(ptr_shader_vs, 0, bo_shader_size); 34575324fb0dSmrg 34589bd392adSmrg r = amdgpu_draw_load_ps_shader(ptr_shader_ps, ps_type); 34595324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 34605324fb0dSmrg 34615324fb0dSmrg r = amdgpu_draw_load_vs_shader(ptr_shader_vs); 34625324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 34635324fb0dSmrg 34645324fb0dSmrg amdgpu_memcpy_draw(device_handle, bo_shader_ps, bo_shader_vs, 34659bd392adSmrg mc_address_shader_ps, mc_address_shader_vs, ring, hang); 34665324fb0dSmrg 34675324fb0dSmrg r = amdgpu_bo_unmap_and_free(bo_shader_ps, va_shader_ps, mc_address_shader_ps, bo_shader_size); 34685324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 34695324fb0dSmrg 34705324fb0dSmrg r = amdgpu_bo_unmap_and_free(bo_shader_vs, va_shader_vs, mc_address_shader_vs, bo_shader_size); 34715324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 34725324fb0dSmrg} 34735324fb0dSmrg 34745324fb0dSmrgstatic void amdgpu_draw_test(void) 34755324fb0dSmrg{ 34765324fb0dSmrg int r; 34775324fb0dSmrg struct drm_amdgpu_info_hw_ip info; 34785324fb0dSmrg uint32_t ring_id; 34795324fb0dSmrg 34805324fb0dSmrg r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info); 34815324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 348288f8a8d2Smrg if (!info.available_rings) 348388f8a8d2Smrg printf("SKIP ... as there's no graphics ring\n"); 34845324fb0dSmrg 34855324fb0dSmrg for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) { 34865324fb0dSmrg amdgpu_memset_draw_test(device_handle, ring_id); 34879bd392adSmrg amdgpu_memcpy_draw_test(device_handle, ring_id, 0); 34885324fb0dSmrg } 34895324fb0dSmrg} 349088f8a8d2Smrg 34919bd392adSmrgvoid amdgpu_memcpy_draw_hang_slow_test(amdgpu_device_handle device_handle, uint32_t ring) 34929bd392adSmrg{ 34939bd392adSmrg amdgpu_context_handle context_handle; 34949bd392adSmrg amdgpu_bo_handle bo_shader_ps, bo_shader_vs; 34959bd392adSmrg amdgpu_bo_handle bo_dst, bo_src, bo_cmd, resources[5]; 34969bd392adSmrg void *ptr_shader_ps; 34979bd392adSmrg void *ptr_shader_vs; 34989bd392adSmrg volatile unsigned char *ptr_dst; 34999bd392adSmrg unsigned char *ptr_src; 35009bd392adSmrg uint32_t *ptr_cmd; 35019bd392adSmrg uint64_t mc_address_dst, mc_address_src, mc_address_cmd; 35029bd392adSmrg uint64_t mc_address_shader_ps, mc_address_shader_vs; 35039bd392adSmrg amdgpu_va_handle va_shader_ps, va_shader_vs; 35049bd392adSmrg amdgpu_va_handle va_dst, va_src, va_cmd; 35059bd392adSmrg struct amdgpu_gpu_info gpu_info = {0}; 35069bd392adSmrg int i, r; 35079bd392adSmrg int bo_size = 0x4000000; 35089bd392adSmrg int bo_shader_ps_size = 0x400000; 35099bd392adSmrg int bo_shader_vs_size = 4096; 35109bd392adSmrg int bo_cmd_size = 4096; 35119bd392adSmrg struct amdgpu_cs_request ibs_request = {0}; 35129bd392adSmrg struct amdgpu_cs_ib_info ib_info= {0}; 35139bd392adSmrg uint32_t hang_state, hangs, expired; 35149bd392adSmrg amdgpu_bo_list_handle bo_list; 35159bd392adSmrg struct amdgpu_cs_fence fence_status = {0}; 35169bd392adSmrg 35179bd392adSmrg r = amdgpu_query_gpu_info(device_handle, &gpu_info); 35189bd392adSmrg CU_ASSERT_EQUAL(r, 0); 35199bd392adSmrg 35209bd392adSmrg r = amdgpu_cs_ctx_create(device_handle, &context_handle); 35219bd392adSmrg CU_ASSERT_EQUAL(r, 0); 35229bd392adSmrg 35239bd392adSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096, 35249bd392adSmrg AMDGPU_GEM_DOMAIN_GTT, 0, 35259bd392adSmrg &bo_cmd, (void **)&ptr_cmd, 35269bd392adSmrg &mc_address_cmd, &va_cmd); 35279bd392adSmrg CU_ASSERT_EQUAL(r, 0); 35289bd392adSmrg memset(ptr_cmd, 0, bo_cmd_size); 35299bd392adSmrg 35309bd392adSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_ps_size, 4096, 35319bd392adSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, 35329bd392adSmrg &bo_shader_ps, &ptr_shader_ps, 35339bd392adSmrg &mc_address_shader_ps, &va_shader_ps); 35349bd392adSmrg CU_ASSERT_EQUAL(r, 0); 35359bd392adSmrg memset(ptr_shader_ps, 0, bo_shader_ps_size); 35369bd392adSmrg 35379bd392adSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_vs_size, 4096, 35389bd392adSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, 35399bd392adSmrg &bo_shader_vs, &ptr_shader_vs, 35409bd392adSmrg &mc_address_shader_vs, &va_shader_vs); 35419bd392adSmrg CU_ASSERT_EQUAL(r, 0); 35429bd392adSmrg memset(ptr_shader_vs, 0, bo_shader_vs_size); 35439bd392adSmrg 35449bd392adSmrg r = amdgpu_draw_load_ps_shader_hang_slow(ptr_shader_ps, gpu_info.family_id); 35459bd392adSmrg CU_ASSERT_EQUAL(r, 0); 35469bd392adSmrg 35479bd392adSmrg r = amdgpu_draw_load_vs_shader(ptr_shader_vs); 35489bd392adSmrg CU_ASSERT_EQUAL(r, 0); 35499bd392adSmrg 35509bd392adSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096, 35519bd392adSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, 35529bd392adSmrg &bo_src, (void **)&ptr_src, 35539bd392adSmrg &mc_address_src, &va_src); 35549bd392adSmrg CU_ASSERT_EQUAL(r, 0); 35559bd392adSmrg 35569bd392adSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096, 35579bd392adSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, 35589bd392adSmrg &bo_dst, (void **)&ptr_dst, 35599bd392adSmrg &mc_address_dst, &va_dst); 35609bd392adSmrg CU_ASSERT_EQUAL(r, 0); 35619bd392adSmrg 35629bd392adSmrg memset(ptr_src, 0x55, bo_size); 35639bd392adSmrg 35649bd392adSmrg i = 0; 35659bd392adSmrg i += amdgpu_draw_init(ptr_cmd + i); 35669bd392adSmrg 35679bd392adSmrg i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst, 1); 35689bd392adSmrg 35699bd392adSmrg i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i, 1); 35709bd392adSmrg 35719bd392adSmrg i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_TEX, 35729bd392adSmrg mc_address_shader_vs, 1); 35739bd392adSmrg 35749bd392adSmrg i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_ps); 35759bd392adSmrg 35769bd392adSmrg ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 8); 35779bd392adSmrg ptr_cmd[i++] = 0xc; 35789bd392adSmrg ptr_cmd[i++] = mc_address_src >> 8; 35799bd392adSmrg ptr_cmd[i++] = mc_address_src >> 40 | 0x10e00000; 35809bd392adSmrg ptr_cmd[i++] = 0x1ffc7ff; 35819bd392adSmrg ptr_cmd[i++] = 0x90500fac; 35829bd392adSmrg ptr_cmd[i++] = 0xffe000; 35839bd392adSmrg i += 3; 35849bd392adSmrg 35859bd392adSmrg ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4); 35869bd392adSmrg ptr_cmd[i++] = 0x14; 35879bd392adSmrg ptr_cmd[i++] = 0x92; 35889bd392adSmrg i += 3; 35899bd392adSmrg 35909bd392adSmrg ptr_cmd[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 35919bd392adSmrg ptr_cmd[i++] = 0x191; 35929bd392adSmrg ptr_cmd[i++] = 0; 35939bd392adSmrg 35949bd392adSmrg i += amdgpu_draw_draw(ptr_cmd + i); 35959bd392adSmrg 35969bd392adSmrg while (i & 7) 35979bd392adSmrg ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */ 35989bd392adSmrg 35999bd392adSmrg resources[0] = bo_dst; 36009bd392adSmrg resources[1] = bo_src; 36019bd392adSmrg resources[2] = bo_shader_ps; 36029bd392adSmrg resources[3] = bo_shader_vs; 36039bd392adSmrg resources[4] = bo_cmd; 36049bd392adSmrg r = amdgpu_bo_list_create(device_handle, 5, resources, NULL, &bo_list); 36059bd392adSmrg CU_ASSERT_EQUAL(r, 0); 36069bd392adSmrg 36079bd392adSmrg ib_info.ib_mc_address = mc_address_cmd; 36089bd392adSmrg ib_info.size = i; 36099bd392adSmrg ibs_request.ip_type = AMDGPU_HW_IP_GFX; 36109bd392adSmrg ibs_request.ring = ring; 36119bd392adSmrg ibs_request.resources = bo_list; 36129bd392adSmrg ibs_request.number_of_ibs = 1; 36139bd392adSmrg ibs_request.ibs = &ib_info; 36149bd392adSmrg ibs_request.fence_info.handle = NULL; 36159bd392adSmrg r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1); 36169bd392adSmrg CU_ASSERT_EQUAL(r, 0); 36179bd392adSmrg 36189bd392adSmrg fence_status.ip_type = AMDGPU_HW_IP_GFX; 36199bd392adSmrg fence_status.ip_instance = 0; 36209bd392adSmrg fence_status.ring = ring; 36219bd392adSmrg fence_status.context = context_handle; 36229bd392adSmrg fence_status.fence = ibs_request.seq_no; 36239bd392adSmrg 36249bd392adSmrg /* wait for IB accomplished */ 36259bd392adSmrg r = amdgpu_cs_query_fence_status(&fence_status, 36269bd392adSmrg AMDGPU_TIMEOUT_INFINITE, 36279bd392adSmrg 0, &expired); 36289bd392adSmrg 36299bd392adSmrg r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs); 36309bd392adSmrg CU_ASSERT_EQUAL(r, 0); 36319bd392adSmrg CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET); 36329bd392adSmrg 36339bd392adSmrg r = amdgpu_bo_list_destroy(bo_list); 36349bd392adSmrg CU_ASSERT_EQUAL(r, 0); 36359bd392adSmrg 36369bd392adSmrg r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_size); 36379bd392adSmrg CU_ASSERT_EQUAL(r, 0); 36389bd392adSmrg r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_size); 36399bd392adSmrg CU_ASSERT_EQUAL(r, 0); 36409bd392adSmrg 36419bd392adSmrg r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size); 36429bd392adSmrg CU_ASSERT_EQUAL(r, 0); 36439bd392adSmrg 36449bd392adSmrg r = amdgpu_bo_unmap_and_free(bo_shader_ps, va_shader_ps, mc_address_shader_ps, bo_shader_ps_size); 36459bd392adSmrg CU_ASSERT_EQUAL(r, 0); 36469bd392adSmrg r = amdgpu_bo_unmap_and_free(bo_shader_vs, va_shader_vs, mc_address_shader_vs, bo_shader_vs_size); 36479bd392adSmrg CU_ASSERT_EQUAL(r, 0); 36489bd392adSmrg 36499bd392adSmrg r = amdgpu_cs_ctx_free(context_handle); 36509bd392adSmrg CU_ASSERT_EQUAL(r, 0); 36519bd392adSmrg} 36529bd392adSmrg 365388f8a8d2Smrgstatic void amdgpu_gpu_reset_test(void) 365488f8a8d2Smrg{ 365588f8a8d2Smrg int r; 365688f8a8d2Smrg char debugfs_path[256], tmp[10]; 365788f8a8d2Smrg int fd; 365888f8a8d2Smrg struct stat sbuf; 365988f8a8d2Smrg amdgpu_context_handle context_handle; 366088f8a8d2Smrg uint32_t hang_state, hangs; 366188f8a8d2Smrg 366288f8a8d2Smrg r = amdgpu_cs_ctx_create(device_handle, &context_handle); 366388f8a8d2Smrg CU_ASSERT_EQUAL(r, 0); 366488f8a8d2Smrg 366588f8a8d2Smrg r = fstat(drm_amdgpu[0], &sbuf); 366688f8a8d2Smrg CU_ASSERT_EQUAL(r, 0); 366788f8a8d2Smrg 366888f8a8d2Smrg sprintf(debugfs_path, "/sys/kernel/debug/dri/%d/amdgpu_gpu_recover", minor(sbuf.st_rdev)); 366988f8a8d2Smrg fd = open(debugfs_path, O_RDONLY); 367088f8a8d2Smrg CU_ASSERT(fd >= 0); 367188f8a8d2Smrg 367288f8a8d2Smrg r = read(fd, tmp, sizeof(tmp)/sizeof(char)); 367388f8a8d2Smrg CU_ASSERT(r > 0); 367488f8a8d2Smrg 367588f8a8d2Smrg r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs); 367688f8a8d2Smrg CU_ASSERT_EQUAL(r, 0); 367788f8a8d2Smrg CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET); 367888f8a8d2Smrg 367988f8a8d2Smrg close(fd); 368088f8a8d2Smrg r = amdgpu_cs_ctx_free(context_handle); 368188f8a8d2Smrg CU_ASSERT_EQUAL(r, 0); 368288f8a8d2Smrg 368388f8a8d2Smrg amdgpu_compute_dispatch_test(); 368488f8a8d2Smrg amdgpu_gfx_dispatch_test(); 368588f8a8d2Smrg} 3686