basic_tests.c revision 4babd585
13f012e29Smrg/* 23f012e29Smrg * Copyright 2014 Advanced Micro Devices, Inc. 33f012e29Smrg * 43f012e29Smrg * Permission is hereby granted, free of charge, to any person obtaining a 53f012e29Smrg * copy of this software and associated documentation files (the "Software"), 63f012e29Smrg * to deal in the Software without restriction, including without limitation 73f012e29Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 83f012e29Smrg * and/or sell copies of the Software, and to permit persons to whom the 93f012e29Smrg * Software is furnished to do so, subject to the following conditions: 103f012e29Smrg * 113f012e29Smrg * The above copyright notice and this permission notice shall be included in 123f012e29Smrg * all copies or substantial portions of the Software. 133f012e29Smrg * 143f012e29Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 153f012e29Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 163f012e29Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 173f012e29Smrg * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 183f012e29Smrg * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 193f012e29Smrg * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 203f012e29Smrg * OTHER DEALINGS IN THE SOFTWARE. 213f012e29Smrg * 223f012e29Smrg*/ 233f012e29Smrg 243f012e29Smrg#include <stdio.h> 253f012e29Smrg#include <stdlib.h> 263f012e29Smrg#include <unistd.h> 2788f8a8d2Smrg#include <sys/types.h> 2888f8a8d2Smrg#ifdef MAJOR_IN_SYSMACROS 2988f8a8d2Smrg#include <sys/sysmacros.h> 3088f8a8d2Smrg#endif 3188f8a8d2Smrg#include <sys/stat.h> 3288f8a8d2Smrg#include <fcntl.h> 339bd392adSmrg#if HAVE_ALLOCA_H 343f012e29Smrg# include <alloca.h> 353f012e29Smrg#endif 3600a23bdaSmrg#include <sys/wait.h> 373f012e29Smrg 383f012e29Smrg#include "CUnit/Basic.h" 393f012e29Smrg 403f012e29Smrg#include "amdgpu_test.h" 413f012e29Smrg#include "amdgpu_drm.h" 4241687f09Smrg#include "amdgpu_internal.h" 437cdc0497Smrg#include "util_math.h" 443f012e29Smrg 453f012e29Smrgstatic amdgpu_device_handle device_handle; 463f012e29Smrgstatic uint32_t major_version; 473f012e29Smrgstatic uint32_t minor_version; 48d8807b2fSmrgstatic uint32_t family_id; 494babd585Smrgstatic uint32_t chip_id; 504babd585Smrgstatic uint32_t chip_rev; 513f012e29Smrg 523f012e29Smrgstatic void amdgpu_query_info_test(void); 533f012e29Smrgstatic void amdgpu_command_submission_gfx(void); 543f012e29Smrgstatic void amdgpu_command_submission_compute(void); 55d8807b2fSmrgstatic void amdgpu_command_submission_multi_fence(void); 563f012e29Smrgstatic void amdgpu_command_submission_sdma(void); 573f012e29Smrgstatic void amdgpu_userptr_test(void); 583f012e29Smrgstatic void amdgpu_semaphore_test(void); 5900a23bdaSmrgstatic void amdgpu_sync_dependency_test(void); 6000a23bdaSmrgstatic void amdgpu_bo_eviction_test(void); 6188f8a8d2Smrgstatic void amdgpu_compute_dispatch_test(void); 6288f8a8d2Smrgstatic void amdgpu_gfx_dispatch_test(void); 635324fb0dSmrgstatic void amdgpu_draw_test(void); 6488f8a8d2Smrgstatic void amdgpu_gpu_reset_test(void); 653f012e29Smrg 663f012e29Smrgstatic void amdgpu_command_submission_write_linear_helper(unsigned ip_type); 673f012e29Smrgstatic void amdgpu_command_submission_const_fill_helper(unsigned ip_type); 683f012e29Smrgstatic void amdgpu_command_submission_copy_linear_helper(unsigned ip_type); 6900a23bdaSmrgstatic void amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle, 7000a23bdaSmrg unsigned ip_type, 7100a23bdaSmrg int instance, int pm4_dw, uint32_t *pm4_src, 7200a23bdaSmrg int res_cnt, amdgpu_bo_handle *resources, 7300a23bdaSmrg struct amdgpu_cs_ib_info *ib_info, 7400a23bdaSmrg struct amdgpu_cs_request *ibs_request); 7541687f09Smrg 763f012e29SmrgCU_TestInfo basic_tests[] = { 773f012e29Smrg { "Query Info Test", amdgpu_query_info_test }, 783f012e29Smrg { "Userptr Test", amdgpu_userptr_test }, 7900a23bdaSmrg { "bo eviction Test", amdgpu_bo_eviction_test }, 803f012e29Smrg { "Command submission Test (GFX)", amdgpu_command_submission_gfx }, 813f012e29Smrg { "Command submission Test (Compute)", amdgpu_command_submission_compute }, 82d8807b2fSmrg { "Command submission Test (Multi-Fence)", amdgpu_command_submission_multi_fence }, 833f012e29Smrg { "Command submission Test (SDMA)", amdgpu_command_submission_sdma }, 843f012e29Smrg { "SW semaphore Test", amdgpu_semaphore_test }, 8500a23bdaSmrg { "Sync dependency Test", amdgpu_sync_dependency_test }, 8688f8a8d2Smrg { "Dispatch Test (Compute)", amdgpu_compute_dispatch_test }, 8788f8a8d2Smrg { "Dispatch Test (GFX)", amdgpu_gfx_dispatch_test }, 885324fb0dSmrg { "Draw Test", amdgpu_draw_test }, 8988f8a8d2Smrg { "GPU reset Test", amdgpu_gpu_reset_test }, 903f012e29Smrg CU_TEST_INFO_NULL, 913f012e29Smrg}; 929bd392adSmrg#define BUFFER_SIZE (MAX2(8 * 1024, getpagesize())) 933f012e29Smrg#define SDMA_PKT_HEADER_op_offset 0 943f012e29Smrg#define SDMA_PKT_HEADER_op_mask 0x000000FF 953f012e29Smrg#define SDMA_PKT_HEADER_op_shift 0 963f012e29Smrg#define SDMA_PKT_HEADER_OP(x) (((x) & SDMA_PKT_HEADER_op_mask) << SDMA_PKT_HEADER_op_shift) 973f012e29Smrg#define SDMA_OPCODE_CONSTANT_FILL 11 983f012e29Smrg# define SDMA_CONSTANT_FILL_EXTRA_SIZE(x) ((x) << 14) 993f012e29Smrg /* 0 = byte fill 1003f012e29Smrg * 2 = DW fill 1013f012e29Smrg */ 1023f012e29Smrg#define SDMA_PACKET(op, sub_op, e) ((((e) & 0xFFFF) << 16) | \ 1033f012e29Smrg (((sub_op) & 0xFF) << 8) | \ 1043f012e29Smrg (((op) & 0xFF) << 0)) 1053f012e29Smrg#define SDMA_OPCODE_WRITE 2 1063f012e29Smrg# define SDMA_WRITE_SUB_OPCODE_LINEAR 0 1073f012e29Smrg# define SDMA_WRTIE_SUB_OPCODE_TILED 1 1083f012e29Smrg 1093f012e29Smrg#define SDMA_OPCODE_COPY 1 1103f012e29Smrg# define SDMA_COPY_SUB_OPCODE_LINEAR 0 1113f012e29Smrg 11241687f09Smrg#define SDMA_OPCODE_ATOMIC 10 11341687f09Smrg# define SDMA_ATOMIC_LOOP(x) ((x) << 0) 11441687f09Smrg /* 0 - single_pass_atomic. 11541687f09Smrg * 1 - loop_until_compare_satisfied. 11641687f09Smrg */ 11741687f09Smrg# define SDMA_ATOMIC_TMZ(x) ((x) << 2) 11841687f09Smrg /* 0 - non-TMZ. 11941687f09Smrg * 1 - TMZ. 12041687f09Smrg */ 12141687f09Smrg# define SDMA_ATOMIC_OPCODE(x) ((x) << 9) 12241687f09Smrg /* TC_OP_ATOMIC_CMPSWAP_RTN_32 0x00000008 12341687f09Smrg * same as Packet 3 12441687f09Smrg */ 12541687f09Smrg 1263f012e29Smrg#define GFX_COMPUTE_NOP 0xffff1000 1273f012e29Smrg#define SDMA_NOP 0x0 1283f012e29Smrg 1293f012e29Smrg/* PM4 */ 1303f012e29Smrg#define PACKET_TYPE0 0 1313f012e29Smrg#define PACKET_TYPE1 1 1323f012e29Smrg#define PACKET_TYPE2 2 1333f012e29Smrg#define PACKET_TYPE3 3 1343f012e29Smrg 1353f012e29Smrg#define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3) 1363f012e29Smrg#define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF) 1373f012e29Smrg#define CP_PACKET0_GET_REG(h) ((h) & 0xFFFF) 1383f012e29Smrg#define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF) 1393f012e29Smrg#define PACKET0(reg, n) ((PACKET_TYPE0 << 30) | \ 1403f012e29Smrg ((reg) & 0xFFFF) | \ 1413f012e29Smrg ((n) & 0x3FFF) << 16) 1423f012e29Smrg#define CP_PACKET2 0x80000000 1433f012e29Smrg#define PACKET2_PAD_SHIFT 0 1443f012e29Smrg#define PACKET2_PAD_MASK (0x3fffffff << 0) 1453f012e29Smrg 1463f012e29Smrg#define PACKET2(v) (CP_PACKET2 | REG_SET(PACKET2_PAD, (v))) 1473f012e29Smrg 1483f012e29Smrg#define PACKET3(op, n) ((PACKET_TYPE3 << 30) | \ 1493f012e29Smrg (((op) & 0xFF) << 8) | \ 1503f012e29Smrg ((n) & 0x3FFF) << 16) 1515324fb0dSmrg#define PACKET3_COMPUTE(op, n) PACKET3(op, n) | (1 << 1) 1523f012e29Smrg 1533f012e29Smrg/* Packet 3 types */ 1543f012e29Smrg#define PACKET3_NOP 0x10 1553f012e29Smrg 1563f012e29Smrg#define PACKET3_WRITE_DATA 0x37 1573f012e29Smrg#define WRITE_DATA_DST_SEL(x) ((x) << 8) 1583f012e29Smrg /* 0 - register 1593f012e29Smrg * 1 - memory (sync - via GRBM) 1603f012e29Smrg * 2 - gl2 1613f012e29Smrg * 3 - gds 1623f012e29Smrg * 4 - reserved 1633f012e29Smrg * 5 - memory (async - direct) 1643f012e29Smrg */ 1653f012e29Smrg#define WR_ONE_ADDR (1 << 16) 1663f012e29Smrg#define WR_CONFIRM (1 << 20) 1673f012e29Smrg#define WRITE_DATA_CACHE_POLICY(x) ((x) << 25) 1683f012e29Smrg /* 0 - LRU 1693f012e29Smrg * 1 - Stream 1703f012e29Smrg */ 1713f012e29Smrg#define WRITE_DATA_ENGINE_SEL(x) ((x) << 30) 1723f012e29Smrg /* 0 - me 1733f012e29Smrg * 1 - pfp 1743f012e29Smrg * 2 - ce 1753f012e29Smrg */ 1763f012e29Smrg 17741687f09Smrg#define PACKET3_ATOMIC_MEM 0x1E 17841687f09Smrg#define TC_OP_ATOMIC_CMPSWAP_RTN_32 0x00000008 17941687f09Smrg#define ATOMIC_MEM_COMMAND(x) ((x) << 8) 18041687f09Smrg /* 0 - single_pass_atomic. 18141687f09Smrg * 1 - loop_until_compare_satisfied. 18241687f09Smrg */ 18341687f09Smrg#define ATOMIC_MEM_CACHEPOLICAY(x) ((x) << 25) 18441687f09Smrg /* 0 - lru. 18541687f09Smrg * 1 - stream. 18641687f09Smrg */ 18741687f09Smrg#define ATOMIC_MEM_ENGINESEL(x) ((x) << 30) 18841687f09Smrg /* 0 - micro_engine. 18941687f09Smrg */ 19041687f09Smrg 1913f012e29Smrg#define PACKET3_DMA_DATA 0x50 1923f012e29Smrg/* 1. header 1933f012e29Smrg * 2. CONTROL 1943f012e29Smrg * 3. SRC_ADDR_LO or DATA [31:0] 1953f012e29Smrg * 4. SRC_ADDR_HI [31:0] 1963f012e29Smrg * 5. DST_ADDR_LO [31:0] 1973f012e29Smrg * 6. DST_ADDR_HI [7:0] 1983f012e29Smrg * 7. COMMAND [30:21] | BYTE_COUNT [20:0] 1993f012e29Smrg */ 2003f012e29Smrg/* CONTROL */ 2013f012e29Smrg# define PACKET3_DMA_DATA_ENGINE(x) ((x) << 0) 2023f012e29Smrg /* 0 - ME 2033f012e29Smrg * 1 - PFP 2043f012e29Smrg */ 2053f012e29Smrg# define PACKET3_DMA_DATA_SRC_CACHE_POLICY(x) ((x) << 13) 2063f012e29Smrg /* 0 - LRU 2073f012e29Smrg * 1 - Stream 2083f012e29Smrg * 2 - Bypass 2093f012e29Smrg */ 2103f012e29Smrg# define PACKET3_DMA_DATA_SRC_VOLATILE (1 << 15) 2113f012e29Smrg# define PACKET3_DMA_DATA_DST_SEL(x) ((x) << 20) 2123f012e29Smrg /* 0 - DST_ADDR using DAS 2133f012e29Smrg * 1 - GDS 2143f012e29Smrg * 3 - DST_ADDR using L2 2153f012e29Smrg */ 2163f012e29Smrg# define PACKET3_DMA_DATA_DST_CACHE_POLICY(x) ((x) << 25) 2173f012e29Smrg /* 0 - LRU 2183f012e29Smrg * 1 - Stream 2193f012e29Smrg * 2 - Bypass 2203f012e29Smrg */ 2213f012e29Smrg# define PACKET3_DMA_DATA_DST_VOLATILE (1 << 27) 2223f012e29Smrg# define PACKET3_DMA_DATA_SRC_SEL(x) ((x) << 29) 2233f012e29Smrg /* 0 - SRC_ADDR using SAS 2243f012e29Smrg * 1 - GDS 2253f012e29Smrg * 2 - DATA 2263f012e29Smrg * 3 - SRC_ADDR using L2 2273f012e29Smrg */ 2283f012e29Smrg# define PACKET3_DMA_DATA_CP_SYNC (1 << 31) 2293f012e29Smrg/* COMMAND */ 2303f012e29Smrg# define PACKET3_DMA_DATA_DIS_WC (1 << 21) 2313f012e29Smrg# define PACKET3_DMA_DATA_CMD_SRC_SWAP(x) ((x) << 22) 2323f012e29Smrg /* 0 - none 2333f012e29Smrg * 1 - 8 in 16 2343f012e29Smrg * 2 - 8 in 32 2353f012e29Smrg * 3 - 8 in 64 2363f012e29Smrg */ 2373f012e29Smrg# define PACKET3_DMA_DATA_CMD_DST_SWAP(x) ((x) << 24) 2383f012e29Smrg /* 0 - none 2393f012e29Smrg * 1 - 8 in 16 2403f012e29Smrg * 2 - 8 in 32 2413f012e29Smrg * 3 - 8 in 64 2423f012e29Smrg */ 2433f012e29Smrg# define PACKET3_DMA_DATA_CMD_SAS (1 << 26) 2443f012e29Smrg /* 0 - memory 2453f012e29Smrg * 1 - register 2463f012e29Smrg */ 2473f012e29Smrg# define PACKET3_DMA_DATA_CMD_DAS (1 << 27) 2483f012e29Smrg /* 0 - memory 2493f012e29Smrg * 1 - register 2503f012e29Smrg */ 2513f012e29Smrg# define PACKET3_DMA_DATA_CMD_SAIC (1 << 28) 2523f012e29Smrg# define PACKET3_DMA_DATA_CMD_DAIC (1 << 29) 2533f012e29Smrg# define PACKET3_DMA_DATA_CMD_RAW_WAIT (1 << 30) 2543f012e29Smrg 255d8807b2fSmrg#define SDMA_PACKET_SI(op, b, t, s, cnt) ((((op) & 0xF) << 28) | \ 256d8807b2fSmrg (((b) & 0x1) << 26) | \ 257d8807b2fSmrg (((t) & 0x1) << 23) | \ 258d8807b2fSmrg (((s) & 0x1) << 22) | \ 259d8807b2fSmrg (((cnt) & 0xFFFFF) << 0)) 260d8807b2fSmrg#define SDMA_OPCODE_COPY_SI 3 261d8807b2fSmrg#define SDMA_OPCODE_CONSTANT_FILL_SI 13 262d8807b2fSmrg#define SDMA_NOP_SI 0xf 263d8807b2fSmrg#define GFX_COMPUTE_NOP_SI 0x80000000 264d8807b2fSmrg#define PACKET3_DMA_DATA_SI 0x41 265d8807b2fSmrg# define PACKET3_DMA_DATA_SI_ENGINE(x) ((x) << 27) 266d8807b2fSmrg /* 0 - ME 267d8807b2fSmrg * 1 - PFP 268d8807b2fSmrg */ 269d8807b2fSmrg# define PACKET3_DMA_DATA_SI_DST_SEL(x) ((x) << 20) 270d8807b2fSmrg /* 0 - DST_ADDR using DAS 271d8807b2fSmrg * 1 - GDS 272d8807b2fSmrg * 3 - DST_ADDR using L2 273d8807b2fSmrg */ 274d8807b2fSmrg# define PACKET3_DMA_DATA_SI_SRC_SEL(x) ((x) << 29) 275d8807b2fSmrg /* 0 - SRC_ADDR using SAS 276d8807b2fSmrg * 1 - GDS 277d8807b2fSmrg * 2 - DATA 278d8807b2fSmrg * 3 - SRC_ADDR using L2 279d8807b2fSmrg */ 280d8807b2fSmrg# define PACKET3_DMA_DATA_SI_CP_SYNC (1 << 31) 281d8807b2fSmrg 28200a23bdaSmrg 28300a23bdaSmrg#define PKT3_CONTEXT_CONTROL 0x28 28400a23bdaSmrg#define CONTEXT_CONTROL_LOAD_ENABLE(x) (((unsigned)(x) & 0x1) << 31) 28500a23bdaSmrg#define CONTEXT_CONTROL_LOAD_CE_RAM(x) (((unsigned)(x) & 0x1) << 28) 28600a23bdaSmrg#define CONTEXT_CONTROL_SHADOW_ENABLE(x) (((unsigned)(x) & 0x1) << 31) 28700a23bdaSmrg 28800a23bdaSmrg#define PKT3_CLEAR_STATE 0x12 28900a23bdaSmrg 29000a23bdaSmrg#define PKT3_SET_SH_REG 0x76 29100a23bdaSmrg#define PACKET3_SET_SH_REG_START 0x00002c00 29200a23bdaSmrg 29300a23bdaSmrg#define PACKET3_DISPATCH_DIRECT 0x15 2945324fb0dSmrg#define PACKET3_EVENT_WRITE 0x46 2955324fb0dSmrg#define PACKET3_ACQUIRE_MEM 0x58 2965324fb0dSmrg#define PACKET3_SET_CONTEXT_REG 0x69 2975324fb0dSmrg#define PACKET3_SET_UCONFIG_REG 0x79 2985324fb0dSmrg#define PACKET3_DRAW_INDEX_AUTO 0x2D 29900a23bdaSmrg/* gfx 8 */ 30000a23bdaSmrg#define mmCOMPUTE_PGM_LO 0x2e0c 30100a23bdaSmrg#define mmCOMPUTE_PGM_RSRC1 0x2e12 30200a23bdaSmrg#define mmCOMPUTE_TMPRING_SIZE 0x2e18 30300a23bdaSmrg#define mmCOMPUTE_USER_DATA_0 0x2e40 30400a23bdaSmrg#define mmCOMPUTE_USER_DATA_1 0x2e41 30500a23bdaSmrg#define mmCOMPUTE_RESOURCE_LIMITS 0x2e15 30600a23bdaSmrg#define mmCOMPUTE_NUM_THREAD_X 0x2e07 30700a23bdaSmrg 30800a23bdaSmrg 30900a23bdaSmrg 31000a23bdaSmrg#define SWAP_32(num) (((num & 0xff000000) >> 24) | \ 31100a23bdaSmrg ((num & 0x0000ff00) << 8) | \ 31200a23bdaSmrg ((num & 0x00ff0000) >> 8) | \ 31300a23bdaSmrg ((num & 0x000000ff) << 24)) 31400a23bdaSmrg 31500a23bdaSmrg 31600a23bdaSmrg/* Shader code 31700a23bdaSmrg * void main() 31800a23bdaSmrg{ 31900a23bdaSmrg 32000a23bdaSmrg float x = some_input; 32100a23bdaSmrg for (unsigned i = 0; i < 1000000; i++) 32200a23bdaSmrg x = sin(x); 32300a23bdaSmrg 32400a23bdaSmrg u[0] = 42u; 32500a23bdaSmrg} 32600a23bdaSmrg*/ 32700a23bdaSmrg 32800a23bdaSmrgstatic uint32_t shader_bin[] = { 32900a23bdaSmrg SWAP_32(0x800082be), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0x040085bf), 33000a23bdaSmrg SWAP_32(0x02810281), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0xfcff84bf), 33100a23bdaSmrg SWAP_32(0xff0083be), SWAP_32(0x00f00000), SWAP_32(0xc10082be), SWAP_32(0xaa02007e), 33200a23bdaSmrg SWAP_32(0x000070e0), SWAP_32(0x00000080), SWAP_32(0x000081bf) 33300a23bdaSmrg}; 33400a23bdaSmrg 33500a23bdaSmrg#define CODE_OFFSET 512 33600a23bdaSmrg#define DATA_OFFSET 1024 33700a23bdaSmrg 3385324fb0dSmrgenum cs_type { 3395324fb0dSmrg CS_BUFFERCLEAR, 3409bd392adSmrg CS_BUFFERCOPY, 3419bd392adSmrg CS_HANG, 3429bd392adSmrg CS_HANG_SLOW 3435324fb0dSmrg}; 3445324fb0dSmrg 3455324fb0dSmrgstatic const uint32_t bufferclear_cs_shader_gfx9[] = { 3464babd585Smrg 0x260000ff, 0x000003ff, 0xd1fd0000, 0x04010c08, 3474babd585Smrg 0x7e020280, 0x7e040204, 0x7e060205, 0x7e080206, 3484babd585Smrg 0x7e0a0207, 0xe01c2000, 0x80000200, 0xbf8c0000, 3494babd585Smrg 0xbf810000 3505324fb0dSmrg}; 3515324fb0dSmrg 3525324fb0dSmrgstatic const uint32_t bufferclear_cs_shader_registers_gfx9[][2] = { 3535324fb0dSmrg {0x2e12, 0x000C0041}, //{ mmCOMPUTE_PGM_RSRC1, 0x000C0041 }, 3545324fb0dSmrg {0x2e13, 0x00000090}, //{ mmCOMPUTE_PGM_RSRC2, 0x00000090 }, 3555324fb0dSmrg {0x2e07, 0x00000040}, //{ mmCOMPUTE_NUM_THREAD_X, 0x00000040 }, 3565324fb0dSmrg {0x2e08, 0x00000001}, //{ mmCOMPUTE_NUM_THREAD_Y, 0x00000001 }, 3575324fb0dSmrg {0x2e09, 0x00000001}, //{ mmCOMPUTE_NUM_THREAD_Z, 0x00000001 } 3585324fb0dSmrg}; 3595324fb0dSmrg 3605324fb0dSmrgstatic const uint32_t bufferclear_cs_shader_registers_num_gfx9 = 5; 3615324fb0dSmrg 3625324fb0dSmrgstatic const uint32_t buffercopy_cs_shader_gfx9[] = { 3634babd585Smrg 0x260000ff, 0x000003ff, 0xd1fd0000, 0x04010c08, 3644babd585Smrg 0x7e020280, 0xe00c2000, 0x80000200, 0xbf8c0f70, 3654babd585Smrg 0xe01c2000, 0x80010200, 0xbf810000 3665324fb0dSmrg}; 3675324fb0dSmrg 3685324fb0dSmrgstatic const uint32_t preamblecache_gfx9[] = { 3695324fb0dSmrg 0xc0026900, 0x81, 0x80000000, 0x40004000, 0xc0026900, 0x8c, 0xaa99aaaa, 0x0, 3705324fb0dSmrg 0xc0026900, 0x90, 0x80000000, 0x40004000, 0xc0026900, 0x94, 0x80000000, 0x40004000, 3715324fb0dSmrg 0xc0026900, 0xb4, 0x0, 0x3f800000, 0xc0016900, 0x103, 0x0, 3725324fb0dSmrg 0xc0016900, 0x208, 0x0, 0xc0016900, 0x290, 0x0, 3735324fb0dSmrg 0xc0016900, 0x2a1, 0x0, 0xc0026900, 0x2ad, 0x0, 0x0, 3745324fb0dSmrg 0xc0016900, 0x2d5, 0x10000, 0xc0016900, 0x2dc, 0x0, 3755324fb0dSmrg 0xc0066900, 0x2de, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xc0026900, 0x2e5, 0x0, 0x0, 3765324fb0dSmrg 0xc0056900, 0x2f9, 0x5, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 37788f8a8d2Smrg 0xc0036900, 0x311, 0x3, 0, 0x100000, 0xc0026900, 0x316, 0x1e, 0x20, 3785324fb0dSmrg 0xc0016900, 0x349, 0x0, 0xc0016900, 0x358, 0x0, 0xc0016900, 0x367, 0x0, 3795324fb0dSmrg 0xc0016900, 0x376, 0x0, 0xc0016900, 0x385, 0x0, 0xc0016900, 0x19, 0x0, 3805324fb0dSmrg 0xc0056900, 0xe8, 0x0, 0x0, 0x0, 0x0, 0x0, 3815324fb0dSmrg 0xc0076900, 0x1e1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 3825324fb0dSmrg 0xc0026900, 0x204, 0x90000, 0x4, 0xc0046900, 0x20c, 0x0, 0x0, 0x0, 0x0, 3835324fb0dSmrg 0xc0016900, 0x2b2, 0x0, 0xc0026900, 0x30e, 0xffffffff, 0xffffffff, 38488f8a8d2Smrg 0xc0016900, 0x314, 0x0, 0xc0016900, 0x2a6, 0, 0xc0016900, 0x210, 0, 38588f8a8d2Smrg 0xc0002f00, 0x1, 0xc0016900, 0x1, 0x1, 3865324fb0dSmrg 0xc0016900, 0x18, 0x2, 0xc0016900, 0x206, 0x300, 0xc0017900, 0x20000243, 0x0, 3875324fb0dSmrg 0xc0017900, 0x248, 0xffffffff, 0xc0017900, 0x249, 0x0, 0xc0017900, 0x24a, 0x0, 3885324fb0dSmrg 0xc0017900, 0x24b, 0x0 3895324fb0dSmrg}; 3905324fb0dSmrg 3915324fb0dSmrgenum ps_type { 3925324fb0dSmrg PS_CONST, 3939bd392adSmrg PS_TEX, 3949bd392adSmrg PS_HANG, 3959bd392adSmrg PS_HANG_SLOW 3965324fb0dSmrg}; 3975324fb0dSmrg 3985324fb0dSmrgstatic const uint32_t ps_const_shader_gfx9[] = { 3995324fb0dSmrg 0x7E000200, 0x7E020201, 0x7E040202, 0x7E060203, 4005324fb0dSmrg 0xD2960000, 0x00020300, 0xD2960001, 0x00020702, 4015324fb0dSmrg 0xC4001C0F, 0x00000100, 0xBF810000 4025324fb0dSmrg}; 4035324fb0dSmrg 4045324fb0dSmrgstatic const uint32_t ps_const_shader_patchinfo_code_size_gfx9 = 6; 4055324fb0dSmrg 4065324fb0dSmrgstatic const uint32_t ps_const_shader_patchinfo_code_gfx9[][10][6] = { 4075324fb0dSmrg {{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001890, 0x00000000 }, 4085324fb0dSmrg { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001801, 0x00000000 }, 4095324fb0dSmrg { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000100 }, 4105324fb0dSmrg { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000300 }, 4115324fb0dSmrg { 0xD2960000, 0x00020300, 0xD2960001, 0x00020702, 0xC4001C0F, 0x00000100 }, 4125324fb0dSmrg { 0xD2950000, 0x00020300, 0xD2950001, 0x00020702, 0xC4001C0F, 0x00000100 }, 4135324fb0dSmrg { 0xD2940000, 0x00020300, 0xD2940001, 0x00020702, 0xC4001C0F, 0x00000100 }, 4145324fb0dSmrg { 0xD2970000, 0x00020300, 0xD2970001, 0x00020702, 0xC4001C0F, 0x00000100 }, 4155324fb0dSmrg { 0xD2980000, 0x00020300, 0xD2980001, 0x00020702, 0xC4001C0F, 0x00000100 }, 4165324fb0dSmrg { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC400180F, 0x03020100 } 4175324fb0dSmrg } 4185324fb0dSmrg}; 4195324fb0dSmrg 4205324fb0dSmrgstatic const uint32_t ps_const_shader_patchinfo_offset_gfx9[] = { 4215324fb0dSmrg 0x00000004 4225324fb0dSmrg}; 4235324fb0dSmrg 4245324fb0dSmrgstatic const uint32_t ps_num_sh_registers_gfx9 = 2; 4255324fb0dSmrg 4265324fb0dSmrgstatic const uint32_t ps_const_sh_registers_gfx9[][2] = { 4275324fb0dSmrg {0x2C0A, 0x000C0040},//{ mmSPI_SHADER_PGM_RSRC1_PS, 0x000C0040 }, 4285324fb0dSmrg {0x2C0B, 0x00000008}, //{ mmSPI_SHADER_PGM_RSRC2_PS, 0x00000008 } 4295324fb0dSmrg}; 4305324fb0dSmrg 4315324fb0dSmrgstatic const uint32_t ps_num_context_registers_gfx9 = 7; 4325324fb0dSmrg 4335324fb0dSmrgstatic const uint32_t ps_const_context_reg_gfx9[][2] = { 4345324fb0dSmrg {0xA1B4, 0x00000002}, //{ mmSPI_PS_INPUT_ADDR, 0x00000002 }, 4355324fb0dSmrg {0xA1B6, 0x00000000}, //{ mmSPI_PS_IN_CONTROL, 0x00000000 }, 4365324fb0dSmrg {0xA08F, 0x0000000F}, //{ mmCB_SHADER_MASK, 0x0000000F }, 4375324fb0dSmrg {0xA203, 0x00000010}, //{ mmDB_SHADER_CONTROL, 0x00000010 }, 4385324fb0dSmrg {0xA1C4, 0x00000000}, //{ mmSPI_SHADER_Z_FORMAT, 0x00000000 }, 4395324fb0dSmrg {0xA1B8, 0x00000000}, //{ mmSPI_BARYC_CNTL, 0x00000000 /* Always 0 for now */}, 4405324fb0dSmrg {0xA1C5, 0x00000004}, //{ mmSPI_SHADER_COL_FORMAT, 0x00000004 } 4415324fb0dSmrg}; 4425324fb0dSmrg 4435324fb0dSmrgstatic const uint32_t ps_tex_shader_gfx9[] = { 4445324fb0dSmrg 0xBEFC000C, 0xBE8E017E, 0xBEFE077E, 0xD4180000, 4455324fb0dSmrg 0xD4190001, 0xD41C0100, 0xD41D0101, 0xF0800F00, 4465324fb0dSmrg 0x00400206, 0xBEFE010E, 0xBF8C0F70, 0xD2960000, 4475324fb0dSmrg 0x00020702, 0xD2960001, 0x00020B04, 0xC4001C0F, 4485324fb0dSmrg 0x00000100, 0xBF810000 4495324fb0dSmrg}; 4505324fb0dSmrg 4515324fb0dSmrgstatic const uint32_t ps_tex_shader_patchinfo_offset_gfx9[] = { 4525324fb0dSmrg 0x0000000B 4535324fb0dSmrg}; 4545324fb0dSmrg 4555324fb0dSmrgstatic const uint32_t ps_tex_shader_patchinfo_code_size_gfx9 = 6; 4565324fb0dSmrg 4575324fb0dSmrgstatic const uint32_t ps_tex_shader_patchinfo_code_gfx9[][10][6] = { 4585324fb0dSmrg {{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001890, 0x00000000 }, 4595324fb0dSmrg { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001801, 0x00000002 }, 4605324fb0dSmrg { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000302 }, 4615324fb0dSmrg { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000502 }, 4625324fb0dSmrg { 0xD2960000, 0x00020702, 0xD2960001, 0x00020B04, 0xC4001C0F, 0x00000100 }, 4635324fb0dSmrg { 0xD2950000, 0x00020702, 0xD2950001, 0x00020B04, 0xC4001C0F, 0x00000100 }, 4645324fb0dSmrg { 0xD2940000, 0x00020702, 0xD2940001, 0x00020B04, 0xC4001C0F, 0x00000100 }, 4655324fb0dSmrg { 0xD2970000, 0x00020702, 0xD2970001, 0x00020B04, 0xC4001C0F, 0x00000100 }, 4665324fb0dSmrg { 0xD2980000, 0x00020702, 0xD2980001, 0x00020B04, 0xC4001C0F, 0x00000100 }, 4675324fb0dSmrg { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC400180F, 0x05040302 } 4685324fb0dSmrg } 4695324fb0dSmrg}; 4705324fb0dSmrg 4715324fb0dSmrgstatic const uint32_t ps_tex_sh_registers_gfx9[][2] = { 4725324fb0dSmrg {0x2C0A, 0x000C0081},//{ mmSPI_SHADER_PGM_RSRC1_PS, 0x000C0081 }, 4735324fb0dSmrg {0x2C0B, 0x00000018}, //{ mmSPI_SHADER_PGM_RSRC2_PS, 0x00000018 } 4745324fb0dSmrg}; 4755324fb0dSmrg 4765324fb0dSmrgstatic const uint32_t ps_tex_context_reg_gfx9[][2] = { 4775324fb0dSmrg {0xA1B4, 0x00000002}, //{ mmSPI_PS_INPUT_ADDR, 0x00000002 }, 4785324fb0dSmrg {0xA1B6, 0x00000001}, //{ mmSPI_PS_IN_CONTROL, 0x00000001 }, 4795324fb0dSmrg {0xA08F, 0x0000000F}, //{ mmCB_SHADER_MASK, 0x0000000F }, 4805324fb0dSmrg {0xA203, 0x00000010}, //{ mmDB_SHADER_CONTROL, 0x00000010 }, 4815324fb0dSmrg {0xA1C4, 0x00000000}, //{ mmSPI_SHADER_Z_FORMAT, 0x00000000 }, 4825324fb0dSmrg {0xA1B8, 0x00000000}, //{ mmSPI_BARYC_CNTL, 0x00000000 /* Always 0 for now */}, 4835324fb0dSmrg {0xA1C5, 0x00000004}, //{ mmSPI_SHADER_COL_FORMAT, 0x00000004 } 4845324fb0dSmrg}; 4855324fb0dSmrg 4865324fb0dSmrgstatic const uint32_t vs_RectPosTexFast_shader_gfx9[] = { 4875324fb0dSmrg 0x7E000B00, 0x020000F3, 0xD042000A, 0x00010100, 4885324fb0dSmrg 0x7E020202, 0x7E040200, 0x020000F3, 0x7E060206, 4895324fb0dSmrg 0x7E080204, 0xD1000001, 0x002A0302, 0x7C840080, 4905324fb0dSmrg 0x7E000200, 0x7E040203, 0x7E0A0201, 0xD1000003, 4915324fb0dSmrg 0x002A0704, 0x7E0C0207, 0x7E0E0205, 0x00000101, 4925324fb0dSmrg 0x00020505, 0x7E040208, 0x7E0A02F2, 0x00060903, 4935324fb0dSmrg 0x00080D07, 0x7E0C0209, 0xC40008CF, 0x05020100, 4945324fb0dSmrg 0xC400020F, 0x05060403, 0xBF810000 4955324fb0dSmrg}; 4965324fb0dSmrg 4975324fb0dSmrgstatic const uint32_t cached_cmd_gfx9[] = { 4985324fb0dSmrg 0xc0016900, 0x0, 0x0, 0xc0026900, 0x3, 0x2a, 0x0, 4995324fb0dSmrg 0xc0046900, 0xa, 0x0, 0x0, 0x0, 0x200020, 5005324fb0dSmrg 0xc0016900, 0x83, 0xffff, 0xc0026900, 0x8e, 0xf, 0xf, 5019bd392adSmrg 0xc0056900, 0x105, 0x0, 0x0, 0x0, 0x0, 0x12, 5025324fb0dSmrg 0xc0026900, 0x10b, 0x0, 0x0, 0xc0016900, 0x1e0, 0x0, 5035324fb0dSmrg 0xc0036900, 0x200, 0x0, 0x10000, 0xcc0011, 5045324fb0dSmrg 0xc0026900, 0x292, 0x20, 0x60201b8, 5055324fb0dSmrg 0xc0026900, 0x2b0, 0x0, 0x0, 0xc0016900, 0x2f8, 0x0 5065324fb0dSmrg}; 50700a23bdaSmrg 5089bd392adSmrgunsigned int memcpy_ps_hang[] = { 5099bd392adSmrg 0xFFFFFFFF, 0xBEFE0A7E, 0xBEFC0304, 0xC0C20100, 5109bd392adSmrg 0xC0800300, 0xC8080000, 0xC80C0100, 0xC8090001, 5119bd392adSmrg 0xC80D0101, 0xBF8C007F, 0xF0800F00, 0x00010002, 5129bd392adSmrg 0xBEFE040C, 0xBF8C0F70, 0xBF800000, 0xBF800000, 5139bd392adSmrg 0xF800180F, 0x03020100, 0xBF810000 5149bd392adSmrg}; 5159bd392adSmrg 5169bd392adSmrgstruct amdgpu_test_shader { 5179bd392adSmrg uint32_t *shader; 5189bd392adSmrg uint32_t header_length; 5199bd392adSmrg uint32_t body_length; 5209bd392adSmrg uint32_t foot_length; 5219bd392adSmrg}; 5229bd392adSmrg 5239bd392adSmrgunsigned int memcpy_cs_hang_slow_ai_codes[] = { 5249bd392adSmrg 0xd1fd0000, 0x04010c08, 0xe00c2000, 0x80000100, 5259bd392adSmrg 0xbf8c0f70, 0xe01c2000, 0x80010100, 0xbf810000 5269bd392adSmrg}; 5279bd392adSmrg 5289bd392adSmrgstruct amdgpu_test_shader memcpy_cs_hang_slow_ai = { 5299bd392adSmrg memcpy_cs_hang_slow_ai_codes, 5309bd392adSmrg 4, 5319bd392adSmrg 3, 5329bd392adSmrg 1 5339bd392adSmrg}; 5349bd392adSmrg 5359bd392adSmrgunsigned int memcpy_cs_hang_slow_rv_codes[] = { 5369bd392adSmrg 0x8e00860c, 0x32000000, 0xe00c2000, 0x80010100, 5379bd392adSmrg 0xbf8c0f70, 0xe01c2000, 0x80020100, 0xbf810000 5389bd392adSmrg}; 5399bd392adSmrg 5409bd392adSmrgstruct amdgpu_test_shader memcpy_cs_hang_slow_rv = { 5419bd392adSmrg memcpy_cs_hang_slow_rv_codes, 5429bd392adSmrg 4, 5439bd392adSmrg 3, 5449bd392adSmrg 1 5459bd392adSmrg}; 5469bd392adSmrg 5479bd392adSmrgunsigned int memcpy_ps_hang_slow_ai_codes[] = { 5489bd392adSmrg 0xbefc000c, 0xbe8e017e, 0xbefe077e, 0xd4080000, 5499bd392adSmrg 0xd4090001, 0xd40c0100, 0xd40d0101, 0xf0800f00, 5509bd392adSmrg 0x00400002, 0xbefe010e, 0xbf8c0f70, 0xbf800000, 5519bd392adSmrg 0xbf800000, 0xbf800000, 0xbf800000, 0xc400180f, 5529bd392adSmrg 0x03020100, 0xbf810000 5539bd392adSmrg}; 5549bd392adSmrg 5559bd392adSmrgstruct amdgpu_test_shader memcpy_ps_hang_slow_ai = { 5569bd392adSmrg memcpy_ps_hang_slow_ai_codes, 5579bd392adSmrg 7, 5589bd392adSmrg 2, 5599bd392adSmrg 9 5609bd392adSmrg}; 5619bd392adSmrg 5627cdc0497Smrgint amdgpu_bo_alloc_and_map_raw(amdgpu_device_handle dev, unsigned size, 5637cdc0497Smrg unsigned alignment, unsigned heap, uint64_t alloc_flags, 5647cdc0497Smrg uint64_t mapping_flags, amdgpu_bo_handle *bo, void **cpu, 5657cdc0497Smrg uint64_t *mc_address, 5667cdc0497Smrg amdgpu_va_handle *va_handle) 5677cdc0497Smrg{ 5687cdc0497Smrg struct amdgpu_bo_alloc_request request = {}; 5697cdc0497Smrg amdgpu_bo_handle buf_handle; 5707cdc0497Smrg amdgpu_va_handle handle; 5717cdc0497Smrg uint64_t vmc_addr; 5727cdc0497Smrg int r; 5737cdc0497Smrg 5747cdc0497Smrg request.alloc_size = size; 5757cdc0497Smrg request.phys_alignment = alignment; 5767cdc0497Smrg request.preferred_heap = heap; 5777cdc0497Smrg request.flags = alloc_flags; 5787cdc0497Smrg 5797cdc0497Smrg r = amdgpu_bo_alloc(dev, &request, &buf_handle); 5807cdc0497Smrg if (r) 5817cdc0497Smrg return r; 5827cdc0497Smrg 5837cdc0497Smrg r = amdgpu_va_range_alloc(dev, 5847cdc0497Smrg amdgpu_gpu_va_range_general, 5857cdc0497Smrg size, alignment, 0, &vmc_addr, 5867cdc0497Smrg &handle, 0); 5877cdc0497Smrg if (r) 5887cdc0497Smrg goto error_va_alloc; 5897cdc0497Smrg 5907cdc0497Smrg r = amdgpu_bo_va_op_raw(dev, buf_handle, 0, ALIGN(size, getpagesize()), vmc_addr, 5917cdc0497Smrg AMDGPU_VM_PAGE_READABLE | 5927cdc0497Smrg AMDGPU_VM_PAGE_WRITEABLE | 5937cdc0497Smrg AMDGPU_VM_PAGE_EXECUTABLE | 5947cdc0497Smrg mapping_flags, 5957cdc0497Smrg AMDGPU_VA_OP_MAP); 5967cdc0497Smrg if (r) 5977cdc0497Smrg goto error_va_map; 5987cdc0497Smrg 5997cdc0497Smrg r = amdgpu_bo_cpu_map(buf_handle, cpu); 6007cdc0497Smrg if (r) 6017cdc0497Smrg goto error_cpu_map; 6027cdc0497Smrg 6037cdc0497Smrg *bo = buf_handle; 6047cdc0497Smrg *mc_address = vmc_addr; 6057cdc0497Smrg *va_handle = handle; 6067cdc0497Smrg 6077cdc0497Smrg return 0; 6087cdc0497Smrg 6097cdc0497Smrg error_cpu_map: 6107cdc0497Smrg amdgpu_bo_cpu_unmap(buf_handle); 6117cdc0497Smrg 6127cdc0497Smrg error_va_map: 6137cdc0497Smrg amdgpu_bo_va_op(buf_handle, 0, size, vmc_addr, 0, AMDGPU_VA_OP_UNMAP); 6147cdc0497Smrg 6157cdc0497Smrg error_va_alloc: 6167cdc0497Smrg amdgpu_bo_free(buf_handle); 6177cdc0497Smrg return r; 6187cdc0497Smrg} 6197cdc0497Smrg 6207cdc0497Smrg 6217cdc0497Smrg 62241687f09SmrgCU_BOOL suite_basic_tests_enable(void) 62341687f09Smrg{ 62441687f09Smrg 62541687f09Smrg if (amdgpu_device_initialize(drm_amdgpu[0], &major_version, 62641687f09Smrg &minor_version, &device_handle)) 62741687f09Smrg return CU_FALSE; 62841687f09Smrg 6294babd585Smrg 6304babd585Smrg family_id = device_handle->info.family_id; 6314babd585Smrg chip_id = device_handle->info.chip_external_rev; 6324babd585Smrg chip_rev = device_handle->info.chip_rev; 63341687f09Smrg 63441687f09Smrg if (amdgpu_device_deinitialize(device_handle)) 63541687f09Smrg return CU_FALSE; 63641687f09Smrg 6374babd585Smrg /* disable gfx engine basic test cases for some asics have no CPG */ 6384babd585Smrg if (asic_is_gfx_pipe_removed(family_id, chip_id, chip_rev)) { 63941687f09Smrg if (amdgpu_set_test_active("Basic Tests", 64041687f09Smrg "Command submission Test (GFX)", 64141687f09Smrg CU_FALSE)) 64241687f09Smrg fprintf(stderr, "test deactivation failed - %s\n", 64341687f09Smrg CU_get_error_msg()); 64441687f09Smrg 64541687f09Smrg if (amdgpu_set_test_active("Basic Tests", 64641687f09Smrg "Command submission Test (Multi-Fence)", 64741687f09Smrg CU_FALSE)) 64841687f09Smrg fprintf(stderr, "test deactivation failed - %s\n", 64941687f09Smrg CU_get_error_msg()); 65041687f09Smrg 65141687f09Smrg if (amdgpu_set_test_active("Basic Tests", 65241687f09Smrg "Sync dependency Test", 65341687f09Smrg CU_FALSE)) 65441687f09Smrg fprintf(stderr, "test deactivation failed - %s\n", 65541687f09Smrg CU_get_error_msg()); 65641687f09Smrg } 65741687f09Smrg 65841687f09Smrg return CU_TRUE; 65941687f09Smrg} 66041687f09Smrg 6613f012e29Smrgint suite_basic_tests_init(void) 6623f012e29Smrg{ 663d8807b2fSmrg struct amdgpu_gpu_info gpu_info = {0}; 6643f012e29Smrg int r; 6653f012e29Smrg 6663f012e29Smrg r = amdgpu_device_initialize(drm_amdgpu[0], &major_version, 6673f012e29Smrg &minor_version, &device_handle); 6683f012e29Smrg 669d8807b2fSmrg if (r) { 670037b3c26Smrg if ((r == -EACCES) && (errno == EACCES)) 671037b3c26Smrg printf("\n\nError:%s. " 672037b3c26Smrg "Hint:Try to run this test program as root.", 673037b3c26Smrg strerror(errno)); 6743f012e29Smrg return CUE_SINIT_FAILED; 675037b3c26Smrg } 676d8807b2fSmrg 677d8807b2fSmrg r = amdgpu_query_gpu_info(device_handle, &gpu_info); 678d8807b2fSmrg if (r) 679d8807b2fSmrg return CUE_SINIT_FAILED; 680d8807b2fSmrg 681d8807b2fSmrg family_id = gpu_info.family_id; 682d8807b2fSmrg 683d8807b2fSmrg return CUE_SUCCESS; 6843f012e29Smrg} 6853f012e29Smrg 6863f012e29Smrgint suite_basic_tests_clean(void) 6873f012e29Smrg{ 6883f012e29Smrg int r = amdgpu_device_deinitialize(device_handle); 6893f012e29Smrg 6903f012e29Smrg if (r == 0) 6913f012e29Smrg return CUE_SUCCESS; 6923f012e29Smrg else 6933f012e29Smrg return CUE_SCLEAN_FAILED; 6943f012e29Smrg} 6953f012e29Smrg 6963f012e29Smrgstatic void amdgpu_query_info_test(void) 6973f012e29Smrg{ 6983f012e29Smrg struct amdgpu_gpu_info gpu_info = {0}; 6993f012e29Smrg uint32_t version, feature; 7003f012e29Smrg int r; 7013f012e29Smrg 7023f012e29Smrg r = amdgpu_query_gpu_info(device_handle, &gpu_info); 7033f012e29Smrg CU_ASSERT_EQUAL(r, 0); 7043f012e29Smrg 7053f012e29Smrg r = amdgpu_query_firmware_version(device_handle, AMDGPU_INFO_FW_VCE, 0, 7063f012e29Smrg 0, &version, &feature); 7073f012e29Smrg CU_ASSERT_EQUAL(r, 0); 7083f012e29Smrg} 7093f012e29Smrg 7103f012e29Smrgstatic void amdgpu_command_submission_gfx_separate_ibs(void) 7113f012e29Smrg{ 7123f012e29Smrg amdgpu_context_handle context_handle; 7133f012e29Smrg amdgpu_bo_handle ib_result_handle, ib_result_ce_handle; 7143f012e29Smrg void *ib_result_cpu, *ib_result_ce_cpu; 7153f012e29Smrg uint64_t ib_result_mc_address, ib_result_ce_mc_address; 7163f012e29Smrg struct amdgpu_cs_request ibs_request = {0}; 7173f012e29Smrg struct amdgpu_cs_ib_info ib_info[2]; 7183f012e29Smrg struct amdgpu_cs_fence fence_status = {0}; 7193f012e29Smrg uint32_t *ptr; 7203f012e29Smrg uint32_t expired; 7213f012e29Smrg amdgpu_bo_list_handle bo_list; 7223f012e29Smrg amdgpu_va_handle va_handle, va_handle_ce; 723d8807b2fSmrg int r, i = 0; 7243f012e29Smrg 7253f012e29Smrg r = amdgpu_cs_ctx_create(device_handle, &context_handle); 7263f012e29Smrg CU_ASSERT_EQUAL(r, 0); 7273f012e29Smrg 7283f012e29Smrg r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096, 7293f012e29Smrg AMDGPU_GEM_DOMAIN_GTT, 0, 7303f012e29Smrg &ib_result_handle, &ib_result_cpu, 7313f012e29Smrg &ib_result_mc_address, &va_handle); 7323f012e29Smrg CU_ASSERT_EQUAL(r, 0); 7333f012e29Smrg 7343f012e29Smrg r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096, 7353f012e29Smrg AMDGPU_GEM_DOMAIN_GTT, 0, 7363f012e29Smrg &ib_result_ce_handle, &ib_result_ce_cpu, 7373f012e29Smrg &ib_result_ce_mc_address, &va_handle_ce); 7383f012e29Smrg CU_ASSERT_EQUAL(r, 0); 7393f012e29Smrg 7403f012e29Smrg r = amdgpu_get_bo_list(device_handle, ib_result_handle, 7413f012e29Smrg ib_result_ce_handle, &bo_list); 7423f012e29Smrg CU_ASSERT_EQUAL(r, 0); 7433f012e29Smrg 7443f012e29Smrg memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info)); 7453f012e29Smrg 7463f012e29Smrg /* IT_SET_CE_DE_COUNTERS */ 7473f012e29Smrg ptr = ib_result_ce_cpu; 748d8807b2fSmrg if (family_id != AMDGPU_FAMILY_SI) { 749d8807b2fSmrg ptr[i++] = 0xc0008900; 750d8807b2fSmrg ptr[i++] = 0; 751d8807b2fSmrg } 752d8807b2fSmrg ptr[i++] = 0xc0008400; 753d8807b2fSmrg ptr[i++] = 1; 7543f012e29Smrg ib_info[0].ib_mc_address = ib_result_ce_mc_address; 755d8807b2fSmrg ib_info[0].size = i; 7563f012e29Smrg ib_info[0].flags = AMDGPU_IB_FLAG_CE; 7573f012e29Smrg 7583f012e29Smrg /* IT_WAIT_ON_CE_COUNTER */ 7593f012e29Smrg ptr = ib_result_cpu; 7603f012e29Smrg ptr[0] = 0xc0008600; 7613f012e29Smrg ptr[1] = 0x00000001; 7623f012e29Smrg ib_info[1].ib_mc_address = ib_result_mc_address; 7633f012e29Smrg ib_info[1].size = 2; 7643f012e29Smrg 7653f012e29Smrg ibs_request.ip_type = AMDGPU_HW_IP_GFX; 7663f012e29Smrg ibs_request.number_of_ibs = 2; 7673f012e29Smrg ibs_request.ibs = ib_info; 7683f012e29Smrg ibs_request.resources = bo_list; 7693f012e29Smrg ibs_request.fence_info.handle = NULL; 7703f012e29Smrg 7713f012e29Smrg r = amdgpu_cs_submit(context_handle, 0,&ibs_request, 1); 7723f012e29Smrg 7733f012e29Smrg CU_ASSERT_EQUAL(r, 0); 7743f012e29Smrg 7753f012e29Smrg fence_status.context = context_handle; 7763f012e29Smrg fence_status.ip_type = AMDGPU_HW_IP_GFX; 7773f012e29Smrg fence_status.ip_instance = 0; 7783f012e29Smrg fence_status.fence = ibs_request.seq_no; 7793f012e29Smrg 7803f012e29Smrg r = amdgpu_cs_query_fence_status(&fence_status, 7813f012e29Smrg AMDGPU_TIMEOUT_INFINITE, 7823f012e29Smrg 0, &expired); 7833f012e29Smrg CU_ASSERT_EQUAL(r, 0); 7843f012e29Smrg 7853f012e29Smrg r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle, 7863f012e29Smrg ib_result_mc_address, 4096); 7873f012e29Smrg CU_ASSERT_EQUAL(r, 0); 7883f012e29Smrg 7893f012e29Smrg r = amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce, 7903f012e29Smrg ib_result_ce_mc_address, 4096); 7913f012e29Smrg CU_ASSERT_EQUAL(r, 0); 7923f012e29Smrg 7933f012e29Smrg r = amdgpu_bo_list_destroy(bo_list); 7943f012e29Smrg CU_ASSERT_EQUAL(r, 0); 7953f012e29Smrg 7963f012e29Smrg r = amdgpu_cs_ctx_free(context_handle); 7973f012e29Smrg CU_ASSERT_EQUAL(r, 0); 7983f012e29Smrg 7993f012e29Smrg} 8003f012e29Smrg 8013f012e29Smrgstatic void amdgpu_command_submission_gfx_shared_ib(void) 8023f012e29Smrg{ 8033f012e29Smrg amdgpu_context_handle context_handle; 8043f012e29Smrg amdgpu_bo_handle ib_result_handle; 8053f012e29Smrg void *ib_result_cpu; 8063f012e29Smrg uint64_t ib_result_mc_address; 8073f012e29Smrg struct amdgpu_cs_request ibs_request = {0}; 8083f012e29Smrg struct amdgpu_cs_ib_info ib_info[2]; 8093f012e29Smrg struct amdgpu_cs_fence fence_status = {0}; 8103f012e29Smrg uint32_t *ptr; 8113f012e29Smrg uint32_t expired; 8123f012e29Smrg amdgpu_bo_list_handle bo_list; 8133f012e29Smrg amdgpu_va_handle va_handle; 814d8807b2fSmrg int r, i = 0; 8153f012e29Smrg 8163f012e29Smrg r = amdgpu_cs_ctx_create(device_handle, &context_handle); 8173f012e29Smrg CU_ASSERT_EQUAL(r, 0); 8183f012e29Smrg 8193f012e29Smrg r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096, 8203f012e29Smrg AMDGPU_GEM_DOMAIN_GTT, 0, 8213f012e29Smrg &ib_result_handle, &ib_result_cpu, 8223f012e29Smrg &ib_result_mc_address, &va_handle); 8233f012e29Smrg CU_ASSERT_EQUAL(r, 0); 8243f012e29Smrg 8253f012e29Smrg r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL, 8263f012e29Smrg &bo_list); 8273f012e29Smrg CU_ASSERT_EQUAL(r, 0); 8283f012e29Smrg 8293f012e29Smrg memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info)); 8303f012e29Smrg 8313f012e29Smrg /* IT_SET_CE_DE_COUNTERS */ 8323f012e29Smrg ptr = ib_result_cpu; 833d8807b2fSmrg if (family_id != AMDGPU_FAMILY_SI) { 834d8807b2fSmrg ptr[i++] = 0xc0008900; 835d8807b2fSmrg ptr[i++] = 0; 836d8807b2fSmrg } 837d8807b2fSmrg ptr[i++] = 0xc0008400; 838d8807b2fSmrg ptr[i++] = 1; 8393f012e29Smrg ib_info[0].ib_mc_address = ib_result_mc_address; 840d8807b2fSmrg ib_info[0].size = i; 8413f012e29Smrg ib_info[0].flags = AMDGPU_IB_FLAG_CE; 8423f012e29Smrg 8433f012e29Smrg ptr = (uint32_t *)ib_result_cpu + 4; 8443f012e29Smrg ptr[0] = 0xc0008600; 8453f012e29Smrg ptr[1] = 0x00000001; 8463f012e29Smrg ib_info[1].ib_mc_address = ib_result_mc_address + 16; 8473f012e29Smrg ib_info[1].size = 2; 8483f012e29Smrg 8493f012e29Smrg ibs_request.ip_type = AMDGPU_HW_IP_GFX; 8503f012e29Smrg ibs_request.number_of_ibs = 2; 8513f012e29Smrg ibs_request.ibs = ib_info; 8523f012e29Smrg ibs_request.resources = bo_list; 8533f012e29Smrg ibs_request.fence_info.handle = NULL; 8543f012e29Smrg 8553f012e29Smrg r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1); 8563f012e29Smrg 8573f012e29Smrg CU_ASSERT_EQUAL(r, 0); 8583f012e29Smrg 8593f012e29Smrg fence_status.context = context_handle; 8603f012e29Smrg fence_status.ip_type = AMDGPU_HW_IP_GFX; 8613f012e29Smrg fence_status.ip_instance = 0; 8623f012e29Smrg fence_status.fence = ibs_request.seq_no; 8633f012e29Smrg 8643f012e29Smrg r = amdgpu_cs_query_fence_status(&fence_status, 8653f012e29Smrg AMDGPU_TIMEOUT_INFINITE, 8663f012e29Smrg 0, &expired); 8673f012e29Smrg CU_ASSERT_EQUAL(r, 0); 8683f012e29Smrg 8693f012e29Smrg r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle, 8703f012e29Smrg ib_result_mc_address, 4096); 8713f012e29Smrg CU_ASSERT_EQUAL(r, 0); 8723f012e29Smrg 8733f012e29Smrg r = amdgpu_bo_list_destroy(bo_list); 8743f012e29Smrg CU_ASSERT_EQUAL(r, 0); 8753f012e29Smrg 8763f012e29Smrg r = amdgpu_cs_ctx_free(context_handle); 8773f012e29Smrg CU_ASSERT_EQUAL(r, 0); 8783f012e29Smrg} 8793f012e29Smrg 8803f012e29Smrgstatic void amdgpu_command_submission_gfx_cp_write_data(void) 8813f012e29Smrg{ 8823f012e29Smrg amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_GFX); 8833f012e29Smrg} 8843f012e29Smrg 8853f012e29Smrgstatic void amdgpu_command_submission_gfx_cp_const_fill(void) 8863f012e29Smrg{ 8873f012e29Smrg amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_GFX); 8883f012e29Smrg} 8893f012e29Smrg 8903f012e29Smrgstatic void amdgpu_command_submission_gfx_cp_copy_data(void) 8913f012e29Smrg{ 8923f012e29Smrg amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_GFX); 8933f012e29Smrg} 8943f012e29Smrg 89500a23bdaSmrgstatic void amdgpu_bo_eviction_test(void) 89600a23bdaSmrg{ 89700a23bdaSmrg const int sdma_write_length = 1024; 89800a23bdaSmrg const int pm4_dw = 256; 89900a23bdaSmrg amdgpu_context_handle context_handle; 90000a23bdaSmrg amdgpu_bo_handle bo1, bo2, vram_max[2], gtt_max[2]; 90100a23bdaSmrg amdgpu_bo_handle *resources; 90200a23bdaSmrg uint32_t *pm4; 90300a23bdaSmrg struct amdgpu_cs_ib_info *ib_info; 90400a23bdaSmrg struct amdgpu_cs_request *ibs_request; 90500a23bdaSmrg uint64_t bo1_mc, bo2_mc; 90600a23bdaSmrg volatile unsigned char *bo1_cpu, *bo2_cpu; 90700a23bdaSmrg int i, j, r, loop1, loop2; 90800a23bdaSmrg uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC}; 90900a23bdaSmrg amdgpu_va_handle bo1_va_handle, bo2_va_handle; 91000a23bdaSmrg struct amdgpu_heap_info vram_info, gtt_info; 91100a23bdaSmrg 91200a23bdaSmrg pm4 = calloc(pm4_dw, sizeof(*pm4)); 91300a23bdaSmrg CU_ASSERT_NOT_EQUAL(pm4, NULL); 91400a23bdaSmrg 91500a23bdaSmrg ib_info = calloc(1, sizeof(*ib_info)); 91600a23bdaSmrg CU_ASSERT_NOT_EQUAL(ib_info, NULL); 91700a23bdaSmrg 91800a23bdaSmrg ibs_request = calloc(1, sizeof(*ibs_request)); 91900a23bdaSmrg CU_ASSERT_NOT_EQUAL(ibs_request, NULL); 92000a23bdaSmrg 92100a23bdaSmrg r = amdgpu_cs_ctx_create(device_handle, &context_handle); 92200a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 92300a23bdaSmrg 92400a23bdaSmrg /* prepare resource */ 92500a23bdaSmrg resources = calloc(4, sizeof(amdgpu_bo_handle)); 92600a23bdaSmrg CU_ASSERT_NOT_EQUAL(resources, NULL); 92700a23bdaSmrg 92800a23bdaSmrg r = amdgpu_query_heap_info(device_handle, AMDGPU_GEM_DOMAIN_VRAM, 92900a23bdaSmrg 0, &vram_info); 93000a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 93100a23bdaSmrg 93200a23bdaSmrg r = amdgpu_bo_alloc_wrap(device_handle, vram_info.max_allocation, 4096, 93300a23bdaSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, &vram_max[0]); 93400a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 93500a23bdaSmrg r = amdgpu_bo_alloc_wrap(device_handle, vram_info.max_allocation, 4096, 93600a23bdaSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, &vram_max[1]); 93700a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 93800a23bdaSmrg 9394babd585Smrg r = amdgpu_query_heap_info(device_handle, AMDGPU_GEM_DOMAIN_GTT, 9404babd585Smrg 0, >t_info); 9414babd585Smrg CU_ASSERT_EQUAL(r, 0); 9424babd585Smrg 94300a23bdaSmrg r = amdgpu_bo_alloc_wrap(device_handle, gtt_info.max_allocation, 4096, 94400a23bdaSmrg AMDGPU_GEM_DOMAIN_GTT, 0, >t_max[0]); 94500a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 94600a23bdaSmrg r = amdgpu_bo_alloc_wrap(device_handle, gtt_info.max_allocation, 4096, 94700a23bdaSmrg AMDGPU_GEM_DOMAIN_GTT, 0, >t_max[1]); 94800a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 94900a23bdaSmrg 95000a23bdaSmrg 95100a23bdaSmrg 95200a23bdaSmrg loop1 = loop2 = 0; 95300a23bdaSmrg /* run 9 circle to test all mapping combination */ 95400a23bdaSmrg while(loop1 < 2) { 95500a23bdaSmrg while(loop2 < 2) { 95600a23bdaSmrg /* allocate UC bo1for sDMA use */ 95700a23bdaSmrg r = amdgpu_bo_alloc_and_map(device_handle, 95800a23bdaSmrg sdma_write_length, 4096, 95900a23bdaSmrg AMDGPU_GEM_DOMAIN_GTT, 96000a23bdaSmrg gtt_flags[loop1], &bo1, 96100a23bdaSmrg (void**)&bo1_cpu, &bo1_mc, 96200a23bdaSmrg &bo1_va_handle); 96300a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 96400a23bdaSmrg 96500a23bdaSmrg /* set bo1 */ 96600a23bdaSmrg memset((void*)bo1_cpu, 0xaa, sdma_write_length); 96700a23bdaSmrg 96800a23bdaSmrg /* allocate UC bo2 for sDMA use */ 96900a23bdaSmrg r = amdgpu_bo_alloc_and_map(device_handle, 97000a23bdaSmrg sdma_write_length, 4096, 97100a23bdaSmrg AMDGPU_GEM_DOMAIN_GTT, 97200a23bdaSmrg gtt_flags[loop2], &bo2, 97300a23bdaSmrg (void**)&bo2_cpu, &bo2_mc, 97400a23bdaSmrg &bo2_va_handle); 97500a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 97600a23bdaSmrg 97700a23bdaSmrg /* clear bo2 */ 97800a23bdaSmrg memset((void*)bo2_cpu, 0, sdma_write_length); 97900a23bdaSmrg 98000a23bdaSmrg resources[0] = bo1; 98100a23bdaSmrg resources[1] = bo2; 98200a23bdaSmrg resources[2] = vram_max[loop2]; 98300a23bdaSmrg resources[3] = gtt_max[loop2]; 98400a23bdaSmrg 98500a23bdaSmrg /* fulfill PM4: test DMA copy linear */ 98600a23bdaSmrg i = j = 0; 98700a23bdaSmrg if (family_id == AMDGPU_FAMILY_SI) { 98800a23bdaSmrg pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI, 0, 0, 0, 98900a23bdaSmrg sdma_write_length); 99000a23bdaSmrg pm4[i++] = 0xffffffff & bo2_mc; 99100a23bdaSmrg pm4[i++] = 0xffffffff & bo1_mc; 99200a23bdaSmrg pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32; 99300a23bdaSmrg pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32; 99400a23bdaSmrg } else { 99500a23bdaSmrg pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0); 99600a23bdaSmrg if (family_id >= AMDGPU_FAMILY_AI) 99700a23bdaSmrg pm4[i++] = sdma_write_length - 1; 99800a23bdaSmrg else 99900a23bdaSmrg pm4[i++] = sdma_write_length; 100000a23bdaSmrg pm4[i++] = 0; 100100a23bdaSmrg pm4[i++] = 0xffffffff & bo1_mc; 100200a23bdaSmrg pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32; 100300a23bdaSmrg pm4[i++] = 0xffffffff & bo2_mc; 100400a23bdaSmrg pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32; 100500a23bdaSmrg } 100600a23bdaSmrg 100700a23bdaSmrg amdgpu_test_exec_cs_helper(context_handle, 100800a23bdaSmrg AMDGPU_HW_IP_DMA, 0, 100900a23bdaSmrg i, pm4, 101000a23bdaSmrg 4, resources, 101100a23bdaSmrg ib_info, ibs_request); 101200a23bdaSmrg 101300a23bdaSmrg /* verify if SDMA test result meets with expected */ 101400a23bdaSmrg i = 0; 101500a23bdaSmrg while(i < sdma_write_length) { 101600a23bdaSmrg CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa); 101700a23bdaSmrg } 101800a23bdaSmrg r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc, 101900a23bdaSmrg sdma_write_length); 102000a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 102100a23bdaSmrg r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc, 102200a23bdaSmrg sdma_write_length); 102300a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 102400a23bdaSmrg loop2++; 102500a23bdaSmrg } 102600a23bdaSmrg loop2 = 0; 102700a23bdaSmrg loop1++; 102800a23bdaSmrg } 102900a23bdaSmrg amdgpu_bo_free(vram_max[0]); 103000a23bdaSmrg amdgpu_bo_free(vram_max[1]); 103100a23bdaSmrg amdgpu_bo_free(gtt_max[0]); 103200a23bdaSmrg amdgpu_bo_free(gtt_max[1]); 103300a23bdaSmrg /* clean resources */ 103400a23bdaSmrg free(resources); 103500a23bdaSmrg free(ibs_request); 103600a23bdaSmrg free(ib_info); 103700a23bdaSmrg free(pm4); 103800a23bdaSmrg 103900a23bdaSmrg /* end of test */ 104000a23bdaSmrg r = amdgpu_cs_ctx_free(context_handle); 104100a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 104200a23bdaSmrg} 104300a23bdaSmrg 104400a23bdaSmrg 10453f012e29Smrgstatic void amdgpu_command_submission_gfx(void) 10463f012e29Smrg{ 10473f012e29Smrg /* write data using the CP */ 10483f012e29Smrg amdgpu_command_submission_gfx_cp_write_data(); 10493f012e29Smrg /* const fill using the CP */ 10503f012e29Smrg amdgpu_command_submission_gfx_cp_const_fill(); 10513f012e29Smrg /* copy data using the CP */ 10523f012e29Smrg amdgpu_command_submission_gfx_cp_copy_data(); 10533f012e29Smrg /* separate IB buffers for multi-IB submission */ 10543f012e29Smrg amdgpu_command_submission_gfx_separate_ibs(); 10553f012e29Smrg /* shared IB buffer for multi-IB submission */ 10563f012e29Smrg amdgpu_command_submission_gfx_shared_ib(); 10573f012e29Smrg} 10583f012e29Smrg 10593f012e29Smrgstatic void amdgpu_semaphore_test(void) 10603f012e29Smrg{ 10613f012e29Smrg amdgpu_context_handle context_handle[2]; 10623f012e29Smrg amdgpu_semaphore_handle sem; 10633f012e29Smrg amdgpu_bo_handle ib_result_handle[2]; 10643f012e29Smrg void *ib_result_cpu[2]; 10653f012e29Smrg uint64_t ib_result_mc_address[2]; 10663f012e29Smrg struct amdgpu_cs_request ibs_request[2] = {0}; 10673f012e29Smrg struct amdgpu_cs_ib_info ib_info[2] = {0}; 10683f012e29Smrg struct amdgpu_cs_fence fence_status = {0}; 10693f012e29Smrg uint32_t *ptr; 10703f012e29Smrg uint32_t expired; 1071d8807b2fSmrg uint32_t sdma_nop, gfx_nop; 10723f012e29Smrg amdgpu_bo_list_handle bo_list[2]; 10733f012e29Smrg amdgpu_va_handle va_handle[2]; 10743f012e29Smrg int r, i; 10754babd585Smrg struct amdgpu_gpu_info gpu_info = {0}; 10764babd585Smrg unsigned gc_ip_type; 10774babd585Smrg 10784babd585Smrg r = amdgpu_query_gpu_info(device_handle, &gpu_info); 10794babd585Smrg CU_ASSERT_EQUAL(r, 0); 10804babd585Smrg 10814babd585Smrg gc_ip_type = (asic_is_gfx_pipe_removed(family_id, chip_id, chip_rev)) ? 10824babd585Smrg AMDGPU_HW_IP_COMPUTE : AMDGPU_HW_IP_GFX; 10833f012e29Smrg 1084d8807b2fSmrg if (family_id == AMDGPU_FAMILY_SI) { 1085d8807b2fSmrg sdma_nop = SDMA_PACKET_SI(SDMA_NOP_SI, 0, 0, 0, 0); 1086d8807b2fSmrg gfx_nop = GFX_COMPUTE_NOP_SI; 1087d8807b2fSmrg } else { 1088d8807b2fSmrg sdma_nop = SDMA_PKT_HEADER_OP(SDMA_NOP); 1089d8807b2fSmrg gfx_nop = GFX_COMPUTE_NOP; 1090d8807b2fSmrg } 1091d8807b2fSmrg 10923f012e29Smrg r = amdgpu_cs_create_semaphore(&sem); 10933f012e29Smrg CU_ASSERT_EQUAL(r, 0); 10943f012e29Smrg for (i = 0; i < 2; i++) { 10953f012e29Smrg r = amdgpu_cs_ctx_create(device_handle, &context_handle[i]); 10963f012e29Smrg CU_ASSERT_EQUAL(r, 0); 10973f012e29Smrg 10983f012e29Smrg r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096, 10993f012e29Smrg AMDGPU_GEM_DOMAIN_GTT, 0, 11003f012e29Smrg &ib_result_handle[i], &ib_result_cpu[i], 11013f012e29Smrg &ib_result_mc_address[i], &va_handle[i]); 11023f012e29Smrg CU_ASSERT_EQUAL(r, 0); 11033f012e29Smrg 11043f012e29Smrg r = amdgpu_get_bo_list(device_handle, ib_result_handle[i], 11053f012e29Smrg NULL, &bo_list[i]); 11063f012e29Smrg CU_ASSERT_EQUAL(r, 0); 11073f012e29Smrg } 11083f012e29Smrg 11093f012e29Smrg /* 1. same context different engine */ 11103f012e29Smrg ptr = ib_result_cpu[0]; 1111d8807b2fSmrg ptr[0] = sdma_nop; 11123f012e29Smrg ib_info[0].ib_mc_address = ib_result_mc_address[0]; 11133f012e29Smrg ib_info[0].size = 1; 11143f012e29Smrg 11153f012e29Smrg ibs_request[0].ip_type = AMDGPU_HW_IP_DMA; 11163f012e29Smrg ibs_request[0].number_of_ibs = 1; 11173f012e29Smrg ibs_request[0].ibs = &ib_info[0]; 11183f012e29Smrg ibs_request[0].resources = bo_list[0]; 11193f012e29Smrg ibs_request[0].fence_info.handle = NULL; 11203f012e29Smrg r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request[0], 1); 11213f012e29Smrg CU_ASSERT_EQUAL(r, 0); 11223f012e29Smrg r = amdgpu_cs_signal_semaphore(context_handle[0], AMDGPU_HW_IP_DMA, 0, 0, sem); 11233f012e29Smrg CU_ASSERT_EQUAL(r, 0); 11243f012e29Smrg 11254babd585Smrg r = amdgpu_cs_wait_semaphore(context_handle[0], gc_ip_type, 0, 0, sem); 11263f012e29Smrg CU_ASSERT_EQUAL(r, 0); 11273f012e29Smrg ptr = ib_result_cpu[1]; 1128d8807b2fSmrg ptr[0] = gfx_nop; 11293f012e29Smrg ib_info[1].ib_mc_address = ib_result_mc_address[1]; 11303f012e29Smrg ib_info[1].size = 1; 11313f012e29Smrg 11324babd585Smrg ibs_request[1].ip_type = gc_ip_type; 11333f012e29Smrg ibs_request[1].number_of_ibs = 1; 11343f012e29Smrg ibs_request[1].ibs = &ib_info[1]; 11353f012e29Smrg ibs_request[1].resources = bo_list[1]; 11363f012e29Smrg ibs_request[1].fence_info.handle = NULL; 11373f012e29Smrg 11383f012e29Smrg r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request[1], 1); 11393f012e29Smrg CU_ASSERT_EQUAL(r, 0); 11403f012e29Smrg 11413f012e29Smrg fence_status.context = context_handle[0]; 11424babd585Smrg fence_status.ip_type = gc_ip_type; 11433f012e29Smrg fence_status.ip_instance = 0; 11443f012e29Smrg fence_status.fence = ibs_request[1].seq_no; 11453f012e29Smrg r = amdgpu_cs_query_fence_status(&fence_status, 11463f012e29Smrg 500000000, 0, &expired); 11473f012e29Smrg CU_ASSERT_EQUAL(r, 0); 11483f012e29Smrg CU_ASSERT_EQUAL(expired, true); 11493f012e29Smrg 11503f012e29Smrg /* 2. same engine different context */ 11513f012e29Smrg ptr = ib_result_cpu[0]; 1152d8807b2fSmrg ptr[0] = gfx_nop; 11533f012e29Smrg ib_info[0].ib_mc_address = ib_result_mc_address[0]; 11543f012e29Smrg ib_info[0].size = 1; 11553f012e29Smrg 11564babd585Smrg ibs_request[0].ip_type = gc_ip_type; 11573f012e29Smrg ibs_request[0].number_of_ibs = 1; 11583f012e29Smrg ibs_request[0].ibs = &ib_info[0]; 11593f012e29Smrg ibs_request[0].resources = bo_list[0]; 11603f012e29Smrg ibs_request[0].fence_info.handle = NULL; 11613f012e29Smrg r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request[0], 1); 11623f012e29Smrg CU_ASSERT_EQUAL(r, 0); 11634babd585Smrg r = amdgpu_cs_signal_semaphore(context_handle[0], gc_ip_type, 0, 0, sem); 11643f012e29Smrg CU_ASSERT_EQUAL(r, 0); 11653f012e29Smrg 11664babd585Smrg r = amdgpu_cs_wait_semaphore(context_handle[1], gc_ip_type, 0, 0, sem); 11673f012e29Smrg CU_ASSERT_EQUAL(r, 0); 11683f012e29Smrg ptr = ib_result_cpu[1]; 1169d8807b2fSmrg ptr[0] = gfx_nop; 11703f012e29Smrg ib_info[1].ib_mc_address = ib_result_mc_address[1]; 11713f012e29Smrg ib_info[1].size = 1; 11723f012e29Smrg 11734babd585Smrg ibs_request[1].ip_type = gc_ip_type; 11743f012e29Smrg ibs_request[1].number_of_ibs = 1; 11753f012e29Smrg ibs_request[1].ibs = &ib_info[1]; 11763f012e29Smrg ibs_request[1].resources = bo_list[1]; 11773f012e29Smrg ibs_request[1].fence_info.handle = NULL; 11783f012e29Smrg r = amdgpu_cs_submit(context_handle[1], 0,&ibs_request[1], 1); 11793f012e29Smrg 11803f012e29Smrg CU_ASSERT_EQUAL(r, 0); 11813f012e29Smrg 11823f012e29Smrg fence_status.context = context_handle[1]; 11834babd585Smrg fence_status.ip_type = gc_ip_type; 11843f012e29Smrg fence_status.ip_instance = 0; 11853f012e29Smrg fence_status.fence = ibs_request[1].seq_no; 11863f012e29Smrg r = amdgpu_cs_query_fence_status(&fence_status, 11873f012e29Smrg 500000000, 0, &expired); 11883f012e29Smrg CU_ASSERT_EQUAL(r, 0); 11893f012e29Smrg CU_ASSERT_EQUAL(expired, true); 1190d8807b2fSmrg 11913f012e29Smrg for (i = 0; i < 2; i++) { 11923f012e29Smrg r = amdgpu_bo_unmap_and_free(ib_result_handle[i], va_handle[i], 11933f012e29Smrg ib_result_mc_address[i], 4096); 11943f012e29Smrg CU_ASSERT_EQUAL(r, 0); 11953f012e29Smrg 11963f012e29Smrg r = amdgpu_bo_list_destroy(bo_list[i]); 11973f012e29Smrg CU_ASSERT_EQUAL(r, 0); 11983f012e29Smrg 11993f012e29Smrg r = amdgpu_cs_ctx_free(context_handle[i]); 12003f012e29Smrg CU_ASSERT_EQUAL(r, 0); 12013f012e29Smrg } 12023f012e29Smrg 12033f012e29Smrg r = amdgpu_cs_destroy_semaphore(sem); 12043f012e29Smrg CU_ASSERT_EQUAL(r, 0); 12053f012e29Smrg} 12063f012e29Smrg 12073f012e29Smrgstatic void amdgpu_command_submission_compute_nop(void) 12083f012e29Smrg{ 12093f012e29Smrg amdgpu_context_handle context_handle; 12103f012e29Smrg amdgpu_bo_handle ib_result_handle; 12113f012e29Smrg void *ib_result_cpu; 12123f012e29Smrg uint64_t ib_result_mc_address; 12133f012e29Smrg struct amdgpu_cs_request ibs_request; 12143f012e29Smrg struct amdgpu_cs_ib_info ib_info; 12153f012e29Smrg struct amdgpu_cs_fence fence_status; 12163f012e29Smrg uint32_t *ptr; 12173f012e29Smrg uint32_t expired; 121800a23bdaSmrg int r, instance; 12193f012e29Smrg amdgpu_bo_list_handle bo_list; 12203f012e29Smrg amdgpu_va_handle va_handle; 1221d8807b2fSmrg struct drm_amdgpu_info_hw_ip info; 1222d8807b2fSmrg 1223d8807b2fSmrg r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_COMPUTE, 0, &info); 1224d8807b2fSmrg CU_ASSERT_EQUAL(r, 0); 12253f012e29Smrg 12263f012e29Smrg r = amdgpu_cs_ctx_create(device_handle, &context_handle); 12273f012e29Smrg CU_ASSERT_EQUAL(r, 0); 12283f012e29Smrg 1229d8807b2fSmrg for (instance = 0; (1 << instance) & info.available_rings; instance++) { 12303f012e29Smrg r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096, 12313f012e29Smrg AMDGPU_GEM_DOMAIN_GTT, 0, 12323f012e29Smrg &ib_result_handle, &ib_result_cpu, 12333f012e29Smrg &ib_result_mc_address, &va_handle); 12343f012e29Smrg CU_ASSERT_EQUAL(r, 0); 12353f012e29Smrg 12363f012e29Smrg r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL, 12373f012e29Smrg &bo_list); 12383f012e29Smrg CU_ASSERT_EQUAL(r, 0); 12393f012e29Smrg 12403f012e29Smrg ptr = ib_result_cpu; 1241d8807b2fSmrg memset(ptr, 0, 16); 1242d8807b2fSmrg ptr[0]=PACKET3(PACKET3_NOP, 14); 12433f012e29Smrg 12443f012e29Smrg memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info)); 12453f012e29Smrg ib_info.ib_mc_address = ib_result_mc_address; 12463f012e29Smrg ib_info.size = 16; 12473f012e29Smrg 12483f012e29Smrg memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request)); 12493f012e29Smrg ibs_request.ip_type = AMDGPU_HW_IP_COMPUTE; 12503f012e29Smrg ibs_request.ring = instance; 12513f012e29Smrg ibs_request.number_of_ibs = 1; 12523f012e29Smrg ibs_request.ibs = &ib_info; 12533f012e29Smrg ibs_request.resources = bo_list; 12543f012e29Smrg ibs_request.fence_info.handle = NULL; 12553f012e29Smrg 12563f012e29Smrg memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence)); 12573f012e29Smrg r = amdgpu_cs_submit(context_handle, 0,&ibs_request, 1); 12583f012e29Smrg CU_ASSERT_EQUAL(r, 0); 12593f012e29Smrg 12603f012e29Smrg fence_status.context = context_handle; 12613f012e29Smrg fence_status.ip_type = AMDGPU_HW_IP_COMPUTE; 12623f012e29Smrg fence_status.ip_instance = 0; 12633f012e29Smrg fence_status.ring = instance; 12643f012e29Smrg fence_status.fence = ibs_request.seq_no; 12653f012e29Smrg 12663f012e29Smrg r = amdgpu_cs_query_fence_status(&fence_status, 12673f012e29Smrg AMDGPU_TIMEOUT_INFINITE, 12683f012e29Smrg 0, &expired); 12693f012e29Smrg CU_ASSERT_EQUAL(r, 0); 12703f012e29Smrg 12713f012e29Smrg r = amdgpu_bo_list_destroy(bo_list); 12723f012e29Smrg CU_ASSERT_EQUAL(r, 0); 12733f012e29Smrg 12743f012e29Smrg r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle, 12753f012e29Smrg ib_result_mc_address, 4096); 12763f012e29Smrg CU_ASSERT_EQUAL(r, 0); 12773f012e29Smrg } 12783f012e29Smrg 12793f012e29Smrg r = amdgpu_cs_ctx_free(context_handle); 12803f012e29Smrg CU_ASSERT_EQUAL(r, 0); 12813f012e29Smrg} 12823f012e29Smrg 12833f012e29Smrgstatic void amdgpu_command_submission_compute_cp_write_data(void) 12843f012e29Smrg{ 12853f012e29Smrg amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_COMPUTE); 12863f012e29Smrg} 12873f012e29Smrg 12883f012e29Smrgstatic void amdgpu_command_submission_compute_cp_const_fill(void) 12893f012e29Smrg{ 12903f012e29Smrg amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_COMPUTE); 12913f012e29Smrg} 12923f012e29Smrg 12933f012e29Smrgstatic void amdgpu_command_submission_compute_cp_copy_data(void) 12943f012e29Smrg{ 12953f012e29Smrg amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_COMPUTE); 12963f012e29Smrg} 12973f012e29Smrg 12983f012e29Smrgstatic void amdgpu_command_submission_compute(void) 12993f012e29Smrg{ 13003f012e29Smrg /* write data using the CP */ 13013f012e29Smrg amdgpu_command_submission_compute_cp_write_data(); 13023f012e29Smrg /* const fill using the CP */ 13033f012e29Smrg amdgpu_command_submission_compute_cp_const_fill(); 13043f012e29Smrg /* copy data using the CP */ 13053f012e29Smrg amdgpu_command_submission_compute_cp_copy_data(); 13063f012e29Smrg /* nop test */ 13073f012e29Smrg amdgpu_command_submission_compute_nop(); 13083f012e29Smrg} 13093f012e29Smrg 13103f012e29Smrg/* 13113f012e29Smrg * caller need create/release: 13123f012e29Smrg * pm4_src, resources, ib_info, and ibs_request 13133f012e29Smrg * submit command stream described in ibs_request and wait for this IB accomplished 13143f012e29Smrg */ 131541687f09Smrgvoid 131641687f09Smrgamdgpu_test_exec_cs_helper_raw(amdgpu_device_handle device_handle, 131741687f09Smrg amdgpu_context_handle context_handle, 131841687f09Smrg unsigned ip_type, int instance, int pm4_dw, 131941687f09Smrg uint32_t *pm4_src, int res_cnt, 132041687f09Smrg amdgpu_bo_handle *resources, 132141687f09Smrg struct amdgpu_cs_ib_info *ib_info, 132241687f09Smrg struct amdgpu_cs_request *ibs_request, 132341687f09Smrg bool secure) 13243f012e29Smrg{ 13253f012e29Smrg int r; 13263f012e29Smrg uint32_t expired; 13273f012e29Smrg uint32_t *ring_ptr; 13283f012e29Smrg amdgpu_bo_handle ib_result_handle; 13293f012e29Smrg void *ib_result_cpu; 13303f012e29Smrg uint64_t ib_result_mc_address; 13313f012e29Smrg struct amdgpu_cs_fence fence_status = {0}; 13323f012e29Smrg amdgpu_bo_handle *all_res = alloca(sizeof(resources[0]) * (res_cnt + 1)); 13333f012e29Smrg amdgpu_va_handle va_handle; 13343f012e29Smrg 13353f012e29Smrg /* prepare CS */ 13363f012e29Smrg CU_ASSERT_NOT_EQUAL(pm4_src, NULL); 13373f012e29Smrg CU_ASSERT_NOT_EQUAL(resources, NULL); 13383f012e29Smrg CU_ASSERT_NOT_EQUAL(ib_info, NULL); 13393f012e29Smrg CU_ASSERT_NOT_EQUAL(ibs_request, NULL); 13403f012e29Smrg CU_ASSERT_TRUE(pm4_dw <= 1024); 13413f012e29Smrg 13423f012e29Smrg /* allocate IB */ 13433f012e29Smrg r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096, 13443f012e29Smrg AMDGPU_GEM_DOMAIN_GTT, 0, 13453f012e29Smrg &ib_result_handle, &ib_result_cpu, 13463f012e29Smrg &ib_result_mc_address, &va_handle); 13473f012e29Smrg CU_ASSERT_EQUAL(r, 0); 13483f012e29Smrg 13493f012e29Smrg /* copy PM4 packet to ring from caller */ 13503f012e29Smrg ring_ptr = ib_result_cpu; 13513f012e29Smrg memcpy(ring_ptr, pm4_src, pm4_dw * sizeof(*pm4_src)); 13523f012e29Smrg 13533f012e29Smrg ib_info->ib_mc_address = ib_result_mc_address; 13543f012e29Smrg ib_info->size = pm4_dw; 135541687f09Smrg if (secure) 135641687f09Smrg ib_info->flags |= AMDGPU_IB_FLAGS_SECURE; 13573f012e29Smrg 13583f012e29Smrg ibs_request->ip_type = ip_type; 13593f012e29Smrg ibs_request->ring = instance; 13603f012e29Smrg ibs_request->number_of_ibs = 1; 13613f012e29Smrg ibs_request->ibs = ib_info; 13623f012e29Smrg ibs_request->fence_info.handle = NULL; 13633f012e29Smrg 13643f012e29Smrg memcpy(all_res, resources, sizeof(resources[0]) * res_cnt); 13653f012e29Smrg all_res[res_cnt] = ib_result_handle; 13663f012e29Smrg 13673f012e29Smrg r = amdgpu_bo_list_create(device_handle, res_cnt+1, all_res, 13683f012e29Smrg NULL, &ibs_request->resources); 13693f012e29Smrg CU_ASSERT_EQUAL(r, 0); 13703f012e29Smrg 13713f012e29Smrg CU_ASSERT_NOT_EQUAL(ibs_request, NULL); 13723f012e29Smrg 13733f012e29Smrg /* submit CS */ 13743f012e29Smrg r = amdgpu_cs_submit(context_handle, 0, ibs_request, 1); 13753f012e29Smrg CU_ASSERT_EQUAL(r, 0); 13763f012e29Smrg 13773f012e29Smrg r = amdgpu_bo_list_destroy(ibs_request->resources); 13783f012e29Smrg CU_ASSERT_EQUAL(r, 0); 13793f012e29Smrg 13803f012e29Smrg fence_status.ip_type = ip_type; 13813f012e29Smrg fence_status.ip_instance = 0; 13823f012e29Smrg fence_status.ring = ibs_request->ring; 13833f012e29Smrg fence_status.context = context_handle; 13843f012e29Smrg fence_status.fence = ibs_request->seq_no; 13853f012e29Smrg 13863f012e29Smrg /* wait for IB accomplished */ 13873f012e29Smrg r = amdgpu_cs_query_fence_status(&fence_status, 13883f012e29Smrg AMDGPU_TIMEOUT_INFINITE, 13893f012e29Smrg 0, &expired); 13903f012e29Smrg CU_ASSERT_EQUAL(r, 0); 13913f012e29Smrg CU_ASSERT_EQUAL(expired, true); 13923f012e29Smrg 13933f012e29Smrg r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle, 13943f012e29Smrg ib_result_mc_address, 4096); 13953f012e29Smrg CU_ASSERT_EQUAL(r, 0); 13963f012e29Smrg} 13973f012e29Smrg 139841687f09Smrgstatic void 139941687f09Smrgamdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle, 140041687f09Smrg unsigned ip_type, int instance, int pm4_dw, 140141687f09Smrg uint32_t *pm4_src, int res_cnt, 140241687f09Smrg amdgpu_bo_handle *resources, 140341687f09Smrg struct amdgpu_cs_ib_info *ib_info, 140441687f09Smrg struct amdgpu_cs_request *ibs_request) 140541687f09Smrg{ 140641687f09Smrg amdgpu_test_exec_cs_helper_raw(device_handle, context_handle, 140741687f09Smrg ip_type, instance, pm4_dw, pm4_src, 140841687f09Smrg res_cnt, resources, ib_info, 140941687f09Smrg ibs_request, false); 141041687f09Smrg} 141141687f09Smrg 141241687f09Smrgvoid 141341687f09Smrgamdgpu_command_submission_write_linear_helper_with_secure(amdgpu_device_handle 141441687f09Smrg device, unsigned 141541687f09Smrg ip_type, bool secure) 14163f012e29Smrg{ 14173f012e29Smrg const int sdma_write_length = 128; 14183f012e29Smrg const int pm4_dw = 256; 14193f012e29Smrg amdgpu_context_handle context_handle; 14203f012e29Smrg amdgpu_bo_handle bo; 14213f012e29Smrg amdgpu_bo_handle *resources; 14223f012e29Smrg uint32_t *pm4; 14233f012e29Smrg struct amdgpu_cs_ib_info *ib_info; 14243f012e29Smrg struct amdgpu_cs_request *ibs_request; 14253f012e29Smrg uint64_t bo_mc; 14263f012e29Smrg volatile uint32_t *bo_cpu; 142741687f09Smrg uint32_t bo_cpu_origin; 142800a23bdaSmrg int i, j, r, loop, ring_id; 14293f012e29Smrg uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC}; 14303f012e29Smrg amdgpu_va_handle va_handle; 143100a23bdaSmrg struct drm_amdgpu_info_hw_ip hw_ip_info; 14323f012e29Smrg 14333f012e29Smrg pm4 = calloc(pm4_dw, sizeof(*pm4)); 14343f012e29Smrg CU_ASSERT_NOT_EQUAL(pm4, NULL); 14353f012e29Smrg 14363f012e29Smrg ib_info = calloc(1, sizeof(*ib_info)); 14373f012e29Smrg CU_ASSERT_NOT_EQUAL(ib_info, NULL); 14383f012e29Smrg 14393f012e29Smrg ibs_request = calloc(1, sizeof(*ibs_request)); 14403f012e29Smrg CU_ASSERT_NOT_EQUAL(ibs_request, NULL); 14413f012e29Smrg 144241687f09Smrg r = amdgpu_query_hw_ip_info(device, ip_type, 0, &hw_ip_info); 144300a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 144400a23bdaSmrg 144541687f09Smrg for (i = 0; secure && (i < 2); i++) 144641687f09Smrg gtt_flags[i] |= AMDGPU_GEM_CREATE_ENCRYPTED; 144741687f09Smrg 144841687f09Smrg r = amdgpu_cs_ctx_create(device, &context_handle); 144941687f09Smrg 14503f012e29Smrg CU_ASSERT_EQUAL(r, 0); 14513f012e29Smrg 14523f012e29Smrg /* prepare resource */ 14533f012e29Smrg resources = calloc(1, sizeof(amdgpu_bo_handle)); 14543f012e29Smrg CU_ASSERT_NOT_EQUAL(resources, NULL); 14553f012e29Smrg 145600a23bdaSmrg for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) { 145700a23bdaSmrg loop = 0; 145800a23bdaSmrg while(loop < 2) { 145900a23bdaSmrg /* allocate UC bo for sDMA use */ 146041687f09Smrg r = amdgpu_bo_alloc_and_map(device, 146100a23bdaSmrg sdma_write_length * sizeof(uint32_t), 146200a23bdaSmrg 4096, AMDGPU_GEM_DOMAIN_GTT, 146300a23bdaSmrg gtt_flags[loop], &bo, (void**)&bo_cpu, 146400a23bdaSmrg &bo_mc, &va_handle); 146500a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 14663f012e29Smrg 146700a23bdaSmrg /* clear bo */ 146800a23bdaSmrg memset((void*)bo_cpu, 0, sdma_write_length * sizeof(uint32_t)); 14693f012e29Smrg 147000a23bdaSmrg resources[0] = bo; 14713f012e29Smrg 147200a23bdaSmrg /* fulfill PM4: test DMA write-linear */ 147300a23bdaSmrg i = j = 0; 147400a23bdaSmrg if (ip_type == AMDGPU_HW_IP_DMA) { 147500a23bdaSmrg if (family_id == AMDGPU_FAMILY_SI) 147600a23bdaSmrg pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0, 147700a23bdaSmrg sdma_write_length); 147800a23bdaSmrg else 147900a23bdaSmrg pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE, 148041687f09Smrg SDMA_WRITE_SUB_OPCODE_LINEAR, 148141687f09Smrg secure ? SDMA_ATOMIC_TMZ(1) : 0); 148241687f09Smrg pm4[i++] = 0xfffffffc & bo_mc; 148300a23bdaSmrg pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32; 148400a23bdaSmrg if (family_id >= AMDGPU_FAMILY_AI) 148500a23bdaSmrg pm4[i++] = sdma_write_length - 1; 148600a23bdaSmrg else if (family_id != AMDGPU_FAMILY_SI) 148700a23bdaSmrg pm4[i++] = sdma_write_length; 148800a23bdaSmrg while(j++ < sdma_write_length) 148900a23bdaSmrg pm4[i++] = 0xdeadbeaf; 149000a23bdaSmrg } else if ((ip_type == AMDGPU_HW_IP_GFX) || 149100a23bdaSmrg (ip_type == AMDGPU_HW_IP_COMPUTE)) { 149200a23bdaSmrg pm4[i++] = PACKET3(PACKET3_WRITE_DATA, 2 + sdma_write_length); 149300a23bdaSmrg pm4[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM; 149400a23bdaSmrg pm4[i++] = 0xfffffffc & bo_mc; 149500a23bdaSmrg pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32; 149600a23bdaSmrg while(j++ < sdma_write_length) 149700a23bdaSmrg pm4[i++] = 0xdeadbeaf; 149800a23bdaSmrg } 14993f012e29Smrg 150041687f09Smrg amdgpu_test_exec_cs_helper_raw(device, context_handle, 150141687f09Smrg ip_type, ring_id, i, pm4, 150241687f09Smrg 1, resources, ib_info, 150341687f09Smrg ibs_request, secure); 15043f012e29Smrg 150500a23bdaSmrg /* verify if SDMA test result meets with expected */ 150600a23bdaSmrg i = 0; 150741687f09Smrg if (!secure) { 150841687f09Smrg while(i < sdma_write_length) { 150941687f09Smrg CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf); 151041687f09Smrg } 151141687f09Smrg } else if (ip_type == AMDGPU_HW_IP_GFX) { 151241687f09Smrg memset((void*)pm4, 0, pm4_dw * sizeof(uint32_t)); 151341687f09Smrg pm4[i++] = PACKET3(PACKET3_ATOMIC_MEM, 7); 151441687f09Smrg /* atomic opcode for 32b w/ RTN and ATOMIC_SWAPCMP_RTN 151541687f09Smrg * command, 1-loop_until_compare_satisfied. 151641687f09Smrg * single_pass_atomic, 0-lru 151741687f09Smrg * engine_sel, 0-micro_engine 151841687f09Smrg */ 151941687f09Smrg pm4[i++] = (TC_OP_ATOMIC_CMPSWAP_RTN_32 | 152041687f09Smrg ATOMIC_MEM_COMMAND(1) | 152141687f09Smrg ATOMIC_MEM_CACHEPOLICAY(0) | 152241687f09Smrg ATOMIC_MEM_ENGINESEL(0)); 152341687f09Smrg pm4[i++] = 0xfffffffc & bo_mc; 152441687f09Smrg pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32; 152541687f09Smrg pm4[i++] = 0x12345678; 152641687f09Smrg pm4[i++] = 0x0; 152741687f09Smrg pm4[i++] = 0xdeadbeaf; 152841687f09Smrg pm4[i++] = 0x0; 152941687f09Smrg pm4[i++] = 0x100; 153041687f09Smrg amdgpu_test_exec_cs_helper_raw(device, context_handle, 153141687f09Smrg ip_type, ring_id, i, pm4, 153241687f09Smrg 1, resources, ib_info, 153341687f09Smrg ibs_request, true); 153441687f09Smrg } else if (ip_type == AMDGPU_HW_IP_DMA) { 153541687f09Smrg /* restore the bo_cpu to compare */ 153641687f09Smrg bo_cpu_origin = bo_cpu[0]; 153741687f09Smrg memset((void*)pm4, 0, pm4_dw * sizeof(uint32_t)); 153841687f09Smrg /* atomic opcode for 32b w/ RTN and ATOMIC_SWAPCMP_RTN 153941687f09Smrg * loop, 1-loop_until_compare_satisfied. 154041687f09Smrg * single_pass_atomic, 0-lru 154141687f09Smrg */ 154241687f09Smrg pm4[i++] = SDMA_PACKET(SDMA_OPCODE_ATOMIC, 154341687f09Smrg 0, 154441687f09Smrg SDMA_ATOMIC_LOOP(1) | 154541687f09Smrg SDMA_ATOMIC_TMZ(1) | 154641687f09Smrg SDMA_ATOMIC_OPCODE(TC_OP_ATOMIC_CMPSWAP_RTN_32)); 154741687f09Smrg pm4[i++] = 0xfffffffc & bo_mc; 154841687f09Smrg pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32; 154941687f09Smrg pm4[i++] = 0x12345678; 155041687f09Smrg pm4[i++] = 0x0; 155141687f09Smrg pm4[i++] = 0xdeadbeaf; 155241687f09Smrg pm4[i++] = 0x0; 155341687f09Smrg pm4[i++] = 0x100; 155441687f09Smrg amdgpu_test_exec_cs_helper_raw(device, context_handle, 155541687f09Smrg ip_type, ring_id, i, pm4, 155641687f09Smrg 1, resources, ib_info, 155741687f09Smrg ibs_request, true); 155841687f09Smrg /* DMA's atomic behavir is unlike GFX 155941687f09Smrg * If the comparing data is not equal to destination data, 156041687f09Smrg * For GFX, loop again till gfx timeout(system hang). 156141687f09Smrg * For DMA, loop again till timer expired and then send interrupt. 156241687f09Smrg * So testcase can't use interrupt mechanism. 156341687f09Smrg * We take another way to verify. When the comparing data is not 156441687f09Smrg * equal to destination data, overwrite the source data to the destination 156541687f09Smrg * buffer. Otherwise, original destination data unchanged. 156641687f09Smrg * So if the bo_cpu data is overwritten, the result is passed. 156741687f09Smrg */ 156841687f09Smrg CU_ASSERT_NOT_EQUAL(bo_cpu[0], bo_cpu_origin); 156941687f09Smrg 157041687f09Smrg /* compare again for the case of dest_data != cmp_data */ 157141687f09Smrg i = 0; 157241687f09Smrg /* restore again, here dest_data should be */ 157341687f09Smrg bo_cpu_origin = bo_cpu[0]; 157441687f09Smrg memset((void*)pm4, 0, pm4_dw * sizeof(uint32_t)); 157541687f09Smrg pm4[i++] = SDMA_PACKET(SDMA_OPCODE_ATOMIC, 157641687f09Smrg 0, 157741687f09Smrg SDMA_ATOMIC_LOOP(1) | 157841687f09Smrg SDMA_ATOMIC_TMZ(1) | 157941687f09Smrg SDMA_ATOMIC_OPCODE(TC_OP_ATOMIC_CMPSWAP_RTN_32)); 158041687f09Smrg pm4[i++] = 0xfffffffc & bo_mc; 158141687f09Smrg pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32; 158241687f09Smrg pm4[i++] = 0x87654321; 158341687f09Smrg pm4[i++] = 0x0; 158441687f09Smrg pm4[i++] = 0xdeadbeaf; 158541687f09Smrg pm4[i++] = 0x0; 158641687f09Smrg pm4[i++] = 0x100; 158741687f09Smrg amdgpu_test_exec_cs_helper_raw(device, context_handle, 158841687f09Smrg ip_type, ring_id, i, pm4, 158941687f09Smrg 1, resources, ib_info, 159041687f09Smrg ibs_request, true); 159141687f09Smrg /* here bo_cpu[0] should be unchanged, still is 0x12345678, otherwise failed*/ 159241687f09Smrg CU_ASSERT_EQUAL(bo_cpu[0], bo_cpu_origin); 159300a23bdaSmrg } 15943f012e29Smrg 159500a23bdaSmrg r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc, 159600a23bdaSmrg sdma_write_length * sizeof(uint32_t)); 159700a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 159800a23bdaSmrg loop++; 15993f012e29Smrg } 16003f012e29Smrg } 16013f012e29Smrg /* clean resources */ 16023f012e29Smrg free(resources); 16033f012e29Smrg free(ibs_request); 16043f012e29Smrg free(ib_info); 16053f012e29Smrg free(pm4); 16063f012e29Smrg 16073f012e29Smrg /* end of test */ 16083f012e29Smrg r = amdgpu_cs_ctx_free(context_handle); 16093f012e29Smrg CU_ASSERT_EQUAL(r, 0); 16103f012e29Smrg} 16113f012e29Smrg 161241687f09Smrgstatic void amdgpu_command_submission_write_linear_helper(unsigned ip_type) 161341687f09Smrg{ 161441687f09Smrg amdgpu_command_submission_write_linear_helper_with_secure(device_handle, 161541687f09Smrg ip_type, 161641687f09Smrg false); 161741687f09Smrg} 161841687f09Smrg 16193f012e29Smrgstatic void amdgpu_command_submission_sdma_write_linear(void) 16203f012e29Smrg{ 16213f012e29Smrg amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_DMA); 16223f012e29Smrg} 16233f012e29Smrg 16243f012e29Smrgstatic void amdgpu_command_submission_const_fill_helper(unsigned ip_type) 16253f012e29Smrg{ 16263f012e29Smrg const int sdma_write_length = 1024 * 1024; 16273f012e29Smrg const int pm4_dw = 256; 16283f012e29Smrg amdgpu_context_handle context_handle; 16293f012e29Smrg amdgpu_bo_handle bo; 16303f012e29Smrg amdgpu_bo_handle *resources; 16313f012e29Smrg uint32_t *pm4; 16323f012e29Smrg struct amdgpu_cs_ib_info *ib_info; 16333f012e29Smrg struct amdgpu_cs_request *ibs_request; 16343f012e29Smrg uint64_t bo_mc; 16353f012e29Smrg volatile uint32_t *bo_cpu; 163600a23bdaSmrg int i, j, r, loop, ring_id; 16373f012e29Smrg uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC}; 16383f012e29Smrg amdgpu_va_handle va_handle; 163900a23bdaSmrg struct drm_amdgpu_info_hw_ip hw_ip_info; 16403f012e29Smrg 16413f012e29Smrg pm4 = calloc(pm4_dw, sizeof(*pm4)); 16423f012e29Smrg CU_ASSERT_NOT_EQUAL(pm4, NULL); 16433f012e29Smrg 16443f012e29Smrg ib_info = calloc(1, sizeof(*ib_info)); 16453f012e29Smrg CU_ASSERT_NOT_EQUAL(ib_info, NULL); 16463f012e29Smrg 16473f012e29Smrg ibs_request = calloc(1, sizeof(*ibs_request)); 16483f012e29Smrg CU_ASSERT_NOT_EQUAL(ibs_request, NULL); 16493f012e29Smrg 165000a23bdaSmrg r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info); 165100a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 165200a23bdaSmrg 16533f012e29Smrg r = amdgpu_cs_ctx_create(device_handle, &context_handle); 16543f012e29Smrg CU_ASSERT_EQUAL(r, 0); 16553f012e29Smrg 16563f012e29Smrg /* prepare resource */ 16573f012e29Smrg resources = calloc(1, sizeof(amdgpu_bo_handle)); 16583f012e29Smrg CU_ASSERT_NOT_EQUAL(resources, NULL); 16593f012e29Smrg 166000a23bdaSmrg for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) { 166100a23bdaSmrg loop = 0; 166200a23bdaSmrg while(loop < 2) { 166300a23bdaSmrg /* allocate UC bo for sDMA use */ 166400a23bdaSmrg r = amdgpu_bo_alloc_and_map(device_handle, 166500a23bdaSmrg sdma_write_length, 4096, 166600a23bdaSmrg AMDGPU_GEM_DOMAIN_GTT, 166700a23bdaSmrg gtt_flags[loop], &bo, (void**)&bo_cpu, 166800a23bdaSmrg &bo_mc, &va_handle); 166900a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 16703f012e29Smrg 167100a23bdaSmrg /* clear bo */ 167200a23bdaSmrg memset((void*)bo_cpu, 0, sdma_write_length); 16733f012e29Smrg 167400a23bdaSmrg resources[0] = bo; 16753f012e29Smrg 167600a23bdaSmrg /* fulfill PM4: test DMA const fill */ 167700a23bdaSmrg i = j = 0; 167800a23bdaSmrg if (ip_type == AMDGPU_HW_IP_DMA) { 167900a23bdaSmrg if (family_id == AMDGPU_FAMILY_SI) { 168000a23bdaSmrg pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_CONSTANT_FILL_SI, 168100a23bdaSmrg 0, 0, 0, 168200a23bdaSmrg sdma_write_length / 4); 168300a23bdaSmrg pm4[i++] = 0xfffffffc & bo_mc; 168400a23bdaSmrg pm4[i++] = 0xdeadbeaf; 168500a23bdaSmrg pm4[i++] = (0xffffffff00000000 & bo_mc) >> 16; 168600a23bdaSmrg } else { 168700a23bdaSmrg pm4[i++] = SDMA_PACKET(SDMA_OPCODE_CONSTANT_FILL, 0, 168800a23bdaSmrg SDMA_CONSTANT_FILL_EXTRA_SIZE(2)); 168900a23bdaSmrg pm4[i++] = 0xffffffff & bo_mc; 169000a23bdaSmrg pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32; 169100a23bdaSmrg pm4[i++] = 0xdeadbeaf; 169200a23bdaSmrg if (family_id >= AMDGPU_FAMILY_AI) 169300a23bdaSmrg pm4[i++] = sdma_write_length - 1; 169400a23bdaSmrg else 169500a23bdaSmrg pm4[i++] = sdma_write_length; 169600a23bdaSmrg } 169700a23bdaSmrg } else if ((ip_type == AMDGPU_HW_IP_GFX) || 169800a23bdaSmrg (ip_type == AMDGPU_HW_IP_COMPUTE)) { 169900a23bdaSmrg if (family_id == AMDGPU_FAMILY_SI) { 170000a23bdaSmrg pm4[i++] = PACKET3(PACKET3_DMA_DATA_SI, 4); 170100a23bdaSmrg pm4[i++] = 0xdeadbeaf; 170200a23bdaSmrg pm4[i++] = PACKET3_DMA_DATA_SI_ENGINE(0) | 170300a23bdaSmrg PACKET3_DMA_DATA_SI_DST_SEL(0) | 170400a23bdaSmrg PACKET3_DMA_DATA_SI_SRC_SEL(2) | 170500a23bdaSmrg PACKET3_DMA_DATA_SI_CP_SYNC; 170600a23bdaSmrg pm4[i++] = 0xffffffff & bo_mc; 170700a23bdaSmrg pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32; 1708d8807b2fSmrg pm4[i++] = sdma_write_length; 170900a23bdaSmrg } else { 171000a23bdaSmrg pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5); 171100a23bdaSmrg pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) | 171200a23bdaSmrg PACKET3_DMA_DATA_DST_SEL(0) | 171300a23bdaSmrg PACKET3_DMA_DATA_SRC_SEL(2) | 171400a23bdaSmrg PACKET3_DMA_DATA_CP_SYNC; 171500a23bdaSmrg pm4[i++] = 0xdeadbeaf; 171600a23bdaSmrg pm4[i++] = 0; 171700a23bdaSmrg pm4[i++] = 0xfffffffc & bo_mc; 171800a23bdaSmrg pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32; 171900a23bdaSmrg pm4[i++] = sdma_write_length; 172000a23bdaSmrg } 1721d8807b2fSmrg } 17223f012e29Smrg 172300a23bdaSmrg amdgpu_test_exec_cs_helper(context_handle, 172400a23bdaSmrg ip_type, ring_id, 172500a23bdaSmrg i, pm4, 172600a23bdaSmrg 1, resources, 172700a23bdaSmrg ib_info, ibs_request); 17283f012e29Smrg 172900a23bdaSmrg /* verify if SDMA test result meets with expected */ 173000a23bdaSmrg i = 0; 173100a23bdaSmrg while(i < (sdma_write_length / 4)) { 173200a23bdaSmrg CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf); 173300a23bdaSmrg } 17343f012e29Smrg 173500a23bdaSmrg r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc, 173600a23bdaSmrg sdma_write_length); 173700a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 173800a23bdaSmrg loop++; 173900a23bdaSmrg } 17403f012e29Smrg } 17413f012e29Smrg /* clean resources */ 17423f012e29Smrg free(resources); 17433f012e29Smrg free(ibs_request); 17443f012e29Smrg free(ib_info); 17453f012e29Smrg free(pm4); 17463f012e29Smrg 17473f012e29Smrg /* end of test */ 17483f012e29Smrg r = amdgpu_cs_ctx_free(context_handle); 17493f012e29Smrg CU_ASSERT_EQUAL(r, 0); 17503f012e29Smrg} 17513f012e29Smrg 17523f012e29Smrgstatic void amdgpu_command_submission_sdma_const_fill(void) 17533f012e29Smrg{ 17543f012e29Smrg amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_DMA); 17553f012e29Smrg} 17563f012e29Smrg 17573f012e29Smrgstatic void amdgpu_command_submission_copy_linear_helper(unsigned ip_type) 17583f012e29Smrg{ 17593f012e29Smrg const int sdma_write_length = 1024; 17603f012e29Smrg const int pm4_dw = 256; 17613f012e29Smrg amdgpu_context_handle context_handle; 17623f012e29Smrg amdgpu_bo_handle bo1, bo2; 17633f012e29Smrg amdgpu_bo_handle *resources; 17643f012e29Smrg uint32_t *pm4; 17653f012e29Smrg struct amdgpu_cs_ib_info *ib_info; 17663f012e29Smrg struct amdgpu_cs_request *ibs_request; 17673f012e29Smrg uint64_t bo1_mc, bo2_mc; 17683f012e29Smrg volatile unsigned char *bo1_cpu, *bo2_cpu; 176900a23bdaSmrg int i, j, r, loop1, loop2, ring_id; 17703f012e29Smrg uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC}; 17713f012e29Smrg amdgpu_va_handle bo1_va_handle, bo2_va_handle; 177200a23bdaSmrg struct drm_amdgpu_info_hw_ip hw_ip_info; 17733f012e29Smrg 17743f012e29Smrg pm4 = calloc(pm4_dw, sizeof(*pm4)); 17753f012e29Smrg CU_ASSERT_NOT_EQUAL(pm4, NULL); 17763f012e29Smrg 17773f012e29Smrg ib_info = calloc(1, sizeof(*ib_info)); 17783f012e29Smrg CU_ASSERT_NOT_EQUAL(ib_info, NULL); 17793f012e29Smrg 17803f012e29Smrg ibs_request = calloc(1, sizeof(*ibs_request)); 17813f012e29Smrg CU_ASSERT_NOT_EQUAL(ibs_request, NULL); 17823f012e29Smrg 178300a23bdaSmrg r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info); 178400a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 178500a23bdaSmrg 17863f012e29Smrg r = amdgpu_cs_ctx_create(device_handle, &context_handle); 17873f012e29Smrg CU_ASSERT_EQUAL(r, 0); 17883f012e29Smrg 17893f012e29Smrg /* prepare resource */ 17903f012e29Smrg resources = calloc(2, sizeof(amdgpu_bo_handle)); 17913f012e29Smrg CU_ASSERT_NOT_EQUAL(resources, NULL); 17923f012e29Smrg 179300a23bdaSmrg for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) { 179400a23bdaSmrg loop1 = loop2 = 0; 179500a23bdaSmrg /* run 9 circle to test all mapping combination */ 179600a23bdaSmrg while(loop1 < 2) { 179700a23bdaSmrg while(loop2 < 2) { 179800a23bdaSmrg /* allocate UC bo1for sDMA use */ 179900a23bdaSmrg r = amdgpu_bo_alloc_and_map(device_handle, 180000a23bdaSmrg sdma_write_length, 4096, 180100a23bdaSmrg AMDGPU_GEM_DOMAIN_GTT, 180200a23bdaSmrg gtt_flags[loop1], &bo1, 180300a23bdaSmrg (void**)&bo1_cpu, &bo1_mc, 180400a23bdaSmrg &bo1_va_handle); 180500a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 180600a23bdaSmrg 180700a23bdaSmrg /* set bo1 */ 180800a23bdaSmrg memset((void*)bo1_cpu, 0xaa, sdma_write_length); 180900a23bdaSmrg 181000a23bdaSmrg /* allocate UC bo2 for sDMA use */ 181100a23bdaSmrg r = amdgpu_bo_alloc_and_map(device_handle, 181200a23bdaSmrg sdma_write_length, 4096, 181300a23bdaSmrg AMDGPU_GEM_DOMAIN_GTT, 181400a23bdaSmrg gtt_flags[loop2], &bo2, 181500a23bdaSmrg (void**)&bo2_cpu, &bo2_mc, 181600a23bdaSmrg &bo2_va_handle); 181700a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 181800a23bdaSmrg 181900a23bdaSmrg /* clear bo2 */ 182000a23bdaSmrg memset((void*)bo2_cpu, 0, sdma_write_length); 182100a23bdaSmrg 182200a23bdaSmrg resources[0] = bo1; 182300a23bdaSmrg resources[1] = bo2; 182400a23bdaSmrg 182500a23bdaSmrg /* fulfill PM4: test DMA copy linear */ 182600a23bdaSmrg i = j = 0; 182700a23bdaSmrg if (ip_type == AMDGPU_HW_IP_DMA) { 182800a23bdaSmrg if (family_id == AMDGPU_FAMILY_SI) { 182900a23bdaSmrg pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI, 183000a23bdaSmrg 0, 0, 0, 183100a23bdaSmrg sdma_write_length); 183200a23bdaSmrg pm4[i++] = 0xffffffff & bo2_mc; 183300a23bdaSmrg pm4[i++] = 0xffffffff & bo1_mc; 183400a23bdaSmrg pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32; 183500a23bdaSmrg pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32; 183600a23bdaSmrg } else { 183700a23bdaSmrg pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY, 183800a23bdaSmrg SDMA_COPY_SUB_OPCODE_LINEAR, 183900a23bdaSmrg 0); 184000a23bdaSmrg if (family_id >= AMDGPU_FAMILY_AI) 184100a23bdaSmrg pm4[i++] = sdma_write_length - 1; 184200a23bdaSmrg else 184300a23bdaSmrg pm4[i++] = sdma_write_length; 184400a23bdaSmrg pm4[i++] = 0; 184500a23bdaSmrg pm4[i++] = 0xffffffff & bo1_mc; 184600a23bdaSmrg pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32; 184700a23bdaSmrg pm4[i++] = 0xffffffff & bo2_mc; 184800a23bdaSmrg pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32; 184900a23bdaSmrg } 185000a23bdaSmrg } else if ((ip_type == AMDGPU_HW_IP_GFX) || 185100a23bdaSmrg (ip_type == AMDGPU_HW_IP_COMPUTE)) { 185200a23bdaSmrg if (family_id == AMDGPU_FAMILY_SI) { 185300a23bdaSmrg pm4[i++] = PACKET3(PACKET3_DMA_DATA_SI, 4); 185400a23bdaSmrg pm4[i++] = 0xfffffffc & bo1_mc; 185500a23bdaSmrg pm4[i++] = PACKET3_DMA_DATA_SI_ENGINE(0) | 185600a23bdaSmrg PACKET3_DMA_DATA_SI_DST_SEL(0) | 185700a23bdaSmrg PACKET3_DMA_DATA_SI_SRC_SEL(0) | 185800a23bdaSmrg PACKET3_DMA_DATA_SI_CP_SYNC | 185900a23bdaSmrg (0xffff00000000 & bo1_mc) >> 32; 186000a23bdaSmrg pm4[i++] = 0xfffffffc & bo2_mc; 186100a23bdaSmrg pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32; 1862d8807b2fSmrg pm4[i++] = sdma_write_length; 186300a23bdaSmrg } else { 186400a23bdaSmrg pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5); 186500a23bdaSmrg pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) | 186600a23bdaSmrg PACKET3_DMA_DATA_DST_SEL(0) | 186700a23bdaSmrg PACKET3_DMA_DATA_SRC_SEL(0) | 186800a23bdaSmrg PACKET3_DMA_DATA_CP_SYNC; 186900a23bdaSmrg pm4[i++] = 0xfffffffc & bo1_mc; 187000a23bdaSmrg pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32; 187100a23bdaSmrg pm4[i++] = 0xfffffffc & bo2_mc; 187200a23bdaSmrg pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32; 187300a23bdaSmrg pm4[i++] = sdma_write_length; 187400a23bdaSmrg } 1875d8807b2fSmrg } 18763f012e29Smrg 187700a23bdaSmrg amdgpu_test_exec_cs_helper(context_handle, 187800a23bdaSmrg ip_type, ring_id, 187900a23bdaSmrg i, pm4, 188000a23bdaSmrg 2, resources, 188100a23bdaSmrg ib_info, ibs_request); 18823f012e29Smrg 188300a23bdaSmrg /* verify if SDMA test result meets with expected */ 188400a23bdaSmrg i = 0; 188500a23bdaSmrg while(i < sdma_write_length) { 188600a23bdaSmrg CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa); 188700a23bdaSmrg } 188800a23bdaSmrg r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc, 188900a23bdaSmrg sdma_write_length); 189000a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 189100a23bdaSmrg r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc, 189200a23bdaSmrg sdma_write_length); 189300a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 189400a23bdaSmrg loop2++; 18953f012e29Smrg } 189600a23bdaSmrg loop1++; 18973f012e29Smrg } 18983f012e29Smrg } 18993f012e29Smrg /* clean resources */ 19003f012e29Smrg free(resources); 19013f012e29Smrg free(ibs_request); 19023f012e29Smrg free(ib_info); 19033f012e29Smrg free(pm4); 19043f012e29Smrg 19053f012e29Smrg /* end of test */ 19063f012e29Smrg r = amdgpu_cs_ctx_free(context_handle); 19073f012e29Smrg CU_ASSERT_EQUAL(r, 0); 19083f012e29Smrg} 19093f012e29Smrg 19103f012e29Smrgstatic void amdgpu_command_submission_sdma_copy_linear(void) 19113f012e29Smrg{ 19123f012e29Smrg amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_DMA); 19133f012e29Smrg} 19143f012e29Smrg 19153f012e29Smrgstatic void amdgpu_command_submission_sdma(void) 19163f012e29Smrg{ 19173f012e29Smrg amdgpu_command_submission_sdma_write_linear(); 19183f012e29Smrg amdgpu_command_submission_sdma_const_fill(); 19193f012e29Smrg amdgpu_command_submission_sdma_copy_linear(); 19203f012e29Smrg} 19213f012e29Smrg 1922d8807b2fSmrgstatic void amdgpu_command_submission_multi_fence_wait_all(bool wait_all) 1923d8807b2fSmrg{ 1924d8807b2fSmrg amdgpu_context_handle context_handle; 1925d8807b2fSmrg amdgpu_bo_handle ib_result_handle, ib_result_ce_handle; 1926d8807b2fSmrg void *ib_result_cpu, *ib_result_ce_cpu; 1927d8807b2fSmrg uint64_t ib_result_mc_address, ib_result_ce_mc_address; 1928d8807b2fSmrg struct amdgpu_cs_request ibs_request[2] = {0}; 1929d8807b2fSmrg struct amdgpu_cs_ib_info ib_info[2]; 1930d8807b2fSmrg struct amdgpu_cs_fence fence_status[2] = {0}; 1931d8807b2fSmrg uint32_t *ptr; 1932d8807b2fSmrg uint32_t expired; 1933d8807b2fSmrg amdgpu_bo_list_handle bo_list; 1934d8807b2fSmrg amdgpu_va_handle va_handle, va_handle_ce; 1935d8807b2fSmrg int r; 1936d8807b2fSmrg int i = 0, ib_cs_num = 2; 1937d8807b2fSmrg 1938d8807b2fSmrg r = amdgpu_cs_ctx_create(device_handle, &context_handle); 1939d8807b2fSmrg CU_ASSERT_EQUAL(r, 0); 1940d8807b2fSmrg 1941d8807b2fSmrg r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096, 1942d8807b2fSmrg AMDGPU_GEM_DOMAIN_GTT, 0, 1943d8807b2fSmrg &ib_result_handle, &ib_result_cpu, 1944d8807b2fSmrg &ib_result_mc_address, &va_handle); 1945d8807b2fSmrg CU_ASSERT_EQUAL(r, 0); 1946d8807b2fSmrg 1947d8807b2fSmrg r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096, 1948d8807b2fSmrg AMDGPU_GEM_DOMAIN_GTT, 0, 1949d8807b2fSmrg &ib_result_ce_handle, &ib_result_ce_cpu, 1950d8807b2fSmrg &ib_result_ce_mc_address, &va_handle_ce); 1951d8807b2fSmrg CU_ASSERT_EQUAL(r, 0); 1952d8807b2fSmrg 1953d8807b2fSmrg r = amdgpu_get_bo_list(device_handle, ib_result_handle, 1954d8807b2fSmrg ib_result_ce_handle, &bo_list); 1955d8807b2fSmrg CU_ASSERT_EQUAL(r, 0); 1956d8807b2fSmrg 1957d8807b2fSmrg memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info)); 1958d8807b2fSmrg 1959d8807b2fSmrg /* IT_SET_CE_DE_COUNTERS */ 1960d8807b2fSmrg ptr = ib_result_ce_cpu; 1961d8807b2fSmrg if (family_id != AMDGPU_FAMILY_SI) { 1962d8807b2fSmrg ptr[i++] = 0xc0008900; 1963d8807b2fSmrg ptr[i++] = 0; 1964d8807b2fSmrg } 1965d8807b2fSmrg ptr[i++] = 0xc0008400; 1966d8807b2fSmrg ptr[i++] = 1; 1967d8807b2fSmrg ib_info[0].ib_mc_address = ib_result_ce_mc_address; 1968d8807b2fSmrg ib_info[0].size = i; 1969d8807b2fSmrg ib_info[0].flags = AMDGPU_IB_FLAG_CE; 1970d8807b2fSmrg 1971d8807b2fSmrg /* IT_WAIT_ON_CE_COUNTER */ 1972d8807b2fSmrg ptr = ib_result_cpu; 1973d8807b2fSmrg ptr[0] = 0xc0008600; 1974d8807b2fSmrg ptr[1] = 0x00000001; 1975d8807b2fSmrg ib_info[1].ib_mc_address = ib_result_mc_address; 1976d8807b2fSmrg ib_info[1].size = 2; 1977d8807b2fSmrg 1978d8807b2fSmrg for (i = 0; i < ib_cs_num; i++) { 1979d8807b2fSmrg ibs_request[i].ip_type = AMDGPU_HW_IP_GFX; 1980d8807b2fSmrg ibs_request[i].number_of_ibs = 2; 1981d8807b2fSmrg ibs_request[i].ibs = ib_info; 1982d8807b2fSmrg ibs_request[i].resources = bo_list; 1983d8807b2fSmrg ibs_request[i].fence_info.handle = NULL; 1984d8807b2fSmrg } 1985d8807b2fSmrg 1986d8807b2fSmrg r = amdgpu_cs_submit(context_handle, 0,ibs_request, ib_cs_num); 1987d8807b2fSmrg 1988d8807b2fSmrg CU_ASSERT_EQUAL(r, 0); 1989d8807b2fSmrg 1990d8807b2fSmrg for (i = 0; i < ib_cs_num; i++) { 1991d8807b2fSmrg fence_status[i].context = context_handle; 1992d8807b2fSmrg fence_status[i].ip_type = AMDGPU_HW_IP_GFX; 1993d8807b2fSmrg fence_status[i].fence = ibs_request[i].seq_no; 1994d8807b2fSmrg } 1995d8807b2fSmrg 1996d8807b2fSmrg r = amdgpu_cs_wait_fences(fence_status, ib_cs_num, wait_all, 1997d8807b2fSmrg AMDGPU_TIMEOUT_INFINITE, 1998d8807b2fSmrg &expired, NULL); 1999d8807b2fSmrg CU_ASSERT_EQUAL(r, 0); 2000d8807b2fSmrg 2001d8807b2fSmrg r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle, 2002d8807b2fSmrg ib_result_mc_address, 4096); 2003d8807b2fSmrg CU_ASSERT_EQUAL(r, 0); 2004d8807b2fSmrg 2005d8807b2fSmrg r = amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce, 2006d8807b2fSmrg ib_result_ce_mc_address, 4096); 2007d8807b2fSmrg CU_ASSERT_EQUAL(r, 0); 2008d8807b2fSmrg 2009d8807b2fSmrg r = amdgpu_bo_list_destroy(bo_list); 2010d8807b2fSmrg CU_ASSERT_EQUAL(r, 0); 2011d8807b2fSmrg 2012d8807b2fSmrg r = amdgpu_cs_ctx_free(context_handle); 2013d8807b2fSmrg CU_ASSERT_EQUAL(r, 0); 2014d8807b2fSmrg} 2015d8807b2fSmrg 2016d8807b2fSmrgstatic void amdgpu_command_submission_multi_fence(void) 2017d8807b2fSmrg{ 2018d8807b2fSmrg amdgpu_command_submission_multi_fence_wait_all(true); 2019d8807b2fSmrg amdgpu_command_submission_multi_fence_wait_all(false); 2020d8807b2fSmrg} 2021d8807b2fSmrg 20223f012e29Smrgstatic void amdgpu_userptr_test(void) 20233f012e29Smrg{ 20243f012e29Smrg int i, r, j; 20253f012e29Smrg uint32_t *pm4 = NULL; 20263f012e29Smrg uint64_t bo_mc; 20273f012e29Smrg void *ptr = NULL; 20283f012e29Smrg int pm4_dw = 256; 20293f012e29Smrg int sdma_write_length = 4; 20303f012e29Smrg amdgpu_bo_handle handle; 20313f012e29Smrg amdgpu_context_handle context_handle; 20323f012e29Smrg struct amdgpu_cs_ib_info *ib_info; 20333f012e29Smrg struct amdgpu_cs_request *ibs_request; 20343f012e29Smrg amdgpu_bo_handle buf_handle; 20353f012e29Smrg amdgpu_va_handle va_handle; 20363f012e29Smrg 20373f012e29Smrg pm4 = calloc(pm4_dw, sizeof(*pm4)); 20383f012e29Smrg CU_ASSERT_NOT_EQUAL(pm4, NULL); 20393f012e29Smrg 20403f012e29Smrg ib_info = calloc(1, sizeof(*ib_info)); 20413f012e29Smrg CU_ASSERT_NOT_EQUAL(ib_info, NULL); 20423f012e29Smrg 20433f012e29Smrg ibs_request = calloc(1, sizeof(*ibs_request)); 20443f012e29Smrg CU_ASSERT_NOT_EQUAL(ibs_request, NULL); 20453f012e29Smrg 20463f012e29Smrg r = amdgpu_cs_ctx_create(device_handle, &context_handle); 20473f012e29Smrg CU_ASSERT_EQUAL(r, 0); 20483f012e29Smrg 20493f012e29Smrg posix_memalign(&ptr, sysconf(_SC_PAGE_SIZE), BUFFER_SIZE); 20503f012e29Smrg CU_ASSERT_NOT_EQUAL(ptr, NULL); 20513f012e29Smrg memset(ptr, 0, BUFFER_SIZE); 20523f012e29Smrg 20533f012e29Smrg r = amdgpu_create_bo_from_user_mem(device_handle, 20543f012e29Smrg ptr, BUFFER_SIZE, &buf_handle); 20553f012e29Smrg CU_ASSERT_EQUAL(r, 0); 20563f012e29Smrg 20573f012e29Smrg r = amdgpu_va_range_alloc(device_handle, 20583f012e29Smrg amdgpu_gpu_va_range_general, 20593f012e29Smrg BUFFER_SIZE, 1, 0, &bo_mc, 20603f012e29Smrg &va_handle, 0); 20613f012e29Smrg CU_ASSERT_EQUAL(r, 0); 20623f012e29Smrg 20633f012e29Smrg r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_MAP); 20643f012e29Smrg CU_ASSERT_EQUAL(r, 0); 20653f012e29Smrg 20663f012e29Smrg handle = buf_handle; 20673f012e29Smrg 20683f012e29Smrg j = i = 0; 2069d8807b2fSmrg 2070d8807b2fSmrg if (family_id == AMDGPU_FAMILY_SI) 2071d8807b2fSmrg pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0, 2072d8807b2fSmrg sdma_write_length); 2073d8807b2fSmrg else 2074d8807b2fSmrg pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE, 2075d8807b2fSmrg SDMA_WRITE_SUB_OPCODE_LINEAR, 0); 20763f012e29Smrg pm4[i++] = 0xffffffff & bo_mc; 20773f012e29Smrg pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32; 2078d8807b2fSmrg if (family_id >= AMDGPU_FAMILY_AI) 2079d8807b2fSmrg pm4[i++] = sdma_write_length - 1; 2080d8807b2fSmrg else if (family_id != AMDGPU_FAMILY_SI) 2081d8807b2fSmrg pm4[i++] = sdma_write_length; 20823f012e29Smrg 20833f012e29Smrg while (j++ < sdma_write_length) 20843f012e29Smrg pm4[i++] = 0xdeadbeaf; 20853f012e29Smrg 208600a23bdaSmrg if (!fork()) { 208700a23bdaSmrg pm4[0] = 0x0; 208800a23bdaSmrg exit(0); 208900a23bdaSmrg } 209000a23bdaSmrg 20913f012e29Smrg amdgpu_test_exec_cs_helper(context_handle, 20923f012e29Smrg AMDGPU_HW_IP_DMA, 0, 20933f012e29Smrg i, pm4, 20943f012e29Smrg 1, &handle, 20953f012e29Smrg ib_info, ibs_request); 20963f012e29Smrg i = 0; 20973f012e29Smrg while (i < sdma_write_length) { 20983f012e29Smrg CU_ASSERT_EQUAL(((int*)ptr)[i++], 0xdeadbeaf); 20993f012e29Smrg } 21003f012e29Smrg free(ibs_request); 21013f012e29Smrg free(ib_info); 21023f012e29Smrg free(pm4); 21033f012e29Smrg 21043f012e29Smrg r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_UNMAP); 21053f012e29Smrg CU_ASSERT_EQUAL(r, 0); 21063f012e29Smrg r = amdgpu_va_range_free(va_handle); 21073f012e29Smrg CU_ASSERT_EQUAL(r, 0); 21083f012e29Smrg r = amdgpu_bo_free(buf_handle); 21093f012e29Smrg CU_ASSERT_EQUAL(r, 0); 21103f012e29Smrg free(ptr); 21113f012e29Smrg 21123f012e29Smrg r = amdgpu_cs_ctx_free(context_handle); 21133f012e29Smrg CU_ASSERT_EQUAL(r, 0); 211400a23bdaSmrg 211500a23bdaSmrg wait(NULL); 211600a23bdaSmrg} 211700a23bdaSmrg 211800a23bdaSmrgstatic void amdgpu_sync_dependency_test(void) 211900a23bdaSmrg{ 212000a23bdaSmrg amdgpu_context_handle context_handle[2]; 212100a23bdaSmrg amdgpu_bo_handle ib_result_handle; 212200a23bdaSmrg void *ib_result_cpu; 212300a23bdaSmrg uint64_t ib_result_mc_address; 212400a23bdaSmrg struct amdgpu_cs_request ibs_request; 212500a23bdaSmrg struct amdgpu_cs_ib_info ib_info; 212600a23bdaSmrg struct amdgpu_cs_fence fence_status; 212700a23bdaSmrg uint32_t expired; 212800a23bdaSmrg int i, j, r; 212900a23bdaSmrg amdgpu_bo_list_handle bo_list; 213000a23bdaSmrg amdgpu_va_handle va_handle; 213100a23bdaSmrg static uint32_t *ptr; 213200a23bdaSmrg uint64_t seq_no; 213300a23bdaSmrg 213400a23bdaSmrg r = amdgpu_cs_ctx_create(device_handle, &context_handle[0]); 213500a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 213600a23bdaSmrg r = amdgpu_cs_ctx_create(device_handle, &context_handle[1]); 213700a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 213800a23bdaSmrg 213900a23bdaSmrg r = amdgpu_bo_alloc_and_map(device_handle, 8192, 4096, 214000a23bdaSmrg AMDGPU_GEM_DOMAIN_GTT, 0, 214100a23bdaSmrg &ib_result_handle, &ib_result_cpu, 214200a23bdaSmrg &ib_result_mc_address, &va_handle); 214300a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 214400a23bdaSmrg 214500a23bdaSmrg r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL, 214600a23bdaSmrg &bo_list); 214700a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 214800a23bdaSmrg 214900a23bdaSmrg ptr = ib_result_cpu; 215000a23bdaSmrg i = 0; 215100a23bdaSmrg 215200a23bdaSmrg memcpy(ptr + CODE_OFFSET , shader_bin, sizeof(shader_bin)); 215300a23bdaSmrg 215400a23bdaSmrg /* Dispatch minimal init config and verify it's executed */ 215500a23bdaSmrg ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1); 215600a23bdaSmrg ptr[i++] = 0x80000000; 215700a23bdaSmrg ptr[i++] = 0x80000000; 215800a23bdaSmrg 215900a23bdaSmrg ptr[i++] = PACKET3(PKT3_CLEAR_STATE, 0); 216000a23bdaSmrg ptr[i++] = 0x80000000; 216100a23bdaSmrg 216200a23bdaSmrg 216300a23bdaSmrg /* Program compute regs */ 216400a23bdaSmrg ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2); 216500a23bdaSmrg ptr[i++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START; 216600a23bdaSmrg ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 8; 216700a23bdaSmrg ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 40; 216800a23bdaSmrg 216900a23bdaSmrg 217000a23bdaSmrg ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2); 217100a23bdaSmrg ptr[i++] = mmCOMPUTE_PGM_RSRC1 - PACKET3_SET_SH_REG_START; 217200a23bdaSmrg /* 217300a23bdaSmrg * 002c0040 COMPUTE_PGM_RSRC1 <- VGPRS = 0 217400a23bdaSmrg SGPRS = 1 217500a23bdaSmrg PRIORITY = 0 217600a23bdaSmrg FLOAT_MODE = 192 (0xc0) 217700a23bdaSmrg PRIV = 0 217800a23bdaSmrg DX10_CLAMP = 1 217900a23bdaSmrg DEBUG_MODE = 0 218000a23bdaSmrg IEEE_MODE = 0 218100a23bdaSmrg BULKY = 0 218200a23bdaSmrg CDBG_USER = 0 218300a23bdaSmrg * 218400a23bdaSmrg */ 218500a23bdaSmrg ptr[i++] = 0x002c0040; 218600a23bdaSmrg 218700a23bdaSmrg 218800a23bdaSmrg /* 218900a23bdaSmrg * 00000010 COMPUTE_PGM_RSRC2 <- SCRATCH_EN = 0 219000a23bdaSmrg USER_SGPR = 8 219100a23bdaSmrg TRAP_PRESENT = 0 219200a23bdaSmrg TGID_X_EN = 0 219300a23bdaSmrg TGID_Y_EN = 0 219400a23bdaSmrg TGID_Z_EN = 0 219500a23bdaSmrg TG_SIZE_EN = 0 219600a23bdaSmrg TIDIG_COMP_CNT = 0 219700a23bdaSmrg EXCP_EN_MSB = 0 219800a23bdaSmrg LDS_SIZE = 0 219900a23bdaSmrg EXCP_EN = 0 220000a23bdaSmrg * 220100a23bdaSmrg */ 220200a23bdaSmrg ptr[i++] = 0x00000010; 220300a23bdaSmrg 220400a23bdaSmrg 220500a23bdaSmrg/* 220600a23bdaSmrg * 00000100 COMPUTE_TMPRING_SIZE <- WAVES = 256 (0x100) 220700a23bdaSmrg WAVESIZE = 0 220800a23bdaSmrg * 220900a23bdaSmrg */ 221000a23bdaSmrg ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1); 221100a23bdaSmrg ptr[i++] = mmCOMPUTE_TMPRING_SIZE - PACKET3_SET_SH_REG_START; 221200a23bdaSmrg ptr[i++] = 0x00000100; 221300a23bdaSmrg 221400a23bdaSmrg ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2); 221500a23bdaSmrg ptr[i++] = mmCOMPUTE_USER_DATA_0 - PACKET3_SET_SH_REG_START; 221600a23bdaSmrg ptr[i++] = 0xffffffff & (ib_result_mc_address + DATA_OFFSET * 4); 221700a23bdaSmrg ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32; 221800a23bdaSmrg 221900a23bdaSmrg ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1); 222000a23bdaSmrg ptr[i++] = mmCOMPUTE_RESOURCE_LIMITS - PACKET3_SET_SH_REG_START; 222100a23bdaSmrg ptr[i++] = 0; 222200a23bdaSmrg 222300a23bdaSmrg ptr[i++] = PACKET3(PKT3_SET_SH_REG, 3); 222400a23bdaSmrg ptr[i++] = mmCOMPUTE_NUM_THREAD_X - PACKET3_SET_SH_REG_START; 222500a23bdaSmrg ptr[i++] = 1; 222600a23bdaSmrg ptr[i++] = 1; 222700a23bdaSmrg ptr[i++] = 1; 222800a23bdaSmrg 222900a23bdaSmrg 223000a23bdaSmrg /* Dispatch */ 223100a23bdaSmrg ptr[i++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 223200a23bdaSmrg ptr[i++] = 1; 223300a23bdaSmrg ptr[i++] = 1; 223400a23bdaSmrg ptr[i++] = 1; 223500a23bdaSmrg ptr[i++] = 0x00000045; /* DISPATCH DIRECT field */ 223600a23bdaSmrg 223700a23bdaSmrg 223800a23bdaSmrg while (i & 7) 223900a23bdaSmrg ptr[i++] = 0xffff1000; /* type3 nop packet */ 224000a23bdaSmrg 224100a23bdaSmrg memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info)); 224200a23bdaSmrg ib_info.ib_mc_address = ib_result_mc_address; 224300a23bdaSmrg ib_info.size = i; 224400a23bdaSmrg 224500a23bdaSmrg memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request)); 224600a23bdaSmrg ibs_request.ip_type = AMDGPU_HW_IP_GFX; 224700a23bdaSmrg ibs_request.ring = 0; 224800a23bdaSmrg ibs_request.number_of_ibs = 1; 224900a23bdaSmrg ibs_request.ibs = &ib_info; 225000a23bdaSmrg ibs_request.resources = bo_list; 225100a23bdaSmrg ibs_request.fence_info.handle = NULL; 225200a23bdaSmrg 225300a23bdaSmrg r = amdgpu_cs_submit(context_handle[1], 0,&ibs_request, 1); 225400a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 225500a23bdaSmrg seq_no = ibs_request.seq_no; 225600a23bdaSmrg 225700a23bdaSmrg 225800a23bdaSmrg 225900a23bdaSmrg /* Prepare second command with dependency on the first */ 226000a23bdaSmrg j = i; 226100a23bdaSmrg ptr[i++] = PACKET3(PACKET3_WRITE_DATA, 3); 226200a23bdaSmrg ptr[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM; 226300a23bdaSmrg ptr[i++] = 0xfffffffc & (ib_result_mc_address + DATA_OFFSET * 4); 226400a23bdaSmrg ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32; 226500a23bdaSmrg ptr[i++] = 99; 226600a23bdaSmrg 226700a23bdaSmrg while (i & 7) 226800a23bdaSmrg ptr[i++] = 0xffff1000; /* type3 nop packet */ 226900a23bdaSmrg 227000a23bdaSmrg memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info)); 227100a23bdaSmrg ib_info.ib_mc_address = ib_result_mc_address + j * 4; 227200a23bdaSmrg ib_info.size = i - j; 227300a23bdaSmrg 227400a23bdaSmrg memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request)); 227500a23bdaSmrg ibs_request.ip_type = AMDGPU_HW_IP_GFX; 227600a23bdaSmrg ibs_request.ring = 0; 227700a23bdaSmrg ibs_request.number_of_ibs = 1; 227800a23bdaSmrg ibs_request.ibs = &ib_info; 227900a23bdaSmrg ibs_request.resources = bo_list; 228000a23bdaSmrg ibs_request.fence_info.handle = NULL; 228100a23bdaSmrg 228200a23bdaSmrg ibs_request.number_of_dependencies = 1; 228300a23bdaSmrg 228400a23bdaSmrg ibs_request.dependencies = calloc(1, sizeof(*ibs_request.dependencies)); 228500a23bdaSmrg ibs_request.dependencies[0].context = context_handle[1]; 228600a23bdaSmrg ibs_request.dependencies[0].ip_instance = 0; 228700a23bdaSmrg ibs_request.dependencies[0].ring = 0; 228800a23bdaSmrg ibs_request.dependencies[0].fence = seq_no; 228900a23bdaSmrg 229000a23bdaSmrg 229100a23bdaSmrg r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request, 1); 229200a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 229300a23bdaSmrg 229400a23bdaSmrg 229500a23bdaSmrg memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence)); 229600a23bdaSmrg fence_status.context = context_handle[0]; 229700a23bdaSmrg fence_status.ip_type = AMDGPU_HW_IP_GFX; 229800a23bdaSmrg fence_status.ip_instance = 0; 229900a23bdaSmrg fence_status.ring = 0; 230000a23bdaSmrg fence_status.fence = ibs_request.seq_no; 230100a23bdaSmrg 230200a23bdaSmrg r = amdgpu_cs_query_fence_status(&fence_status, 230300a23bdaSmrg AMDGPU_TIMEOUT_INFINITE,0, &expired); 230400a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 230500a23bdaSmrg 230600a23bdaSmrg /* Expect the second command to wait for shader to complete */ 230700a23bdaSmrg CU_ASSERT_EQUAL(ptr[DATA_OFFSET], 99); 230800a23bdaSmrg 230900a23bdaSmrg r = amdgpu_bo_list_destroy(bo_list); 231000a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 231100a23bdaSmrg 231200a23bdaSmrg r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle, 231300a23bdaSmrg ib_result_mc_address, 4096); 231400a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 231500a23bdaSmrg 231600a23bdaSmrg r = amdgpu_cs_ctx_free(context_handle[0]); 231700a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 231800a23bdaSmrg r = amdgpu_cs_ctx_free(context_handle[1]); 231900a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 232000a23bdaSmrg 232100a23bdaSmrg free(ibs_request.dependencies); 23223f012e29Smrg} 23235324fb0dSmrg 23249bd392adSmrgstatic int amdgpu_dispatch_load_cs_shader_hang_slow(uint32_t *ptr, int family) 23259bd392adSmrg{ 23269bd392adSmrg struct amdgpu_test_shader *shader; 23279bd392adSmrg int i, loop = 0x10000; 23289bd392adSmrg 23299bd392adSmrg switch (family) { 23309bd392adSmrg case AMDGPU_FAMILY_AI: 23319bd392adSmrg shader = &memcpy_cs_hang_slow_ai; 23329bd392adSmrg break; 23339bd392adSmrg case AMDGPU_FAMILY_RV: 23349bd392adSmrg shader = &memcpy_cs_hang_slow_rv; 23359bd392adSmrg break; 23369bd392adSmrg default: 23379bd392adSmrg return -1; 23389bd392adSmrg break; 23399bd392adSmrg } 23409bd392adSmrg 23419bd392adSmrg memcpy(ptr, shader->shader, shader->header_length * sizeof(uint32_t)); 23429bd392adSmrg 23439bd392adSmrg for (i = 0; i < loop; i++) 23449bd392adSmrg memcpy(ptr + shader->header_length + shader->body_length * i, 23459bd392adSmrg shader->shader + shader->header_length, 23469bd392adSmrg shader->body_length * sizeof(uint32_t)); 23479bd392adSmrg 23489bd392adSmrg memcpy(ptr + shader->header_length + shader->body_length * loop, 23499bd392adSmrg shader->shader + shader->header_length + shader->body_length, 23509bd392adSmrg shader->foot_length * sizeof(uint32_t)); 23519bd392adSmrg 23529bd392adSmrg return 0; 23539bd392adSmrg} 23549bd392adSmrg 23555324fb0dSmrgstatic int amdgpu_dispatch_load_cs_shader(uint8_t *ptr, 23565324fb0dSmrg int cs_type) 23575324fb0dSmrg{ 23585324fb0dSmrg uint32_t shader_size; 23595324fb0dSmrg const uint32_t *shader; 23605324fb0dSmrg 23615324fb0dSmrg switch (cs_type) { 23625324fb0dSmrg case CS_BUFFERCLEAR: 23635324fb0dSmrg shader = bufferclear_cs_shader_gfx9; 23645324fb0dSmrg shader_size = sizeof(bufferclear_cs_shader_gfx9); 23655324fb0dSmrg break; 23665324fb0dSmrg case CS_BUFFERCOPY: 23675324fb0dSmrg shader = buffercopy_cs_shader_gfx9; 23685324fb0dSmrg shader_size = sizeof(buffercopy_cs_shader_gfx9); 23695324fb0dSmrg break; 23709bd392adSmrg case CS_HANG: 23719bd392adSmrg shader = memcpy_ps_hang; 23729bd392adSmrg shader_size = sizeof(memcpy_ps_hang); 23739bd392adSmrg break; 23745324fb0dSmrg default: 23755324fb0dSmrg return -1; 23765324fb0dSmrg break; 23775324fb0dSmrg } 23785324fb0dSmrg 23795324fb0dSmrg memcpy(ptr, shader, shader_size); 23805324fb0dSmrg return 0; 23815324fb0dSmrg} 23825324fb0dSmrg 23835324fb0dSmrgstatic int amdgpu_dispatch_init(uint32_t *ptr, uint32_t ip_type) 23845324fb0dSmrg{ 23855324fb0dSmrg int i = 0; 23865324fb0dSmrg 23875324fb0dSmrg /* Write context control and load shadowing register if necessary */ 23885324fb0dSmrg if (ip_type == AMDGPU_HW_IP_GFX) { 23895324fb0dSmrg ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1); 23905324fb0dSmrg ptr[i++] = 0x80000000; 23915324fb0dSmrg ptr[i++] = 0x80000000; 23925324fb0dSmrg } 23935324fb0dSmrg 23945324fb0dSmrg /* Issue commands to set default compute state. */ 23955324fb0dSmrg /* clear mmCOMPUTE_START_Z - mmCOMPUTE_START_X */ 23965324fb0dSmrg ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 3); 23975324fb0dSmrg ptr[i++] = 0x204; 23985324fb0dSmrg i += 3; 239988f8a8d2Smrg 24005324fb0dSmrg /* clear mmCOMPUTE_TMPRING_SIZE */ 24015324fb0dSmrg ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1); 24025324fb0dSmrg ptr[i++] = 0x218; 24035324fb0dSmrg ptr[i++] = 0; 24045324fb0dSmrg 24055324fb0dSmrg return i; 24065324fb0dSmrg} 24075324fb0dSmrg 24085324fb0dSmrgstatic int amdgpu_dispatch_write_cumask(uint32_t *ptr) 24095324fb0dSmrg{ 24105324fb0dSmrg int i = 0; 24115324fb0dSmrg 24125324fb0dSmrg /* Issue commands to set cu mask used in current dispatch */ 24135324fb0dSmrg /* set mmCOMPUTE_STATIC_THREAD_MGMT_SE1 - mmCOMPUTE_STATIC_THREAD_MGMT_SE0 */ 24145324fb0dSmrg ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2); 24155324fb0dSmrg ptr[i++] = 0x216; 24165324fb0dSmrg ptr[i++] = 0xffffffff; 24175324fb0dSmrg ptr[i++] = 0xffffffff; 24185324fb0dSmrg /* set mmCOMPUTE_STATIC_THREAD_MGMT_SE3 - mmCOMPUTE_STATIC_THREAD_MGMT_SE2 */ 24195324fb0dSmrg ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2); 24205324fb0dSmrg ptr[i++] = 0x219; 24215324fb0dSmrg ptr[i++] = 0xffffffff; 24225324fb0dSmrg ptr[i++] = 0xffffffff; 24235324fb0dSmrg 24245324fb0dSmrg return i; 24255324fb0dSmrg} 24265324fb0dSmrg 24275324fb0dSmrgstatic int amdgpu_dispatch_write2hw(uint32_t *ptr, uint64_t shader_addr) 24285324fb0dSmrg{ 24295324fb0dSmrg int i, j; 24305324fb0dSmrg 24315324fb0dSmrg i = 0; 24325324fb0dSmrg 24335324fb0dSmrg /* Writes shader state to HW */ 24345324fb0dSmrg /* set mmCOMPUTE_PGM_HI - mmCOMPUTE_PGM_LO */ 24355324fb0dSmrg ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2); 24365324fb0dSmrg ptr[i++] = 0x20c; 24375324fb0dSmrg ptr[i++] = (shader_addr >> 8); 24385324fb0dSmrg ptr[i++] = (shader_addr >> 40); 24395324fb0dSmrg /* write sh regs*/ 24405324fb0dSmrg for (j = 0; j < bufferclear_cs_shader_registers_num_gfx9; j++) { 24415324fb0dSmrg ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1); 24425324fb0dSmrg /* - Gfx9ShRegBase */ 24435324fb0dSmrg ptr[i++] = bufferclear_cs_shader_registers_gfx9[j][0] - 0x2c00; 24445324fb0dSmrg ptr[i++] = bufferclear_cs_shader_registers_gfx9[j][1]; 24455324fb0dSmrg } 24465324fb0dSmrg 24475324fb0dSmrg return i; 24485324fb0dSmrg} 24495324fb0dSmrg 24505324fb0dSmrgstatic void amdgpu_memset_dispatch_test(amdgpu_device_handle device_handle, 24515324fb0dSmrg uint32_t ip_type, 24525324fb0dSmrg uint32_t ring) 24535324fb0dSmrg{ 24545324fb0dSmrg amdgpu_context_handle context_handle; 24555324fb0dSmrg amdgpu_bo_handle bo_dst, bo_shader, bo_cmd, resources[3]; 24565324fb0dSmrg volatile unsigned char *ptr_dst; 24575324fb0dSmrg void *ptr_shader; 24585324fb0dSmrg uint32_t *ptr_cmd; 24595324fb0dSmrg uint64_t mc_address_dst, mc_address_shader, mc_address_cmd; 24605324fb0dSmrg amdgpu_va_handle va_dst, va_shader, va_cmd; 24615324fb0dSmrg int i, r; 24625324fb0dSmrg int bo_dst_size = 16384; 24635324fb0dSmrg int bo_shader_size = 4096; 24645324fb0dSmrg int bo_cmd_size = 4096; 24655324fb0dSmrg struct amdgpu_cs_request ibs_request = {0}; 24665324fb0dSmrg struct amdgpu_cs_ib_info ib_info= {0}; 24675324fb0dSmrg amdgpu_bo_list_handle bo_list; 24685324fb0dSmrg struct amdgpu_cs_fence fence_status = {0}; 24695324fb0dSmrg uint32_t expired; 24705324fb0dSmrg 24715324fb0dSmrg r = amdgpu_cs_ctx_create(device_handle, &context_handle); 24725324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 24735324fb0dSmrg 24745324fb0dSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096, 24755324fb0dSmrg AMDGPU_GEM_DOMAIN_GTT, 0, 24765324fb0dSmrg &bo_cmd, (void **)&ptr_cmd, 24775324fb0dSmrg &mc_address_cmd, &va_cmd); 24785324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 24795324fb0dSmrg memset(ptr_cmd, 0, bo_cmd_size); 24805324fb0dSmrg 24815324fb0dSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096, 24825324fb0dSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, 24835324fb0dSmrg &bo_shader, &ptr_shader, 24845324fb0dSmrg &mc_address_shader, &va_shader); 24855324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 248688f8a8d2Smrg memset(ptr_shader, 0, bo_shader_size); 24875324fb0dSmrg 24885324fb0dSmrg r = amdgpu_dispatch_load_cs_shader(ptr_shader, CS_BUFFERCLEAR); 24895324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 24905324fb0dSmrg 24915324fb0dSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096, 24925324fb0dSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, 24935324fb0dSmrg &bo_dst, (void **)&ptr_dst, 24945324fb0dSmrg &mc_address_dst, &va_dst); 24955324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 24965324fb0dSmrg 24975324fb0dSmrg i = 0; 24985324fb0dSmrg i += amdgpu_dispatch_init(ptr_cmd + i, ip_type); 24995324fb0dSmrg 25005324fb0dSmrg /* Issue commands to set cu mask used in current dispatch */ 25015324fb0dSmrg i += amdgpu_dispatch_write_cumask(ptr_cmd + i); 25025324fb0dSmrg 25035324fb0dSmrg /* Writes shader state to HW */ 25045324fb0dSmrg i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader); 25055324fb0dSmrg 25065324fb0dSmrg /* Write constant data */ 25075324fb0dSmrg /* Writes the UAV constant data to the SGPRs. */ 25085324fb0dSmrg ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4); 25095324fb0dSmrg ptr_cmd[i++] = 0x240; 25105324fb0dSmrg ptr_cmd[i++] = mc_address_dst; 25115324fb0dSmrg ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000; 25125324fb0dSmrg ptr_cmd[i++] = 0x400; 25135324fb0dSmrg ptr_cmd[i++] = 0x74fac; 25145324fb0dSmrg 25155324fb0dSmrg /* Sets a range of pixel shader constants */ 25165324fb0dSmrg ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4); 25175324fb0dSmrg ptr_cmd[i++] = 0x244; 25185324fb0dSmrg ptr_cmd[i++] = 0x22222222; 25195324fb0dSmrg ptr_cmd[i++] = 0x22222222; 25205324fb0dSmrg ptr_cmd[i++] = 0x22222222; 25215324fb0dSmrg ptr_cmd[i++] = 0x22222222; 25225324fb0dSmrg 252388f8a8d2Smrg /* clear mmCOMPUTE_RESOURCE_LIMITS */ 252488f8a8d2Smrg ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1); 252588f8a8d2Smrg ptr_cmd[i++] = 0x215; 252688f8a8d2Smrg ptr_cmd[i++] = 0; 252788f8a8d2Smrg 25285324fb0dSmrg /* dispatch direct command */ 25295324fb0dSmrg ptr_cmd[i++] = PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3); 25305324fb0dSmrg ptr_cmd[i++] = 0x10; 25315324fb0dSmrg ptr_cmd[i++] = 1; 25325324fb0dSmrg ptr_cmd[i++] = 1; 25335324fb0dSmrg ptr_cmd[i++] = 1; 25345324fb0dSmrg 25355324fb0dSmrg while (i & 7) 25365324fb0dSmrg ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */ 25375324fb0dSmrg 25385324fb0dSmrg resources[0] = bo_dst; 25395324fb0dSmrg resources[1] = bo_shader; 25405324fb0dSmrg resources[2] = bo_cmd; 25415324fb0dSmrg r = amdgpu_bo_list_create(device_handle, 3, resources, NULL, &bo_list); 25425324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 25435324fb0dSmrg 25445324fb0dSmrg ib_info.ib_mc_address = mc_address_cmd; 25455324fb0dSmrg ib_info.size = i; 25465324fb0dSmrg ibs_request.ip_type = ip_type; 25475324fb0dSmrg ibs_request.ring = ring; 25485324fb0dSmrg ibs_request.resources = bo_list; 25495324fb0dSmrg ibs_request.number_of_ibs = 1; 25505324fb0dSmrg ibs_request.ibs = &ib_info; 25515324fb0dSmrg ibs_request.fence_info.handle = NULL; 25525324fb0dSmrg 25535324fb0dSmrg /* submit CS */ 25545324fb0dSmrg r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1); 25555324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 25565324fb0dSmrg 25575324fb0dSmrg r = amdgpu_bo_list_destroy(bo_list); 25585324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 25595324fb0dSmrg 25605324fb0dSmrg fence_status.ip_type = ip_type; 25615324fb0dSmrg fence_status.ip_instance = 0; 25625324fb0dSmrg fence_status.ring = ring; 25635324fb0dSmrg fence_status.context = context_handle; 25645324fb0dSmrg fence_status.fence = ibs_request.seq_no; 25655324fb0dSmrg 25665324fb0dSmrg /* wait for IB accomplished */ 25675324fb0dSmrg r = amdgpu_cs_query_fence_status(&fence_status, 25685324fb0dSmrg AMDGPU_TIMEOUT_INFINITE, 25695324fb0dSmrg 0, &expired); 25705324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 25715324fb0dSmrg CU_ASSERT_EQUAL(expired, true); 25725324fb0dSmrg 25735324fb0dSmrg /* verify if memset test result meets with expected */ 25745324fb0dSmrg i = 0; 25755324fb0dSmrg while(i < bo_dst_size) { 25765324fb0dSmrg CU_ASSERT_EQUAL(ptr_dst[i++], 0x22); 25775324fb0dSmrg } 25785324fb0dSmrg 25795324fb0dSmrg r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size); 25805324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 25815324fb0dSmrg 25825324fb0dSmrg r = amdgpu_bo_unmap_and_free(bo_shader, va_shader, mc_address_shader, bo_shader_size); 25835324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 25845324fb0dSmrg 25855324fb0dSmrg r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size); 25865324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 25875324fb0dSmrg 25885324fb0dSmrg r = amdgpu_cs_ctx_free(context_handle); 25895324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 25905324fb0dSmrg} 25915324fb0dSmrg 25925324fb0dSmrgstatic void amdgpu_memcpy_dispatch_test(amdgpu_device_handle device_handle, 25935324fb0dSmrg uint32_t ip_type, 25949bd392adSmrg uint32_t ring, 25959bd392adSmrg int hang) 25965324fb0dSmrg{ 25975324fb0dSmrg amdgpu_context_handle context_handle; 25985324fb0dSmrg amdgpu_bo_handle bo_src, bo_dst, bo_shader, bo_cmd, resources[4]; 25995324fb0dSmrg volatile unsigned char *ptr_dst; 26005324fb0dSmrg void *ptr_shader; 26015324fb0dSmrg unsigned char *ptr_src; 26025324fb0dSmrg uint32_t *ptr_cmd; 26035324fb0dSmrg uint64_t mc_address_src, mc_address_dst, mc_address_shader, mc_address_cmd; 26045324fb0dSmrg amdgpu_va_handle va_src, va_dst, va_shader, va_cmd; 26055324fb0dSmrg int i, r; 26065324fb0dSmrg int bo_dst_size = 16384; 26075324fb0dSmrg int bo_shader_size = 4096; 26085324fb0dSmrg int bo_cmd_size = 4096; 26095324fb0dSmrg struct amdgpu_cs_request ibs_request = {0}; 26105324fb0dSmrg struct amdgpu_cs_ib_info ib_info= {0}; 26119bd392adSmrg uint32_t expired, hang_state, hangs; 26129bd392adSmrg enum cs_type cs_type; 26135324fb0dSmrg amdgpu_bo_list_handle bo_list; 26145324fb0dSmrg struct amdgpu_cs_fence fence_status = {0}; 26155324fb0dSmrg 26165324fb0dSmrg r = amdgpu_cs_ctx_create(device_handle, &context_handle); 26175324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 26185324fb0dSmrg 26195324fb0dSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096, 26205324fb0dSmrg AMDGPU_GEM_DOMAIN_GTT, 0, 26215324fb0dSmrg &bo_cmd, (void **)&ptr_cmd, 26225324fb0dSmrg &mc_address_cmd, &va_cmd); 26235324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 26245324fb0dSmrg memset(ptr_cmd, 0, bo_cmd_size); 26255324fb0dSmrg 26265324fb0dSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096, 26275324fb0dSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, 26285324fb0dSmrg &bo_shader, &ptr_shader, 26295324fb0dSmrg &mc_address_shader, &va_shader); 26305324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 263188f8a8d2Smrg memset(ptr_shader, 0, bo_shader_size); 26325324fb0dSmrg 26339bd392adSmrg cs_type = hang ? CS_HANG : CS_BUFFERCOPY; 26349bd392adSmrg r = amdgpu_dispatch_load_cs_shader(ptr_shader, cs_type); 26355324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 26365324fb0dSmrg 26375324fb0dSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096, 26385324fb0dSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, 26395324fb0dSmrg &bo_src, (void **)&ptr_src, 26405324fb0dSmrg &mc_address_src, &va_src); 26415324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 26425324fb0dSmrg 26435324fb0dSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096, 26445324fb0dSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, 26455324fb0dSmrg &bo_dst, (void **)&ptr_dst, 26465324fb0dSmrg &mc_address_dst, &va_dst); 26475324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 26485324fb0dSmrg 26495324fb0dSmrg memset(ptr_src, 0x55, bo_dst_size); 26505324fb0dSmrg 26515324fb0dSmrg i = 0; 26525324fb0dSmrg i += amdgpu_dispatch_init(ptr_cmd + i, ip_type); 26535324fb0dSmrg 26545324fb0dSmrg /* Issue commands to set cu mask used in current dispatch */ 26555324fb0dSmrg i += amdgpu_dispatch_write_cumask(ptr_cmd + i); 26565324fb0dSmrg 26575324fb0dSmrg /* Writes shader state to HW */ 26585324fb0dSmrg i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader); 26595324fb0dSmrg 26605324fb0dSmrg /* Write constant data */ 26615324fb0dSmrg /* Writes the texture resource constants data to the SGPRs */ 26625324fb0dSmrg ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4); 26635324fb0dSmrg ptr_cmd[i++] = 0x240; 26645324fb0dSmrg ptr_cmd[i++] = mc_address_src; 26655324fb0dSmrg ptr_cmd[i++] = (mc_address_src >> 32) | 0x100000; 26665324fb0dSmrg ptr_cmd[i++] = 0x400; 26675324fb0dSmrg ptr_cmd[i++] = 0x74fac; 26685324fb0dSmrg 26695324fb0dSmrg /* Writes the UAV constant data to the SGPRs. */ 26705324fb0dSmrg ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4); 26715324fb0dSmrg ptr_cmd[i++] = 0x244; 26725324fb0dSmrg ptr_cmd[i++] = mc_address_dst; 26735324fb0dSmrg ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000; 26745324fb0dSmrg ptr_cmd[i++] = 0x400; 26755324fb0dSmrg ptr_cmd[i++] = 0x74fac; 26765324fb0dSmrg 267788f8a8d2Smrg /* clear mmCOMPUTE_RESOURCE_LIMITS */ 267888f8a8d2Smrg ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1); 267988f8a8d2Smrg ptr_cmd[i++] = 0x215; 268088f8a8d2Smrg ptr_cmd[i++] = 0; 268188f8a8d2Smrg 26825324fb0dSmrg /* dispatch direct command */ 26835324fb0dSmrg ptr_cmd[i++] = PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3); 26845324fb0dSmrg ptr_cmd[i++] = 0x10; 26855324fb0dSmrg ptr_cmd[i++] = 1; 26865324fb0dSmrg ptr_cmd[i++] = 1; 26875324fb0dSmrg ptr_cmd[i++] = 1; 26885324fb0dSmrg 26895324fb0dSmrg while (i & 7) 26905324fb0dSmrg ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */ 26915324fb0dSmrg 26925324fb0dSmrg resources[0] = bo_shader; 26935324fb0dSmrg resources[1] = bo_src; 26945324fb0dSmrg resources[2] = bo_dst; 26955324fb0dSmrg resources[3] = bo_cmd; 26965324fb0dSmrg r = amdgpu_bo_list_create(device_handle, 4, resources, NULL, &bo_list); 26975324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 26985324fb0dSmrg 26995324fb0dSmrg ib_info.ib_mc_address = mc_address_cmd; 27005324fb0dSmrg ib_info.size = i; 27015324fb0dSmrg ibs_request.ip_type = ip_type; 27025324fb0dSmrg ibs_request.ring = ring; 27035324fb0dSmrg ibs_request.resources = bo_list; 27045324fb0dSmrg ibs_request.number_of_ibs = 1; 27055324fb0dSmrg ibs_request.ibs = &ib_info; 27065324fb0dSmrg ibs_request.fence_info.handle = NULL; 27075324fb0dSmrg r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1); 27085324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 27095324fb0dSmrg 27105324fb0dSmrg fence_status.ip_type = ip_type; 27115324fb0dSmrg fence_status.ip_instance = 0; 27125324fb0dSmrg fence_status.ring = ring; 27135324fb0dSmrg fence_status.context = context_handle; 27145324fb0dSmrg fence_status.fence = ibs_request.seq_no; 27155324fb0dSmrg 27165324fb0dSmrg /* wait for IB accomplished */ 27175324fb0dSmrg r = amdgpu_cs_query_fence_status(&fence_status, 27185324fb0dSmrg AMDGPU_TIMEOUT_INFINITE, 27195324fb0dSmrg 0, &expired); 27205324fb0dSmrg 27219bd392adSmrg if (!hang) { 27229bd392adSmrg CU_ASSERT_EQUAL(r, 0); 27239bd392adSmrg CU_ASSERT_EQUAL(expired, true); 27249bd392adSmrg 27259bd392adSmrg /* verify if memcpy test result meets with expected */ 27269bd392adSmrg i = 0; 27279bd392adSmrg while(i < bo_dst_size) { 27289bd392adSmrg CU_ASSERT_EQUAL(ptr_dst[i], ptr_src[i]); 27299bd392adSmrg i++; 27309bd392adSmrg } 27319bd392adSmrg } else { 27329bd392adSmrg r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs); 27339bd392adSmrg CU_ASSERT_EQUAL(r, 0); 27349bd392adSmrg CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET); 27355324fb0dSmrg } 27365324fb0dSmrg 27375324fb0dSmrg r = amdgpu_bo_list_destroy(bo_list); 27385324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 27395324fb0dSmrg 27405324fb0dSmrg r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_dst_size); 27415324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 27425324fb0dSmrg r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size); 27435324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 27445324fb0dSmrg 27455324fb0dSmrg r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size); 27465324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 27475324fb0dSmrg 27485324fb0dSmrg r = amdgpu_bo_unmap_and_free(bo_shader, va_shader, mc_address_shader, bo_shader_size); 27495324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 27505324fb0dSmrg 27515324fb0dSmrg r = amdgpu_cs_ctx_free(context_handle); 27525324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 27535324fb0dSmrg} 275488f8a8d2Smrg 275588f8a8d2Smrgstatic void amdgpu_compute_dispatch_test(void) 27565324fb0dSmrg{ 27575324fb0dSmrg int r; 27585324fb0dSmrg struct drm_amdgpu_info_hw_ip info; 27595324fb0dSmrg uint32_t ring_id; 27605324fb0dSmrg 27615324fb0dSmrg r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_COMPUTE, 0, &info); 27625324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 276388f8a8d2Smrg if (!info.available_rings) 276488f8a8d2Smrg printf("SKIP ... as there's no compute ring\n"); 27655324fb0dSmrg 27665324fb0dSmrg for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) { 27675324fb0dSmrg amdgpu_memset_dispatch_test(device_handle, AMDGPU_HW_IP_COMPUTE, ring_id); 27689bd392adSmrg amdgpu_memcpy_dispatch_test(device_handle, AMDGPU_HW_IP_COMPUTE, ring_id, 0); 27695324fb0dSmrg } 277088f8a8d2Smrg} 277188f8a8d2Smrg 277288f8a8d2Smrgstatic void amdgpu_gfx_dispatch_test(void) 277388f8a8d2Smrg{ 277488f8a8d2Smrg int r; 277588f8a8d2Smrg struct drm_amdgpu_info_hw_ip info; 277688f8a8d2Smrg uint32_t ring_id; 27775324fb0dSmrg 27785324fb0dSmrg r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info); 27795324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 278088f8a8d2Smrg if (!info.available_rings) 278188f8a8d2Smrg printf("SKIP ... as there's no graphics ring\n"); 27825324fb0dSmrg 27835324fb0dSmrg for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) { 27845324fb0dSmrg amdgpu_memset_dispatch_test(device_handle, AMDGPU_HW_IP_GFX, ring_id); 27859bd392adSmrg amdgpu_memcpy_dispatch_test(device_handle, AMDGPU_HW_IP_GFX, ring_id, 0); 27869bd392adSmrg } 27879bd392adSmrg} 27889bd392adSmrg 27899bd392adSmrgvoid amdgpu_dispatch_hang_helper(amdgpu_device_handle device_handle, uint32_t ip_type) 27909bd392adSmrg{ 27919bd392adSmrg int r; 27929bd392adSmrg struct drm_amdgpu_info_hw_ip info; 27939bd392adSmrg uint32_t ring_id; 27949bd392adSmrg 27959bd392adSmrg r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &info); 27969bd392adSmrg CU_ASSERT_EQUAL(r, 0); 27979bd392adSmrg if (!info.available_rings) 27989bd392adSmrg printf("SKIP ... as there's no ring for ip %d\n", ip_type); 27999bd392adSmrg 28009bd392adSmrg for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) { 28019bd392adSmrg amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 0); 28029bd392adSmrg amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 1); 28039bd392adSmrg amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 0); 28049bd392adSmrg } 28059bd392adSmrg} 28069bd392adSmrg 28079bd392adSmrgstatic void amdgpu_memcpy_dispatch_hang_slow_test(amdgpu_device_handle device_handle, 28089bd392adSmrg uint32_t ip_type, uint32_t ring) 28099bd392adSmrg{ 28109bd392adSmrg amdgpu_context_handle context_handle; 28119bd392adSmrg amdgpu_bo_handle bo_src, bo_dst, bo_shader, bo_cmd, resources[4]; 28129bd392adSmrg volatile unsigned char *ptr_dst; 28139bd392adSmrg void *ptr_shader; 28149bd392adSmrg unsigned char *ptr_src; 28159bd392adSmrg uint32_t *ptr_cmd; 28169bd392adSmrg uint64_t mc_address_src, mc_address_dst, mc_address_shader, mc_address_cmd; 28179bd392adSmrg amdgpu_va_handle va_src, va_dst, va_shader, va_cmd; 28189bd392adSmrg int i, r; 28199bd392adSmrg int bo_dst_size = 0x4000000; 28209bd392adSmrg int bo_shader_size = 0x400000; 28219bd392adSmrg int bo_cmd_size = 4096; 28229bd392adSmrg struct amdgpu_cs_request ibs_request = {0}; 28239bd392adSmrg struct amdgpu_cs_ib_info ib_info= {0}; 28249bd392adSmrg uint32_t hang_state, hangs, expired; 28259bd392adSmrg struct amdgpu_gpu_info gpu_info = {0}; 28269bd392adSmrg amdgpu_bo_list_handle bo_list; 28279bd392adSmrg struct amdgpu_cs_fence fence_status = {0}; 28289bd392adSmrg 28299bd392adSmrg r = amdgpu_query_gpu_info(device_handle, &gpu_info); 28309bd392adSmrg CU_ASSERT_EQUAL(r, 0); 28319bd392adSmrg 28329bd392adSmrg r = amdgpu_cs_ctx_create(device_handle, &context_handle); 28339bd392adSmrg CU_ASSERT_EQUAL(r, 0); 28349bd392adSmrg 28359bd392adSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096, 28369bd392adSmrg AMDGPU_GEM_DOMAIN_GTT, 0, 28379bd392adSmrg &bo_cmd, (void **)&ptr_cmd, 28389bd392adSmrg &mc_address_cmd, &va_cmd); 28399bd392adSmrg CU_ASSERT_EQUAL(r, 0); 28409bd392adSmrg memset(ptr_cmd, 0, bo_cmd_size); 28419bd392adSmrg 28429bd392adSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096, 28439bd392adSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, 28449bd392adSmrg &bo_shader, &ptr_shader, 28459bd392adSmrg &mc_address_shader, &va_shader); 28469bd392adSmrg CU_ASSERT_EQUAL(r, 0); 28479bd392adSmrg memset(ptr_shader, 0, bo_shader_size); 28489bd392adSmrg 28499bd392adSmrg r = amdgpu_dispatch_load_cs_shader_hang_slow(ptr_shader, gpu_info.family_id); 28509bd392adSmrg CU_ASSERT_EQUAL(r, 0); 28519bd392adSmrg 28529bd392adSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096, 28539bd392adSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, 28549bd392adSmrg &bo_src, (void **)&ptr_src, 28559bd392adSmrg &mc_address_src, &va_src); 28569bd392adSmrg CU_ASSERT_EQUAL(r, 0); 28579bd392adSmrg 28589bd392adSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096, 28599bd392adSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, 28609bd392adSmrg &bo_dst, (void **)&ptr_dst, 28619bd392adSmrg &mc_address_dst, &va_dst); 28629bd392adSmrg CU_ASSERT_EQUAL(r, 0); 28639bd392adSmrg 28649bd392adSmrg memset(ptr_src, 0x55, bo_dst_size); 28659bd392adSmrg 28669bd392adSmrg i = 0; 28679bd392adSmrg i += amdgpu_dispatch_init(ptr_cmd + i, ip_type); 28689bd392adSmrg 28699bd392adSmrg /* Issue commands to set cu mask used in current dispatch */ 28709bd392adSmrg i += amdgpu_dispatch_write_cumask(ptr_cmd + i); 28719bd392adSmrg 28729bd392adSmrg /* Writes shader state to HW */ 28739bd392adSmrg i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader); 28749bd392adSmrg 28759bd392adSmrg /* Write constant data */ 28769bd392adSmrg /* Writes the texture resource constants data to the SGPRs */ 28779bd392adSmrg ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4); 28789bd392adSmrg ptr_cmd[i++] = 0x240; 28799bd392adSmrg ptr_cmd[i++] = mc_address_src; 28809bd392adSmrg ptr_cmd[i++] = (mc_address_src >> 32) | 0x100000; 28819bd392adSmrg ptr_cmd[i++] = 0x400000; 28829bd392adSmrg ptr_cmd[i++] = 0x74fac; 28839bd392adSmrg 28849bd392adSmrg /* Writes the UAV constant data to the SGPRs. */ 28859bd392adSmrg ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4); 28869bd392adSmrg ptr_cmd[i++] = 0x244; 28879bd392adSmrg ptr_cmd[i++] = mc_address_dst; 28889bd392adSmrg ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000; 28899bd392adSmrg ptr_cmd[i++] = 0x400000; 28909bd392adSmrg ptr_cmd[i++] = 0x74fac; 28919bd392adSmrg 28929bd392adSmrg /* clear mmCOMPUTE_RESOURCE_LIMITS */ 28939bd392adSmrg ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1); 28949bd392adSmrg ptr_cmd[i++] = 0x215; 28959bd392adSmrg ptr_cmd[i++] = 0; 28969bd392adSmrg 28979bd392adSmrg /* dispatch direct command */ 28989bd392adSmrg ptr_cmd[i++] = PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3); 28999bd392adSmrg ptr_cmd[i++] = 0x10000; 29009bd392adSmrg ptr_cmd[i++] = 1; 29019bd392adSmrg ptr_cmd[i++] = 1; 29029bd392adSmrg ptr_cmd[i++] = 1; 29039bd392adSmrg 29049bd392adSmrg while (i & 7) 29059bd392adSmrg ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */ 29069bd392adSmrg 29079bd392adSmrg resources[0] = bo_shader; 29089bd392adSmrg resources[1] = bo_src; 29099bd392adSmrg resources[2] = bo_dst; 29109bd392adSmrg resources[3] = bo_cmd; 29119bd392adSmrg r = amdgpu_bo_list_create(device_handle, 4, resources, NULL, &bo_list); 29129bd392adSmrg CU_ASSERT_EQUAL(r, 0); 29139bd392adSmrg 29149bd392adSmrg ib_info.ib_mc_address = mc_address_cmd; 29159bd392adSmrg ib_info.size = i; 29169bd392adSmrg ibs_request.ip_type = ip_type; 29179bd392adSmrg ibs_request.ring = ring; 29189bd392adSmrg ibs_request.resources = bo_list; 29199bd392adSmrg ibs_request.number_of_ibs = 1; 29209bd392adSmrg ibs_request.ibs = &ib_info; 29219bd392adSmrg ibs_request.fence_info.handle = NULL; 29229bd392adSmrg r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1); 29239bd392adSmrg CU_ASSERT_EQUAL(r, 0); 29249bd392adSmrg 29259bd392adSmrg fence_status.ip_type = ip_type; 29269bd392adSmrg fence_status.ip_instance = 0; 29279bd392adSmrg fence_status.ring = ring; 29289bd392adSmrg fence_status.context = context_handle; 29299bd392adSmrg fence_status.fence = ibs_request.seq_no; 29309bd392adSmrg 29319bd392adSmrg /* wait for IB accomplished */ 29329bd392adSmrg r = amdgpu_cs_query_fence_status(&fence_status, 29339bd392adSmrg AMDGPU_TIMEOUT_INFINITE, 29349bd392adSmrg 0, &expired); 29359bd392adSmrg 29369bd392adSmrg r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs); 29379bd392adSmrg CU_ASSERT_EQUAL(r, 0); 29389bd392adSmrg CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET); 29399bd392adSmrg 29409bd392adSmrg r = amdgpu_bo_list_destroy(bo_list); 29419bd392adSmrg CU_ASSERT_EQUAL(r, 0); 29429bd392adSmrg 29439bd392adSmrg r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_dst_size); 29449bd392adSmrg CU_ASSERT_EQUAL(r, 0); 29459bd392adSmrg r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size); 29469bd392adSmrg CU_ASSERT_EQUAL(r, 0); 29479bd392adSmrg 29489bd392adSmrg r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size); 29499bd392adSmrg CU_ASSERT_EQUAL(r, 0); 29509bd392adSmrg 29519bd392adSmrg r = amdgpu_bo_unmap_and_free(bo_shader, va_shader, mc_address_shader, bo_shader_size); 29529bd392adSmrg CU_ASSERT_EQUAL(r, 0); 29539bd392adSmrg 29549bd392adSmrg r = amdgpu_cs_ctx_free(context_handle); 29559bd392adSmrg CU_ASSERT_EQUAL(r, 0); 29569bd392adSmrg} 29579bd392adSmrg 29589bd392adSmrgvoid amdgpu_dispatch_hang_slow_helper(amdgpu_device_handle device_handle, uint32_t ip_type) 29599bd392adSmrg{ 29609bd392adSmrg int r; 29619bd392adSmrg struct drm_amdgpu_info_hw_ip info; 29629bd392adSmrg uint32_t ring_id; 29639bd392adSmrg 29649bd392adSmrg r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &info); 29659bd392adSmrg CU_ASSERT_EQUAL(r, 0); 29669bd392adSmrg if (!info.available_rings) 29679bd392adSmrg printf("SKIP ... as there's no ring for ip %d\n", ip_type); 29689bd392adSmrg 29699bd392adSmrg for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) { 29709bd392adSmrg amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 0); 29719bd392adSmrg amdgpu_memcpy_dispatch_hang_slow_test(device_handle, ip_type, ring_id); 29729bd392adSmrg amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 0); 29739bd392adSmrg } 29749bd392adSmrg} 29759bd392adSmrg 29769bd392adSmrgstatic int amdgpu_draw_load_ps_shader_hang_slow(uint32_t *ptr, int family) 29779bd392adSmrg{ 29789bd392adSmrg struct amdgpu_test_shader *shader; 29799bd392adSmrg int i, loop = 0x40000; 29809bd392adSmrg 29819bd392adSmrg switch (family) { 29829bd392adSmrg case AMDGPU_FAMILY_AI: 29839bd392adSmrg case AMDGPU_FAMILY_RV: 29849bd392adSmrg shader = &memcpy_ps_hang_slow_ai; 29859bd392adSmrg break; 29869bd392adSmrg default: 29879bd392adSmrg return -1; 29889bd392adSmrg break; 29895324fb0dSmrg } 29909bd392adSmrg 29919bd392adSmrg memcpy(ptr, shader->shader, shader->header_length * sizeof(uint32_t)); 29929bd392adSmrg 29939bd392adSmrg for (i = 0; i < loop; i++) 29949bd392adSmrg memcpy(ptr + shader->header_length + shader->body_length * i, 29959bd392adSmrg shader->shader + shader->header_length, 29969bd392adSmrg shader->body_length * sizeof(uint32_t)); 29979bd392adSmrg 29989bd392adSmrg memcpy(ptr + shader->header_length + shader->body_length * loop, 29999bd392adSmrg shader->shader + shader->header_length + shader->body_length, 30009bd392adSmrg shader->foot_length * sizeof(uint32_t)); 30019bd392adSmrg 30029bd392adSmrg return 0; 30035324fb0dSmrg} 30045324fb0dSmrg 30055324fb0dSmrgstatic int amdgpu_draw_load_ps_shader(uint8_t *ptr, int ps_type) 30065324fb0dSmrg{ 30075324fb0dSmrg int i; 30085324fb0dSmrg uint32_t shader_offset= 256; 30095324fb0dSmrg uint32_t mem_offset, patch_code_offset; 30105324fb0dSmrg uint32_t shader_size, patchinfo_code_size; 30115324fb0dSmrg const uint32_t *shader; 30125324fb0dSmrg const uint32_t *patchinfo_code; 30135324fb0dSmrg const uint32_t *patchcode_offset; 30145324fb0dSmrg 30155324fb0dSmrg switch (ps_type) { 30165324fb0dSmrg case PS_CONST: 30175324fb0dSmrg shader = ps_const_shader_gfx9; 30185324fb0dSmrg shader_size = sizeof(ps_const_shader_gfx9); 30195324fb0dSmrg patchinfo_code = (const uint32_t *)ps_const_shader_patchinfo_code_gfx9; 30205324fb0dSmrg patchinfo_code_size = ps_const_shader_patchinfo_code_size_gfx9; 30215324fb0dSmrg patchcode_offset = ps_const_shader_patchinfo_offset_gfx9; 30225324fb0dSmrg break; 30235324fb0dSmrg case PS_TEX: 30245324fb0dSmrg shader = ps_tex_shader_gfx9; 30255324fb0dSmrg shader_size = sizeof(ps_tex_shader_gfx9); 30265324fb0dSmrg patchinfo_code = (const uint32_t *)ps_tex_shader_patchinfo_code_gfx9; 30275324fb0dSmrg patchinfo_code_size = ps_tex_shader_patchinfo_code_size_gfx9; 30285324fb0dSmrg patchcode_offset = ps_tex_shader_patchinfo_offset_gfx9; 30295324fb0dSmrg break; 30309bd392adSmrg case PS_HANG: 30319bd392adSmrg shader = memcpy_ps_hang; 30329bd392adSmrg shader_size = sizeof(memcpy_ps_hang); 30339bd392adSmrg 30349bd392adSmrg memcpy(ptr, shader, shader_size); 30359bd392adSmrg return 0; 30365324fb0dSmrg default: 30375324fb0dSmrg return -1; 30385324fb0dSmrg break; 30395324fb0dSmrg } 30405324fb0dSmrg 30415324fb0dSmrg /* write main shader program */ 30425324fb0dSmrg for (i = 0 ; i < 10; i++) { 30435324fb0dSmrg mem_offset = i * shader_offset; 30445324fb0dSmrg memcpy(ptr + mem_offset, shader, shader_size); 30455324fb0dSmrg } 30465324fb0dSmrg 30475324fb0dSmrg /* overwrite patch codes */ 30485324fb0dSmrg for (i = 0 ; i < 10; i++) { 30495324fb0dSmrg mem_offset = i * shader_offset + patchcode_offset[0] * sizeof(uint32_t); 30505324fb0dSmrg patch_code_offset = i * patchinfo_code_size; 30515324fb0dSmrg memcpy(ptr + mem_offset, 30525324fb0dSmrg patchinfo_code + patch_code_offset, 30535324fb0dSmrg patchinfo_code_size * sizeof(uint32_t)); 30545324fb0dSmrg } 30555324fb0dSmrg 30565324fb0dSmrg return 0; 30575324fb0dSmrg} 30585324fb0dSmrg 30595324fb0dSmrg/* load RectPosTexFast_VS */ 30605324fb0dSmrgstatic int amdgpu_draw_load_vs_shader(uint8_t *ptr) 30615324fb0dSmrg{ 30625324fb0dSmrg const uint32_t *shader; 30635324fb0dSmrg uint32_t shader_size; 30645324fb0dSmrg 30655324fb0dSmrg shader = vs_RectPosTexFast_shader_gfx9; 30665324fb0dSmrg shader_size = sizeof(vs_RectPosTexFast_shader_gfx9); 30675324fb0dSmrg 30685324fb0dSmrg memcpy(ptr, shader, shader_size); 30695324fb0dSmrg 30705324fb0dSmrg return 0; 30715324fb0dSmrg} 30725324fb0dSmrg 30735324fb0dSmrgstatic int amdgpu_draw_init(uint32_t *ptr) 30745324fb0dSmrg{ 30755324fb0dSmrg int i = 0; 30765324fb0dSmrg const uint32_t *preamblecache_ptr; 30775324fb0dSmrg uint32_t preamblecache_size; 30785324fb0dSmrg 30795324fb0dSmrg /* Write context control and load shadowing register if necessary */ 30805324fb0dSmrg ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1); 30815324fb0dSmrg ptr[i++] = 0x80000000; 30825324fb0dSmrg ptr[i++] = 0x80000000; 30835324fb0dSmrg 30845324fb0dSmrg preamblecache_ptr = preamblecache_gfx9; 30855324fb0dSmrg preamblecache_size = sizeof(preamblecache_gfx9); 30865324fb0dSmrg 30875324fb0dSmrg memcpy(ptr + i, preamblecache_ptr, preamblecache_size); 30885324fb0dSmrg return i + preamblecache_size/sizeof(uint32_t); 30895324fb0dSmrg} 30905324fb0dSmrg 30915324fb0dSmrgstatic int amdgpu_draw_setup_and_write_drawblt_surf_info(uint32_t *ptr, 30929bd392adSmrg uint64_t dst_addr, 30939bd392adSmrg int hang_slow) 30945324fb0dSmrg{ 30955324fb0dSmrg int i = 0; 30965324fb0dSmrg 30975324fb0dSmrg /* setup color buffer */ 30985324fb0dSmrg /* offset reg 30995324fb0dSmrg 0xA318 CB_COLOR0_BASE 31005324fb0dSmrg 0xA319 CB_COLOR0_BASE_EXT 31015324fb0dSmrg 0xA31A CB_COLOR0_ATTRIB2 31025324fb0dSmrg 0xA31B CB_COLOR0_VIEW 31035324fb0dSmrg 0xA31C CB_COLOR0_INFO 31045324fb0dSmrg 0xA31D CB_COLOR0_ATTRIB 31055324fb0dSmrg 0xA31E CB_COLOR0_DCC_CONTROL 31065324fb0dSmrg 0xA31F CB_COLOR0_CMASK 31075324fb0dSmrg 0xA320 CB_COLOR0_CMASK_BASE_EXT 31085324fb0dSmrg 0xA321 CB_COLOR0_FMASK 31095324fb0dSmrg 0xA322 CB_COLOR0_FMASK_BASE_EXT 31105324fb0dSmrg 0xA323 CB_COLOR0_CLEAR_WORD0 31115324fb0dSmrg 0xA324 CB_COLOR0_CLEAR_WORD1 31125324fb0dSmrg 0xA325 CB_COLOR0_DCC_BASE 31135324fb0dSmrg 0xA326 CB_COLOR0_DCC_BASE_EXT */ 31145324fb0dSmrg ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 15); 31155324fb0dSmrg ptr[i++] = 0x318; 31165324fb0dSmrg ptr[i++] = dst_addr >> 8; 31175324fb0dSmrg ptr[i++] = dst_addr >> 40; 31189bd392adSmrg ptr[i++] = hang_slow ? 0x1ffc7ff : 0x7c01f; 31195324fb0dSmrg ptr[i++] = 0; 31205324fb0dSmrg ptr[i++] = 0x50438; 31215324fb0dSmrg ptr[i++] = 0x10140000; 31225324fb0dSmrg i += 9; 31235324fb0dSmrg 31245324fb0dSmrg /* mmCB_MRT0_EPITCH */ 31255324fb0dSmrg ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 31265324fb0dSmrg ptr[i++] = 0x1e8; 31279bd392adSmrg ptr[i++] = hang_slow ? 0x7ff : 0x1f; 31285324fb0dSmrg 31295324fb0dSmrg /* 0xA32B CB_COLOR1_BASE */ 31305324fb0dSmrg ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 31315324fb0dSmrg ptr[i++] = 0x32b; 31325324fb0dSmrg ptr[i++] = 0; 31335324fb0dSmrg 31345324fb0dSmrg /* 0xA33A CB_COLOR1_BASE */ 31355324fb0dSmrg ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 31365324fb0dSmrg ptr[i++] = 0x33a; 31375324fb0dSmrg ptr[i++] = 0; 31385324fb0dSmrg 31395324fb0dSmrg /* SPI_SHADER_COL_FORMAT */ 31405324fb0dSmrg ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 31415324fb0dSmrg ptr[i++] = 0x1c5; 31425324fb0dSmrg ptr[i++] = 9; 31435324fb0dSmrg 31445324fb0dSmrg /* Setup depth buffer */ 31455324fb0dSmrg /* mmDB_Z_INFO */ 31465324fb0dSmrg ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2); 31475324fb0dSmrg ptr[i++] = 0xe; 31485324fb0dSmrg i += 2; 31495324fb0dSmrg 31505324fb0dSmrg return i; 31515324fb0dSmrg} 31525324fb0dSmrg 31539bd392adSmrgstatic int amdgpu_draw_setup_and_write_drawblt_state(uint32_t *ptr, int hang_slow) 31545324fb0dSmrg{ 31555324fb0dSmrg int i = 0; 31565324fb0dSmrg const uint32_t *cached_cmd_ptr; 31575324fb0dSmrg uint32_t cached_cmd_size; 31585324fb0dSmrg 31595324fb0dSmrg /* mmPA_SC_TILE_STEERING_OVERRIDE */ 31605324fb0dSmrg ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 31615324fb0dSmrg ptr[i++] = 0xd7; 31625324fb0dSmrg ptr[i++] = 0; 31635324fb0dSmrg 31645324fb0dSmrg ptr[i++] = 0xffff1000; 31655324fb0dSmrg ptr[i++] = 0xc0021000; 31665324fb0dSmrg 31675324fb0dSmrg ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 31685324fb0dSmrg ptr[i++] = 0xd7; 31695324fb0dSmrg ptr[i++] = 1; 31705324fb0dSmrg 31715324fb0dSmrg /* mmPA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0 */ 31725324fb0dSmrg ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 16); 31735324fb0dSmrg ptr[i++] = 0x2fe; 31745324fb0dSmrg i += 16; 31755324fb0dSmrg 31765324fb0dSmrg /* mmPA_SC_CENTROID_PRIORITY_0 */ 31775324fb0dSmrg ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2); 31785324fb0dSmrg ptr[i++] = 0x2f5; 31795324fb0dSmrg i += 2; 31805324fb0dSmrg 31815324fb0dSmrg cached_cmd_ptr = cached_cmd_gfx9; 31825324fb0dSmrg cached_cmd_size = sizeof(cached_cmd_gfx9); 31835324fb0dSmrg 31845324fb0dSmrg memcpy(ptr + i, cached_cmd_ptr, cached_cmd_size); 31859bd392adSmrg if (hang_slow) 31869bd392adSmrg *(ptr + i + 12) = 0x8000800; 31875324fb0dSmrg i += cached_cmd_size/sizeof(uint32_t); 31885324fb0dSmrg 31895324fb0dSmrg return i; 31905324fb0dSmrg} 31915324fb0dSmrg 31925324fb0dSmrgstatic int amdgpu_draw_vs_RectPosTexFast_write2hw(uint32_t *ptr, 31935324fb0dSmrg int ps_type, 31949bd392adSmrg uint64_t shader_addr, 31959bd392adSmrg int hang_slow) 31965324fb0dSmrg{ 31975324fb0dSmrg int i = 0; 31985324fb0dSmrg 31995324fb0dSmrg /* mmPA_CL_VS_OUT_CNTL */ 32005324fb0dSmrg ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 32015324fb0dSmrg ptr[i++] = 0x207; 32025324fb0dSmrg ptr[i++] = 0; 32035324fb0dSmrg 32045324fb0dSmrg /* mmSPI_SHADER_PGM_RSRC3_VS */ 32055324fb0dSmrg ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1); 32065324fb0dSmrg ptr[i++] = 0x46; 32075324fb0dSmrg ptr[i++] = 0xffff; 32085324fb0dSmrg 32095324fb0dSmrg /* mmSPI_SHADER_PGM_LO_VS...mmSPI_SHADER_PGM_HI_VS */ 32105324fb0dSmrg ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2); 32115324fb0dSmrg ptr[i++] = 0x48; 32125324fb0dSmrg ptr[i++] = shader_addr >> 8; 32135324fb0dSmrg ptr[i++] = shader_addr >> 40; 32145324fb0dSmrg 32155324fb0dSmrg /* mmSPI_SHADER_PGM_RSRC1_VS */ 32165324fb0dSmrg ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1); 32175324fb0dSmrg ptr[i++] = 0x4a; 32185324fb0dSmrg ptr[i++] = 0xc0081; 32195324fb0dSmrg /* mmSPI_SHADER_PGM_RSRC2_VS */ 32205324fb0dSmrg ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1); 32215324fb0dSmrg ptr[i++] = 0x4b; 32225324fb0dSmrg ptr[i++] = 0x18; 32235324fb0dSmrg 32245324fb0dSmrg /* mmSPI_VS_OUT_CONFIG */ 32255324fb0dSmrg ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 32265324fb0dSmrg ptr[i++] = 0x1b1; 32275324fb0dSmrg ptr[i++] = 2; 32285324fb0dSmrg 32295324fb0dSmrg /* mmSPI_SHADER_POS_FORMAT */ 32305324fb0dSmrg ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 32315324fb0dSmrg ptr[i++] = 0x1c3; 32325324fb0dSmrg ptr[i++] = 4; 32335324fb0dSmrg 32345324fb0dSmrg ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4); 32355324fb0dSmrg ptr[i++] = 0x4c; 32365324fb0dSmrg i += 2; 32379bd392adSmrg ptr[i++] = hang_slow ? 0x45000000 : 0x42000000; 32389bd392adSmrg ptr[i++] = hang_slow ? 0x45000000 : 0x42000000; 32395324fb0dSmrg 32405324fb0dSmrg ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4); 32415324fb0dSmrg ptr[i++] = 0x50; 32425324fb0dSmrg i += 2; 32435324fb0dSmrg if (ps_type == PS_CONST) { 32445324fb0dSmrg i += 2; 32455324fb0dSmrg } else if (ps_type == PS_TEX) { 32465324fb0dSmrg ptr[i++] = 0x3f800000; 32475324fb0dSmrg ptr[i++] = 0x3f800000; 32485324fb0dSmrg } 32495324fb0dSmrg 32505324fb0dSmrg ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4); 32515324fb0dSmrg ptr[i++] = 0x54; 32525324fb0dSmrg i += 4; 32535324fb0dSmrg 32545324fb0dSmrg return i; 32555324fb0dSmrg} 32565324fb0dSmrg 32575324fb0dSmrgstatic int amdgpu_draw_ps_write2hw(uint32_t *ptr, 32585324fb0dSmrg int ps_type, 32595324fb0dSmrg uint64_t shader_addr) 32605324fb0dSmrg{ 32615324fb0dSmrg int i, j; 32625324fb0dSmrg const uint32_t *sh_registers; 32635324fb0dSmrg const uint32_t *context_registers; 32645324fb0dSmrg uint32_t num_sh_reg, num_context_reg; 32655324fb0dSmrg 32665324fb0dSmrg if (ps_type == PS_CONST) { 32675324fb0dSmrg sh_registers = (const uint32_t *)ps_const_sh_registers_gfx9; 32685324fb0dSmrg context_registers = (const uint32_t *)ps_const_context_reg_gfx9; 32695324fb0dSmrg num_sh_reg = ps_num_sh_registers_gfx9; 32705324fb0dSmrg num_context_reg = ps_num_context_registers_gfx9; 32715324fb0dSmrg } else if (ps_type == PS_TEX) { 32725324fb0dSmrg sh_registers = (const uint32_t *)ps_tex_sh_registers_gfx9; 32735324fb0dSmrg context_registers = (const uint32_t *)ps_tex_context_reg_gfx9; 32745324fb0dSmrg num_sh_reg = ps_num_sh_registers_gfx9; 32755324fb0dSmrg num_context_reg = ps_num_context_registers_gfx9; 32765324fb0dSmrg } 32775324fb0dSmrg 32785324fb0dSmrg i = 0; 32795324fb0dSmrg 32805324fb0dSmrg /* 0x2c07 SPI_SHADER_PGM_RSRC3_PS 32815324fb0dSmrg 0x2c08 SPI_SHADER_PGM_LO_PS 32825324fb0dSmrg 0x2c09 SPI_SHADER_PGM_HI_PS */ 32835324fb0dSmrg shader_addr += 256 * 9; 32845324fb0dSmrg ptr[i++] = PACKET3(PKT3_SET_SH_REG, 3); 32855324fb0dSmrg ptr[i++] = 0x7; 32865324fb0dSmrg ptr[i++] = 0xffff; 32875324fb0dSmrg ptr[i++] = shader_addr >> 8; 32885324fb0dSmrg ptr[i++] = shader_addr >> 40; 32895324fb0dSmrg 32905324fb0dSmrg for (j = 0; j < num_sh_reg; j++) { 32915324fb0dSmrg ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1); 32925324fb0dSmrg ptr[i++] = sh_registers[j * 2] - 0x2c00; 32935324fb0dSmrg ptr[i++] = sh_registers[j * 2 + 1]; 32945324fb0dSmrg } 32955324fb0dSmrg 32965324fb0dSmrg for (j = 0; j < num_context_reg; j++) { 32975324fb0dSmrg if (context_registers[j * 2] != 0xA1C5) { 32985324fb0dSmrg ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 32995324fb0dSmrg ptr[i++] = context_registers[j * 2] - 0xa000; 33005324fb0dSmrg ptr[i++] = context_registers[j * 2 + 1]; 33015324fb0dSmrg } 33025324fb0dSmrg 33035324fb0dSmrg if (context_registers[j * 2] == 0xA1B4) { 33045324fb0dSmrg ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 33055324fb0dSmrg ptr[i++] = 0x1b3; 33065324fb0dSmrg ptr[i++] = 2; 33075324fb0dSmrg } 33085324fb0dSmrg } 33095324fb0dSmrg 33105324fb0dSmrg return i; 33115324fb0dSmrg} 33125324fb0dSmrg 33135324fb0dSmrgstatic int amdgpu_draw_draw(uint32_t *ptr) 33145324fb0dSmrg{ 33155324fb0dSmrg int i = 0; 33165324fb0dSmrg 33175324fb0dSmrg /* mmIA_MULTI_VGT_PARAM */ 33185324fb0dSmrg ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1); 33195324fb0dSmrg ptr[i++] = 0x40000258; 33205324fb0dSmrg ptr[i++] = 0xd00ff; 33215324fb0dSmrg 33225324fb0dSmrg /* mmVGT_PRIMITIVE_TYPE */ 33235324fb0dSmrg ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1); 33245324fb0dSmrg ptr[i++] = 0x10000242; 33255324fb0dSmrg ptr[i++] = 0x11; 33265324fb0dSmrg 33275324fb0dSmrg ptr[i++] = PACKET3(PACKET3_DRAW_INDEX_AUTO, 1); 33285324fb0dSmrg ptr[i++] = 3; 33295324fb0dSmrg ptr[i++] = 2; 33305324fb0dSmrg 33315324fb0dSmrg return i; 33325324fb0dSmrg} 33335324fb0dSmrg 33345324fb0dSmrgvoid amdgpu_memset_draw(amdgpu_device_handle device_handle, 33355324fb0dSmrg amdgpu_bo_handle bo_shader_ps, 33365324fb0dSmrg amdgpu_bo_handle bo_shader_vs, 33375324fb0dSmrg uint64_t mc_address_shader_ps, 33385324fb0dSmrg uint64_t mc_address_shader_vs, 33395324fb0dSmrg uint32_t ring_id) 33405324fb0dSmrg{ 33415324fb0dSmrg amdgpu_context_handle context_handle; 33425324fb0dSmrg amdgpu_bo_handle bo_dst, bo_cmd, resources[4]; 33435324fb0dSmrg volatile unsigned char *ptr_dst; 33445324fb0dSmrg uint32_t *ptr_cmd; 33455324fb0dSmrg uint64_t mc_address_dst, mc_address_cmd; 33465324fb0dSmrg amdgpu_va_handle va_dst, va_cmd; 33475324fb0dSmrg int i, r; 33485324fb0dSmrg int bo_dst_size = 16384; 33495324fb0dSmrg int bo_cmd_size = 4096; 33505324fb0dSmrg struct amdgpu_cs_request ibs_request = {0}; 33515324fb0dSmrg struct amdgpu_cs_ib_info ib_info = {0}; 33525324fb0dSmrg struct amdgpu_cs_fence fence_status = {0}; 33535324fb0dSmrg uint32_t expired; 33545324fb0dSmrg amdgpu_bo_list_handle bo_list; 33555324fb0dSmrg 33565324fb0dSmrg r = amdgpu_cs_ctx_create(device_handle, &context_handle); 33575324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 33585324fb0dSmrg 33595324fb0dSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096, 33605324fb0dSmrg AMDGPU_GEM_DOMAIN_GTT, 0, 33615324fb0dSmrg &bo_cmd, (void **)&ptr_cmd, 33625324fb0dSmrg &mc_address_cmd, &va_cmd); 33635324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 33645324fb0dSmrg memset(ptr_cmd, 0, bo_cmd_size); 33655324fb0dSmrg 33665324fb0dSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096, 33675324fb0dSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, 33685324fb0dSmrg &bo_dst, (void **)&ptr_dst, 33695324fb0dSmrg &mc_address_dst, &va_dst); 33705324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 33715324fb0dSmrg 33725324fb0dSmrg i = 0; 33735324fb0dSmrg i += amdgpu_draw_init(ptr_cmd + i); 33745324fb0dSmrg 33759bd392adSmrg i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst, 0); 33765324fb0dSmrg 33779bd392adSmrg i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i, 0); 33785324fb0dSmrg 33799bd392adSmrg i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_CONST, mc_address_shader_vs, 0); 33805324fb0dSmrg 33815324fb0dSmrg i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_CONST, mc_address_shader_ps); 33825324fb0dSmrg 33835324fb0dSmrg ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4); 33845324fb0dSmrg ptr_cmd[i++] = 0xc; 33855324fb0dSmrg ptr_cmd[i++] = 0x33333333; 33865324fb0dSmrg ptr_cmd[i++] = 0x33333333; 33875324fb0dSmrg ptr_cmd[i++] = 0x33333333; 33885324fb0dSmrg ptr_cmd[i++] = 0x33333333; 33895324fb0dSmrg 33905324fb0dSmrg i += amdgpu_draw_draw(ptr_cmd + i); 33915324fb0dSmrg 33925324fb0dSmrg while (i & 7) 33935324fb0dSmrg ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */ 33945324fb0dSmrg 33955324fb0dSmrg resources[0] = bo_dst; 33965324fb0dSmrg resources[1] = bo_shader_ps; 33975324fb0dSmrg resources[2] = bo_shader_vs; 33985324fb0dSmrg resources[3] = bo_cmd; 33999bd392adSmrg r = amdgpu_bo_list_create(device_handle, 4, resources, NULL, &bo_list); 34005324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 34015324fb0dSmrg 34025324fb0dSmrg ib_info.ib_mc_address = mc_address_cmd; 34035324fb0dSmrg ib_info.size = i; 34045324fb0dSmrg ibs_request.ip_type = AMDGPU_HW_IP_GFX; 34055324fb0dSmrg ibs_request.ring = ring_id; 34065324fb0dSmrg ibs_request.resources = bo_list; 34075324fb0dSmrg ibs_request.number_of_ibs = 1; 34085324fb0dSmrg ibs_request.ibs = &ib_info; 34095324fb0dSmrg ibs_request.fence_info.handle = NULL; 34105324fb0dSmrg 34115324fb0dSmrg /* submit CS */ 34125324fb0dSmrg r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1); 34135324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 34145324fb0dSmrg 34155324fb0dSmrg r = amdgpu_bo_list_destroy(bo_list); 34165324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 34175324fb0dSmrg 34185324fb0dSmrg fence_status.ip_type = AMDGPU_HW_IP_GFX; 34195324fb0dSmrg fence_status.ip_instance = 0; 34205324fb0dSmrg fence_status.ring = ring_id; 34215324fb0dSmrg fence_status.context = context_handle; 34225324fb0dSmrg fence_status.fence = ibs_request.seq_no; 34235324fb0dSmrg 34245324fb0dSmrg /* wait for IB accomplished */ 34255324fb0dSmrg r = amdgpu_cs_query_fence_status(&fence_status, 34265324fb0dSmrg AMDGPU_TIMEOUT_INFINITE, 34275324fb0dSmrg 0, &expired); 34285324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 34295324fb0dSmrg CU_ASSERT_EQUAL(expired, true); 34305324fb0dSmrg 34315324fb0dSmrg /* verify if memset test result meets with expected */ 34325324fb0dSmrg i = 0; 34335324fb0dSmrg while(i < bo_dst_size) { 34345324fb0dSmrg CU_ASSERT_EQUAL(ptr_dst[i++], 0x33); 34355324fb0dSmrg } 34365324fb0dSmrg 34375324fb0dSmrg r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size); 34385324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 34395324fb0dSmrg 34405324fb0dSmrg r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size); 34415324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 34425324fb0dSmrg 34435324fb0dSmrg r = amdgpu_cs_ctx_free(context_handle); 34445324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 34455324fb0dSmrg} 34465324fb0dSmrg 34475324fb0dSmrgstatic void amdgpu_memset_draw_test(amdgpu_device_handle device_handle, 34485324fb0dSmrg uint32_t ring) 34495324fb0dSmrg{ 34505324fb0dSmrg amdgpu_bo_handle bo_shader_ps, bo_shader_vs; 34515324fb0dSmrg void *ptr_shader_ps; 34525324fb0dSmrg void *ptr_shader_vs; 34535324fb0dSmrg uint64_t mc_address_shader_ps, mc_address_shader_vs; 34545324fb0dSmrg amdgpu_va_handle va_shader_ps, va_shader_vs; 34555324fb0dSmrg int r; 34565324fb0dSmrg int bo_shader_size = 4096; 34575324fb0dSmrg 34585324fb0dSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096, 34595324fb0dSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, 34605324fb0dSmrg &bo_shader_ps, &ptr_shader_ps, 34615324fb0dSmrg &mc_address_shader_ps, &va_shader_ps); 34625324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 346388f8a8d2Smrg memset(ptr_shader_ps, 0, bo_shader_size); 34645324fb0dSmrg 34655324fb0dSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096, 34665324fb0dSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, 34675324fb0dSmrg &bo_shader_vs, &ptr_shader_vs, 34685324fb0dSmrg &mc_address_shader_vs, &va_shader_vs); 34695324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 347088f8a8d2Smrg memset(ptr_shader_vs, 0, bo_shader_size); 34715324fb0dSmrg 34725324fb0dSmrg r = amdgpu_draw_load_ps_shader(ptr_shader_ps, PS_CONST); 34735324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 34745324fb0dSmrg 34755324fb0dSmrg r = amdgpu_draw_load_vs_shader(ptr_shader_vs); 34765324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 34775324fb0dSmrg 34785324fb0dSmrg amdgpu_memset_draw(device_handle, bo_shader_ps, bo_shader_vs, 34795324fb0dSmrg mc_address_shader_ps, mc_address_shader_vs, ring); 34805324fb0dSmrg 34815324fb0dSmrg r = amdgpu_bo_unmap_and_free(bo_shader_ps, va_shader_ps, mc_address_shader_ps, bo_shader_size); 34825324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 34835324fb0dSmrg 34845324fb0dSmrg r = amdgpu_bo_unmap_and_free(bo_shader_vs, va_shader_vs, mc_address_shader_vs, bo_shader_size); 34855324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 34865324fb0dSmrg} 34875324fb0dSmrg 34885324fb0dSmrgstatic void amdgpu_memcpy_draw(amdgpu_device_handle device_handle, 34895324fb0dSmrg amdgpu_bo_handle bo_shader_ps, 34905324fb0dSmrg amdgpu_bo_handle bo_shader_vs, 34915324fb0dSmrg uint64_t mc_address_shader_ps, 34925324fb0dSmrg uint64_t mc_address_shader_vs, 34939bd392adSmrg uint32_t ring, int hang) 34945324fb0dSmrg{ 34955324fb0dSmrg amdgpu_context_handle context_handle; 34965324fb0dSmrg amdgpu_bo_handle bo_dst, bo_src, bo_cmd, resources[5]; 34975324fb0dSmrg volatile unsigned char *ptr_dst; 34985324fb0dSmrg unsigned char *ptr_src; 34995324fb0dSmrg uint32_t *ptr_cmd; 35005324fb0dSmrg uint64_t mc_address_dst, mc_address_src, mc_address_cmd; 35015324fb0dSmrg amdgpu_va_handle va_dst, va_src, va_cmd; 35025324fb0dSmrg int i, r; 35035324fb0dSmrg int bo_size = 16384; 35045324fb0dSmrg int bo_cmd_size = 4096; 35055324fb0dSmrg struct amdgpu_cs_request ibs_request = {0}; 35065324fb0dSmrg struct amdgpu_cs_ib_info ib_info= {0}; 35079bd392adSmrg uint32_t hang_state, hangs; 35089bd392adSmrg uint32_t expired; 35095324fb0dSmrg amdgpu_bo_list_handle bo_list; 35105324fb0dSmrg struct amdgpu_cs_fence fence_status = {0}; 35115324fb0dSmrg 35125324fb0dSmrg r = amdgpu_cs_ctx_create(device_handle, &context_handle); 35135324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 35145324fb0dSmrg 35155324fb0dSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096, 35165324fb0dSmrg AMDGPU_GEM_DOMAIN_GTT, 0, 35175324fb0dSmrg &bo_cmd, (void **)&ptr_cmd, 35185324fb0dSmrg &mc_address_cmd, &va_cmd); 35195324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 35205324fb0dSmrg memset(ptr_cmd, 0, bo_cmd_size); 35215324fb0dSmrg 35225324fb0dSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096, 35235324fb0dSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, 35245324fb0dSmrg &bo_src, (void **)&ptr_src, 35255324fb0dSmrg &mc_address_src, &va_src); 35265324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 35275324fb0dSmrg 35285324fb0dSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096, 35295324fb0dSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, 35305324fb0dSmrg &bo_dst, (void **)&ptr_dst, 35315324fb0dSmrg &mc_address_dst, &va_dst); 35325324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 35335324fb0dSmrg 35345324fb0dSmrg memset(ptr_src, 0x55, bo_size); 35355324fb0dSmrg 35365324fb0dSmrg i = 0; 35375324fb0dSmrg i += amdgpu_draw_init(ptr_cmd + i); 35385324fb0dSmrg 35399bd392adSmrg i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst, 0); 35405324fb0dSmrg 35419bd392adSmrg i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i, 0); 35425324fb0dSmrg 35439bd392adSmrg i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_vs, 0); 35445324fb0dSmrg 35455324fb0dSmrg i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_ps); 35465324fb0dSmrg 35475324fb0dSmrg ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 8); 35485324fb0dSmrg ptr_cmd[i++] = 0xc; 35495324fb0dSmrg ptr_cmd[i++] = mc_address_src >> 8; 35505324fb0dSmrg ptr_cmd[i++] = mc_address_src >> 40 | 0x10e00000; 35515324fb0dSmrg ptr_cmd[i++] = 0x7c01f; 35525324fb0dSmrg ptr_cmd[i++] = 0x90500fac; 35535324fb0dSmrg ptr_cmd[i++] = 0x3e000; 35545324fb0dSmrg i += 3; 35555324fb0dSmrg 35565324fb0dSmrg ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4); 35575324fb0dSmrg ptr_cmd[i++] = 0x14; 35585324fb0dSmrg ptr_cmd[i++] = 0x92; 35595324fb0dSmrg i += 3; 35605324fb0dSmrg 356188f8a8d2Smrg ptr_cmd[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 35625324fb0dSmrg ptr_cmd[i++] = 0x191; 35635324fb0dSmrg ptr_cmd[i++] = 0; 35645324fb0dSmrg 35655324fb0dSmrg i += amdgpu_draw_draw(ptr_cmd + i); 35665324fb0dSmrg 35675324fb0dSmrg while (i & 7) 35685324fb0dSmrg ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */ 35695324fb0dSmrg 35705324fb0dSmrg resources[0] = bo_dst; 35715324fb0dSmrg resources[1] = bo_src; 35725324fb0dSmrg resources[2] = bo_shader_ps; 35735324fb0dSmrg resources[3] = bo_shader_vs; 35745324fb0dSmrg resources[4] = bo_cmd; 35755324fb0dSmrg r = amdgpu_bo_list_create(device_handle, 5, resources, NULL, &bo_list); 35765324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 35775324fb0dSmrg 35785324fb0dSmrg ib_info.ib_mc_address = mc_address_cmd; 35795324fb0dSmrg ib_info.size = i; 35805324fb0dSmrg ibs_request.ip_type = AMDGPU_HW_IP_GFX; 35815324fb0dSmrg ibs_request.ring = ring; 35825324fb0dSmrg ibs_request.resources = bo_list; 35835324fb0dSmrg ibs_request.number_of_ibs = 1; 35845324fb0dSmrg ibs_request.ibs = &ib_info; 35855324fb0dSmrg ibs_request.fence_info.handle = NULL; 35865324fb0dSmrg r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1); 35875324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 35885324fb0dSmrg 35895324fb0dSmrg fence_status.ip_type = AMDGPU_HW_IP_GFX; 35905324fb0dSmrg fence_status.ip_instance = 0; 35915324fb0dSmrg fence_status.ring = ring; 35925324fb0dSmrg fence_status.context = context_handle; 35935324fb0dSmrg fence_status.fence = ibs_request.seq_no; 35945324fb0dSmrg 35955324fb0dSmrg /* wait for IB accomplished */ 35965324fb0dSmrg r = amdgpu_cs_query_fence_status(&fence_status, 35975324fb0dSmrg AMDGPU_TIMEOUT_INFINITE, 35985324fb0dSmrg 0, &expired); 35999bd392adSmrg if (!hang) { 36009bd392adSmrg CU_ASSERT_EQUAL(r, 0); 36019bd392adSmrg CU_ASSERT_EQUAL(expired, true); 36025324fb0dSmrg 36039bd392adSmrg /* verify if memcpy test result meets with expected */ 36049bd392adSmrg i = 0; 36059bd392adSmrg while(i < bo_size) { 36069bd392adSmrg CU_ASSERT_EQUAL(ptr_dst[i], ptr_src[i]); 36079bd392adSmrg i++; 36089bd392adSmrg } 36099bd392adSmrg } else { 36109bd392adSmrg r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs); 36119bd392adSmrg CU_ASSERT_EQUAL(r, 0); 36129bd392adSmrg CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET); 36135324fb0dSmrg } 36145324fb0dSmrg 36155324fb0dSmrg r = amdgpu_bo_list_destroy(bo_list); 36165324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 36175324fb0dSmrg 36185324fb0dSmrg r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_size); 36195324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 36205324fb0dSmrg r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_size); 36215324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 36225324fb0dSmrg 36235324fb0dSmrg r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size); 36245324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 36255324fb0dSmrg 36265324fb0dSmrg r = amdgpu_cs_ctx_free(context_handle); 36275324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 36285324fb0dSmrg} 36295324fb0dSmrg 36309bd392adSmrgvoid amdgpu_memcpy_draw_test(amdgpu_device_handle device_handle, uint32_t ring, 36319bd392adSmrg int hang) 36325324fb0dSmrg{ 36335324fb0dSmrg amdgpu_bo_handle bo_shader_ps, bo_shader_vs; 36345324fb0dSmrg void *ptr_shader_ps; 36355324fb0dSmrg void *ptr_shader_vs; 36365324fb0dSmrg uint64_t mc_address_shader_ps, mc_address_shader_vs; 36375324fb0dSmrg amdgpu_va_handle va_shader_ps, va_shader_vs; 36385324fb0dSmrg int bo_shader_size = 4096; 36399bd392adSmrg enum ps_type ps_type = hang ? PS_HANG : PS_TEX; 36405324fb0dSmrg int r; 36415324fb0dSmrg 36425324fb0dSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096, 36435324fb0dSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, 36445324fb0dSmrg &bo_shader_ps, &ptr_shader_ps, 36455324fb0dSmrg &mc_address_shader_ps, &va_shader_ps); 36465324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 364788f8a8d2Smrg memset(ptr_shader_ps, 0, bo_shader_size); 36485324fb0dSmrg 36495324fb0dSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096, 36505324fb0dSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, 36515324fb0dSmrg &bo_shader_vs, &ptr_shader_vs, 36525324fb0dSmrg &mc_address_shader_vs, &va_shader_vs); 36535324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 365488f8a8d2Smrg memset(ptr_shader_vs, 0, bo_shader_size); 36555324fb0dSmrg 36569bd392adSmrg r = amdgpu_draw_load_ps_shader(ptr_shader_ps, ps_type); 36575324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 36585324fb0dSmrg 36595324fb0dSmrg r = amdgpu_draw_load_vs_shader(ptr_shader_vs); 36605324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 36615324fb0dSmrg 36625324fb0dSmrg amdgpu_memcpy_draw(device_handle, bo_shader_ps, bo_shader_vs, 36639bd392adSmrg mc_address_shader_ps, mc_address_shader_vs, ring, hang); 36645324fb0dSmrg 36655324fb0dSmrg r = amdgpu_bo_unmap_and_free(bo_shader_ps, va_shader_ps, mc_address_shader_ps, bo_shader_size); 36665324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 36675324fb0dSmrg 36685324fb0dSmrg r = amdgpu_bo_unmap_and_free(bo_shader_vs, va_shader_vs, mc_address_shader_vs, bo_shader_size); 36695324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 36705324fb0dSmrg} 36715324fb0dSmrg 36725324fb0dSmrgstatic void amdgpu_draw_test(void) 36735324fb0dSmrg{ 36745324fb0dSmrg int r; 36755324fb0dSmrg struct drm_amdgpu_info_hw_ip info; 36765324fb0dSmrg uint32_t ring_id; 36775324fb0dSmrg 36785324fb0dSmrg r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info); 36795324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 368088f8a8d2Smrg if (!info.available_rings) 368188f8a8d2Smrg printf("SKIP ... as there's no graphics ring\n"); 36825324fb0dSmrg 36835324fb0dSmrg for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) { 36845324fb0dSmrg amdgpu_memset_draw_test(device_handle, ring_id); 36859bd392adSmrg amdgpu_memcpy_draw_test(device_handle, ring_id, 0); 36865324fb0dSmrg } 36875324fb0dSmrg} 368888f8a8d2Smrg 36899bd392adSmrgvoid amdgpu_memcpy_draw_hang_slow_test(amdgpu_device_handle device_handle, uint32_t ring) 36909bd392adSmrg{ 36919bd392adSmrg amdgpu_context_handle context_handle; 36929bd392adSmrg amdgpu_bo_handle bo_shader_ps, bo_shader_vs; 36939bd392adSmrg amdgpu_bo_handle bo_dst, bo_src, bo_cmd, resources[5]; 36949bd392adSmrg void *ptr_shader_ps; 36959bd392adSmrg void *ptr_shader_vs; 36969bd392adSmrg volatile unsigned char *ptr_dst; 36979bd392adSmrg unsigned char *ptr_src; 36989bd392adSmrg uint32_t *ptr_cmd; 36999bd392adSmrg uint64_t mc_address_dst, mc_address_src, mc_address_cmd; 37009bd392adSmrg uint64_t mc_address_shader_ps, mc_address_shader_vs; 37019bd392adSmrg amdgpu_va_handle va_shader_ps, va_shader_vs; 37029bd392adSmrg amdgpu_va_handle va_dst, va_src, va_cmd; 37039bd392adSmrg struct amdgpu_gpu_info gpu_info = {0}; 37049bd392adSmrg int i, r; 37059bd392adSmrg int bo_size = 0x4000000; 37069bd392adSmrg int bo_shader_ps_size = 0x400000; 37079bd392adSmrg int bo_shader_vs_size = 4096; 37089bd392adSmrg int bo_cmd_size = 4096; 37099bd392adSmrg struct amdgpu_cs_request ibs_request = {0}; 37109bd392adSmrg struct amdgpu_cs_ib_info ib_info= {0}; 37119bd392adSmrg uint32_t hang_state, hangs, expired; 37129bd392adSmrg amdgpu_bo_list_handle bo_list; 37139bd392adSmrg struct amdgpu_cs_fence fence_status = {0}; 37149bd392adSmrg 37159bd392adSmrg r = amdgpu_query_gpu_info(device_handle, &gpu_info); 37169bd392adSmrg CU_ASSERT_EQUAL(r, 0); 37179bd392adSmrg 37189bd392adSmrg r = amdgpu_cs_ctx_create(device_handle, &context_handle); 37199bd392adSmrg CU_ASSERT_EQUAL(r, 0); 37209bd392adSmrg 37219bd392adSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096, 37229bd392adSmrg AMDGPU_GEM_DOMAIN_GTT, 0, 37239bd392adSmrg &bo_cmd, (void **)&ptr_cmd, 37249bd392adSmrg &mc_address_cmd, &va_cmd); 37259bd392adSmrg CU_ASSERT_EQUAL(r, 0); 37269bd392adSmrg memset(ptr_cmd, 0, bo_cmd_size); 37279bd392adSmrg 37289bd392adSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_ps_size, 4096, 37299bd392adSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, 37309bd392adSmrg &bo_shader_ps, &ptr_shader_ps, 37319bd392adSmrg &mc_address_shader_ps, &va_shader_ps); 37329bd392adSmrg CU_ASSERT_EQUAL(r, 0); 37339bd392adSmrg memset(ptr_shader_ps, 0, bo_shader_ps_size); 37349bd392adSmrg 37359bd392adSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_vs_size, 4096, 37369bd392adSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, 37379bd392adSmrg &bo_shader_vs, &ptr_shader_vs, 37389bd392adSmrg &mc_address_shader_vs, &va_shader_vs); 37399bd392adSmrg CU_ASSERT_EQUAL(r, 0); 37409bd392adSmrg memset(ptr_shader_vs, 0, bo_shader_vs_size); 37419bd392adSmrg 37429bd392adSmrg r = amdgpu_draw_load_ps_shader_hang_slow(ptr_shader_ps, gpu_info.family_id); 37439bd392adSmrg CU_ASSERT_EQUAL(r, 0); 37449bd392adSmrg 37459bd392adSmrg r = amdgpu_draw_load_vs_shader(ptr_shader_vs); 37469bd392adSmrg CU_ASSERT_EQUAL(r, 0); 37479bd392adSmrg 37489bd392adSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096, 37499bd392adSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, 37509bd392adSmrg &bo_src, (void **)&ptr_src, 37519bd392adSmrg &mc_address_src, &va_src); 37529bd392adSmrg CU_ASSERT_EQUAL(r, 0); 37539bd392adSmrg 37549bd392adSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096, 37559bd392adSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, 37569bd392adSmrg &bo_dst, (void **)&ptr_dst, 37579bd392adSmrg &mc_address_dst, &va_dst); 37589bd392adSmrg CU_ASSERT_EQUAL(r, 0); 37599bd392adSmrg 37609bd392adSmrg memset(ptr_src, 0x55, bo_size); 37619bd392adSmrg 37629bd392adSmrg i = 0; 37639bd392adSmrg i += amdgpu_draw_init(ptr_cmd + i); 37649bd392adSmrg 37659bd392adSmrg i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst, 1); 37669bd392adSmrg 37679bd392adSmrg i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i, 1); 37689bd392adSmrg 37699bd392adSmrg i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_TEX, 37709bd392adSmrg mc_address_shader_vs, 1); 37719bd392adSmrg 37729bd392adSmrg i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_ps); 37739bd392adSmrg 37749bd392adSmrg ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 8); 37759bd392adSmrg ptr_cmd[i++] = 0xc; 37769bd392adSmrg ptr_cmd[i++] = mc_address_src >> 8; 37779bd392adSmrg ptr_cmd[i++] = mc_address_src >> 40 | 0x10e00000; 37789bd392adSmrg ptr_cmd[i++] = 0x1ffc7ff; 37799bd392adSmrg ptr_cmd[i++] = 0x90500fac; 37809bd392adSmrg ptr_cmd[i++] = 0xffe000; 37819bd392adSmrg i += 3; 37829bd392adSmrg 37839bd392adSmrg ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4); 37849bd392adSmrg ptr_cmd[i++] = 0x14; 37859bd392adSmrg ptr_cmd[i++] = 0x92; 37869bd392adSmrg i += 3; 37879bd392adSmrg 37889bd392adSmrg ptr_cmd[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 37899bd392adSmrg ptr_cmd[i++] = 0x191; 37909bd392adSmrg ptr_cmd[i++] = 0; 37919bd392adSmrg 37929bd392adSmrg i += amdgpu_draw_draw(ptr_cmd + i); 37939bd392adSmrg 37949bd392adSmrg while (i & 7) 37959bd392adSmrg ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */ 37969bd392adSmrg 37979bd392adSmrg resources[0] = bo_dst; 37989bd392adSmrg resources[1] = bo_src; 37999bd392adSmrg resources[2] = bo_shader_ps; 38009bd392adSmrg resources[3] = bo_shader_vs; 38019bd392adSmrg resources[4] = bo_cmd; 38029bd392adSmrg r = amdgpu_bo_list_create(device_handle, 5, resources, NULL, &bo_list); 38039bd392adSmrg CU_ASSERT_EQUAL(r, 0); 38049bd392adSmrg 38059bd392adSmrg ib_info.ib_mc_address = mc_address_cmd; 38069bd392adSmrg ib_info.size = i; 38079bd392adSmrg ibs_request.ip_type = AMDGPU_HW_IP_GFX; 38089bd392adSmrg ibs_request.ring = ring; 38099bd392adSmrg ibs_request.resources = bo_list; 38109bd392adSmrg ibs_request.number_of_ibs = 1; 38119bd392adSmrg ibs_request.ibs = &ib_info; 38129bd392adSmrg ibs_request.fence_info.handle = NULL; 38139bd392adSmrg r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1); 38149bd392adSmrg CU_ASSERT_EQUAL(r, 0); 38159bd392adSmrg 38169bd392adSmrg fence_status.ip_type = AMDGPU_HW_IP_GFX; 38179bd392adSmrg fence_status.ip_instance = 0; 38189bd392adSmrg fence_status.ring = ring; 38199bd392adSmrg fence_status.context = context_handle; 38209bd392adSmrg fence_status.fence = ibs_request.seq_no; 38219bd392adSmrg 38229bd392adSmrg /* wait for IB accomplished */ 38239bd392adSmrg r = amdgpu_cs_query_fence_status(&fence_status, 38249bd392adSmrg AMDGPU_TIMEOUT_INFINITE, 38259bd392adSmrg 0, &expired); 38269bd392adSmrg 38279bd392adSmrg r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs); 38289bd392adSmrg CU_ASSERT_EQUAL(r, 0); 38299bd392adSmrg CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET); 38309bd392adSmrg 38319bd392adSmrg r = amdgpu_bo_list_destroy(bo_list); 38329bd392adSmrg CU_ASSERT_EQUAL(r, 0); 38339bd392adSmrg 38349bd392adSmrg r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_size); 38359bd392adSmrg CU_ASSERT_EQUAL(r, 0); 38369bd392adSmrg r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_size); 38379bd392adSmrg CU_ASSERT_EQUAL(r, 0); 38389bd392adSmrg 38399bd392adSmrg r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size); 38409bd392adSmrg CU_ASSERT_EQUAL(r, 0); 38419bd392adSmrg 38429bd392adSmrg r = amdgpu_bo_unmap_and_free(bo_shader_ps, va_shader_ps, mc_address_shader_ps, bo_shader_ps_size); 38439bd392adSmrg CU_ASSERT_EQUAL(r, 0); 38449bd392adSmrg r = amdgpu_bo_unmap_and_free(bo_shader_vs, va_shader_vs, mc_address_shader_vs, bo_shader_vs_size); 38459bd392adSmrg CU_ASSERT_EQUAL(r, 0); 38469bd392adSmrg 38479bd392adSmrg r = amdgpu_cs_ctx_free(context_handle); 38489bd392adSmrg CU_ASSERT_EQUAL(r, 0); 38499bd392adSmrg} 38509bd392adSmrg 385188f8a8d2Smrgstatic void amdgpu_gpu_reset_test(void) 385288f8a8d2Smrg{ 385388f8a8d2Smrg int r; 385488f8a8d2Smrg char debugfs_path[256], tmp[10]; 385588f8a8d2Smrg int fd; 385688f8a8d2Smrg struct stat sbuf; 385788f8a8d2Smrg amdgpu_context_handle context_handle; 385888f8a8d2Smrg uint32_t hang_state, hangs; 385988f8a8d2Smrg 386088f8a8d2Smrg r = amdgpu_cs_ctx_create(device_handle, &context_handle); 386188f8a8d2Smrg CU_ASSERT_EQUAL(r, 0); 386288f8a8d2Smrg 386388f8a8d2Smrg r = fstat(drm_amdgpu[0], &sbuf); 386488f8a8d2Smrg CU_ASSERT_EQUAL(r, 0); 386588f8a8d2Smrg 386688f8a8d2Smrg sprintf(debugfs_path, "/sys/kernel/debug/dri/%d/amdgpu_gpu_recover", minor(sbuf.st_rdev)); 386788f8a8d2Smrg fd = open(debugfs_path, O_RDONLY); 386888f8a8d2Smrg CU_ASSERT(fd >= 0); 386988f8a8d2Smrg 387088f8a8d2Smrg r = read(fd, tmp, sizeof(tmp)/sizeof(char)); 387188f8a8d2Smrg CU_ASSERT(r > 0); 387288f8a8d2Smrg 387388f8a8d2Smrg r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs); 387488f8a8d2Smrg CU_ASSERT_EQUAL(r, 0); 387588f8a8d2Smrg CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET); 387688f8a8d2Smrg 387788f8a8d2Smrg close(fd); 387888f8a8d2Smrg r = amdgpu_cs_ctx_free(context_handle); 387988f8a8d2Smrg CU_ASSERT_EQUAL(r, 0); 388088f8a8d2Smrg 388188f8a8d2Smrg amdgpu_compute_dispatch_test(); 388288f8a8d2Smrg amdgpu_gfx_dispatch_test(); 388388f8a8d2Smrg} 3884