basic_tests.c revision 0ed5401b
13f012e29Smrg/* 23f012e29Smrg * Copyright 2014 Advanced Micro Devices, Inc. 33f012e29Smrg * 43f012e29Smrg * Permission is hereby granted, free of charge, to any person obtaining a 53f012e29Smrg * copy of this software and associated documentation files (the "Software"), 63f012e29Smrg * to deal in the Software without restriction, including without limitation 73f012e29Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 83f012e29Smrg * and/or sell copies of the Software, and to permit persons to whom the 93f012e29Smrg * Software is furnished to do so, subject to the following conditions: 103f012e29Smrg * 113f012e29Smrg * The above copyright notice and this permission notice shall be included in 123f012e29Smrg * all copies or substantial portions of the Software. 133f012e29Smrg * 143f012e29Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 153f012e29Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 163f012e29Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 173f012e29Smrg * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 183f012e29Smrg * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 193f012e29Smrg * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 203f012e29Smrg * OTHER DEALINGS IN THE SOFTWARE. 
213f012e29Smrg * 223f012e29Smrg*/ 233f012e29Smrg 243f012e29Smrg#include <stdio.h> 253f012e29Smrg#include <stdlib.h> 263f012e29Smrg#include <unistd.h> 2788f8a8d2Smrg#include <sys/types.h> 2888f8a8d2Smrg#ifdef MAJOR_IN_SYSMACROS 2988f8a8d2Smrg#include <sys/sysmacros.h> 3088f8a8d2Smrg#endif 3188f8a8d2Smrg#include <sys/stat.h> 3288f8a8d2Smrg#include <fcntl.h> 339bd392adSmrg#if HAVE_ALLOCA_H 343f012e29Smrg# include <alloca.h> 353f012e29Smrg#endif 3600a23bdaSmrg#include <sys/wait.h> 373f012e29Smrg 383f012e29Smrg#include "CUnit/Basic.h" 393f012e29Smrg 403f012e29Smrg#include "amdgpu_test.h" 413f012e29Smrg#include "amdgpu_drm.h" 4241687f09Smrg#include "amdgpu_internal.h" 437cdc0497Smrg#include "util_math.h" 443f012e29Smrg 453f012e29Smrgstatic amdgpu_device_handle device_handle; 463f012e29Smrgstatic uint32_t major_version; 473f012e29Smrgstatic uint32_t minor_version; 48d8807b2fSmrgstatic uint32_t family_id; 494babd585Smrgstatic uint32_t chip_id; 504babd585Smrgstatic uint32_t chip_rev; 513f012e29Smrg 523f012e29Smrgstatic void amdgpu_query_info_test(void); 533f012e29Smrgstatic void amdgpu_command_submission_gfx(void); 543f012e29Smrgstatic void amdgpu_command_submission_compute(void); 55d8807b2fSmrgstatic void amdgpu_command_submission_multi_fence(void); 563f012e29Smrgstatic void amdgpu_command_submission_sdma(void); 573f012e29Smrgstatic void amdgpu_userptr_test(void); 583f012e29Smrgstatic void amdgpu_semaphore_test(void); 5900a23bdaSmrgstatic void amdgpu_sync_dependency_test(void); 6000a23bdaSmrgstatic void amdgpu_bo_eviction_test(void); 6188f8a8d2Smrgstatic void amdgpu_compute_dispatch_test(void); 6288f8a8d2Smrgstatic void amdgpu_gfx_dispatch_test(void); 635324fb0dSmrgstatic void amdgpu_draw_test(void); 6488f8a8d2Smrgstatic void amdgpu_gpu_reset_test(void); 650ed5401bSmrgstatic void amdgpu_stable_pstate_test(void); 663f012e29Smrg 673f012e29Smrgstatic void amdgpu_command_submission_write_linear_helper(unsigned ip_type); 683f012e29Smrgstatic void 
amdgpu_command_submission_const_fill_helper(unsigned ip_type); 693f012e29Smrgstatic void amdgpu_command_submission_copy_linear_helper(unsigned ip_type); 7000a23bdaSmrgstatic void amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle, 7100a23bdaSmrg unsigned ip_type, 7200a23bdaSmrg int instance, int pm4_dw, uint32_t *pm4_src, 7300a23bdaSmrg int res_cnt, amdgpu_bo_handle *resources, 7400a23bdaSmrg struct amdgpu_cs_ib_info *ib_info, 7500a23bdaSmrg struct amdgpu_cs_request *ibs_request); 7641687f09Smrg 773f012e29SmrgCU_TestInfo basic_tests[] = { 783f012e29Smrg { "Query Info Test", amdgpu_query_info_test }, 793f012e29Smrg { "Userptr Test", amdgpu_userptr_test }, 8000a23bdaSmrg { "bo eviction Test", amdgpu_bo_eviction_test }, 813f012e29Smrg { "Command submission Test (GFX)", amdgpu_command_submission_gfx }, 823f012e29Smrg { "Command submission Test (Compute)", amdgpu_command_submission_compute }, 83d8807b2fSmrg { "Command submission Test (Multi-Fence)", amdgpu_command_submission_multi_fence }, 843f012e29Smrg { "Command submission Test (SDMA)", amdgpu_command_submission_sdma }, 853f012e29Smrg { "SW semaphore Test", amdgpu_semaphore_test }, 8600a23bdaSmrg { "Sync dependency Test", amdgpu_sync_dependency_test }, 8788f8a8d2Smrg { "Dispatch Test (Compute)", amdgpu_compute_dispatch_test }, 8888f8a8d2Smrg { "Dispatch Test (GFX)", amdgpu_gfx_dispatch_test }, 895324fb0dSmrg { "Draw Test", amdgpu_draw_test }, 9088f8a8d2Smrg { "GPU reset Test", amdgpu_gpu_reset_test }, 910ed5401bSmrg { "Stable pstate Test", amdgpu_stable_pstate_test }, 923f012e29Smrg CU_TEST_INFO_NULL, 933f012e29Smrg}; 949bd392adSmrg#define BUFFER_SIZE (MAX2(8 * 1024, getpagesize())) 953f012e29Smrg#define SDMA_PKT_HEADER_op_offset 0 963f012e29Smrg#define SDMA_PKT_HEADER_op_mask 0x000000FF 973f012e29Smrg#define SDMA_PKT_HEADER_op_shift 0 983f012e29Smrg#define SDMA_PKT_HEADER_OP(x) (((x) & SDMA_PKT_HEADER_op_mask) << SDMA_PKT_HEADER_op_shift) 993f012e29Smrg#define SDMA_OPCODE_CONSTANT_FILL 11 
/* Sub-field of the SDMA constant-fill header: fill granularity. */
#  define SDMA_CONSTANT_FILL_EXTRA_SIZE(x)	((x) << 14)
	/* 0 = byte fill
	 * 2 = DW fill
	 */

/*
 * Build an SDMA packet header dword: opcode in bits [7:0], sub-opcode in
 * bits [15:8] and a 16-bit opcode-specific field in bits [31:16].
 * The 16-bit field is converted to unsigned before shifting so a value with
 * bit 15 set does not overflow a signed int.
 */
#define SDMA_PACKET(op, sub_op, e)	((((unsigned)(e) & 0xFFFF) << 16) |	\
					 (((sub_op) & 0xFF) << 8) |		\
					 (((op) & 0xFF) << 0))

#define SDMA_OPCODE_WRITE			2
#  define SDMA_WRITE_SUB_OPCODE_LINEAR		0
/* Spelling ("WRTIE") kept as-is: the name is part of the existing interface. */
#  define SDMA_WRTIE_SUB_OPCODE_TILED		1

#define SDMA_OPCODE_COPY			1
#  define SDMA_COPY_SUB_OPCODE_LINEAR		0

#define SDMA_OPCODE_ATOMIC			10
#  define SDMA_ATOMIC_LOOP(x)			((x) << 0)
	/* 0 - single_pass_atomic.
	 * 1 - loop_until_compare_satisfied.
	 */
#  define SDMA_ATOMIC_TMZ(x)			((x) << 2)
	/* 0 - non-TMZ.
	 * 1 - TMZ.
	 */
#  define SDMA_ATOMIC_OPCODE(x)			((x) << 9)
	/* TC_OP_ATOMIC_CMPSWAP_RTN_32 0x00000008
	 * same as Packet 3
	 */

#define GFX_COMPUTE_NOP		0xffff1000
#define SDMA_NOP		0x0

/* PM4 */
#define PACKET_TYPE0	0
#define PACKET_TYPE1	1
#define PACKET_TYPE2	2
#define PACKET_TYPE3	3

/* Field extractors for a PM4 header dword. */
#define CP_PACKET_GET_TYPE(h)		(((h) >> 30) & 3)
#define CP_PACKET_GET_COUNT(h)		(((h) >> 16) & 0x3FFF)
#define CP_PACKET0_GET_REG(h)		((h) & 0xFFFF)
#define CP_PACKET3_GET_OPCODE(h)	(((h) >> 8) & 0xFF)

/* Type-0 header: register offset in bits [15:0], count in bits [29:16]. */
#define PACKET0(reg, n)	((PACKET_TYPE0 << 30) |				\
			 ((reg) & 0xFFFF) |				\
			 ((n) & 0x3FFF) << 16)
#define CP_PACKET2		0x80000000
#define PACKET2_PAD_SHIFT	0
#define PACKET2_PAD_MASK	(0x3fffffff << 0)

/* Relies on a REG_SET() helper that is not defined in this part of the file. */
#define PACKET2(v)	(CP_PACKET2 | REG_SET(PACKET2_PAD, (v)))

/*
 * Type-3 header: packet type in bits [31:30], count in bits [29:16],
 * opcode in bits [15:8].  The type is shifted as unsigned because
 * 3 << 30 does not fit in a signed int.
 */
#define PACKET3(op, n)	(((unsigned)PACKET_TYPE3 << 30) |		\
			 (((op) & 0xFF) << 8) |				\
			 ((n) & 0x3FFF) << 16)
/*
 * Type-3 header with bit 1 set.  The expansion is fully parenthesized so
 * the macro can be used inside larger expressions (&, ==, +, ...).
 */
#define PACKET3_COMPUTE(op, n)	(PACKET3(op, n) | (1 << 1))

/* Packet 3 types */
#define PACKET3_NOP				0x10

#define PACKET3_WRITE_DATA			0x37
#define WRITE_DATA_DST_SEL(x)			((x) << 8)
	/* 0 - register
	 * 1 - memory (sync - via GRBM)
	 * 2 - gl2
	 * 3 - gds
	 * 4 - reserved
	 * 5 - memory (async - direct)
	 */
#define WR_ONE_ADDR				(1 << 16)
#define WR_CONFIRM				(1 << 20)
#define WRITE_DATA_CACHE_POLICY(x)		((x) << 25)
	/* 0 - LRU
	 * 1 - Stream
	 */
/* Shifted as unsigned: selector value 2 reaches bit 31. */
#define WRITE_DATA_ENGINE_SEL(x)		((unsigned)(x) << 30)
	/* 0 - me
	 * 1 - pfp
	 * 2 - ce
	 */

#define PACKET3_ATOMIC_MEM			0x1E
#define TC_OP_ATOMIC_CMPSWAP_RTN_32		0x00000008
#define ATOMIC_MEM_COMMAND(x)			((x) << 8)
	/* 0 - single_pass_atomic.
	 * 1 - loop_until_compare_satisfied.
	 */
/* Spelling ("CACHEPOLICAY") kept as-is: the name is part of the existing interface. */
#define ATOMIC_MEM_CACHEPOLICAY(x)		((x) << 25)
	/* 0 - lru.
	 * 1 - stream.
	 */
#define ATOMIC_MEM_ENGINESEL(x)			((unsigned)(x) << 30)
	/* 0 - micro_engine.
	 */

#define PACKET3_DMA_DATA			0x50
/* 1. header
 * 2. CONTROL
 * 3. SRC_ADDR_LO or DATA [31:0]
 * 4. SRC_ADDR_HI [31:0]
 * 5. DST_ADDR_LO [31:0]
 * 6. DST_ADDR_HI [7:0]
 * 7. COMMAND [30:21] | BYTE_COUNT [20:0]
 */
/* CONTROL */
#  define PACKET3_DMA_DATA_ENGINE(x)		((x) << 0)
	/* 0 - ME
	 * 1 - PFP
	 */
#  define PACKET3_DMA_DATA_SRC_CACHE_POLICY(x)	((x) << 13)
	/* 0 - LRU
	 * 1 - Stream
	 * 2 - Bypass
	 */
#  define PACKET3_DMA_DATA_SRC_VOLATILE		(1 << 15)
#  define PACKET3_DMA_DATA_DST_SEL(x)		((x) << 20)
	/* 0 - DST_ADDR using DAS
	 * 1 - GDS
	 * 3 - DST_ADDR using L2
	 */
#  define PACKET3_DMA_DATA_DST_CACHE_POLICY(x)	((x) << 25)
	/* 0 - LRU
	 * 1 - Stream
	 * 2 - Bypass
	 */
#  define PACKET3_DMA_DATA_DST_VOLATILE		(1 << 27)
#  define PACKET3_DMA_DATA_SRC_SEL(x)		((x) << 29)
	/* 0 - SRC_ADDR using SAS
	 * 1 - GDS
	 * 2 - DATA
	 * 3 - SRC_ADDR using L2
	 */
/* Bit 31: must be an unsigned constant, 1 << 31 overflows a signed int. */
#  define PACKET3_DMA_DATA_CP_SYNC		(1u << 31)
/* COMMAND */
#  define PACKET3_DMA_DATA_DIS_WC		(1 << 21)
#  define PACKET3_DMA_DATA_CMD_SRC_SWAP(x)	((x) << 22)
	/* 0 - none
	 * 1 - 8 in 16
	 * 2 - 8 in 32
	 * 3 - 8 in 64
	 */
#  define PACKET3_DMA_DATA_CMD_DST_SWAP(x)	((x) << 24)
	/* 0 - none
	 * 1 - 8 in 16
	 * 2 - 8 in 32
	 * 3 - 8 in 64
	 */
#  define PACKET3_DMA_DATA_CMD_SAS		(1 << 26)
	/* 0 - memory
	 * 1 - register
	 */
#  define PACKET3_DMA_DATA_CMD_DAS		(1 << 27)
	/* 0 - memory
	 * 1 - register
	 */
#  define PACKET3_DMA_DATA_CMD_SAIC		(1 << 28)
#  define PACKET3_DMA_DATA_CMD_DAIC		(1 << 29)
#  define PACKET3_DMA_DATA_CMD_RAW_WAIT		(1 << 30)

/*
 * SI-style SDMA header: 4-bit opcode in bits [31:28], single-bit fields at
 * bits 26, 23 and 22, and a 20-bit count in bits [19:0].  The opcode is
 * shifted as unsigned because opcodes >= 8 reach bit 31.
 */
#define SDMA_PACKET_SI(op, b, t, s, cnt)	((((unsigned)(op) & 0xF) << 28) |	\
						 (((b) & 0x1) << 26) |			\
						 (((t) & 0x1) << 23) |			\
						 (((s) & 0x1) << 22) |			\
						 (((cnt) & 0xFFFFF) << 0))
#define SDMA_OPCODE_COPY_SI			3
#define SDMA_OPCODE_CONSTANT_FILL_SI		13
#define SDMA_NOP_SI				0xf
#define GFX_COMPUTE_NOP_SI			0x80000000
#define PACKET3_DMA_DATA_SI			0x41
#  define PACKET3_DMA_DATA_SI_ENGINE(x)		((x) << 27)
	/* 0 - ME
	 * 1 - PFP
	 */
#  define PACKET3_DMA_DATA_SI_DST_SEL(x)	((x) << 20)
	/* 0 - DST_ADDR using DAS
	 * 1 - GDS
	 * 3 - DST_ADDR using L2
	 */
#  define PACKET3_DMA_DATA_SI_SRC_SEL(x)	((x) << 29)
	/* 0 - SRC_ADDR using SAS
	 * 1 - GDS
	 * 2 - DATA
	 * 3 - SRC_ADDR using L2
	 */
#  define PACKET3_DMA_DATA_SI_CP_SYNC		(1u << 31)


#define PKT3_CONTEXT_CONTROL			0x28
#define CONTEXT_CONTROL_LOAD_ENABLE(x)		(((unsigned)(x) & 0x1) << 31)
#define CONTEXT_CONTROL_LOAD_CE_RAM(x)		(((unsigned)(x) & 0x1) << 28)
#define CONTEXT_CONTROL_SHADOW_ENABLE(x)	(((unsigned)(x) & 0x1) << 31)

#define PKT3_CLEAR_STATE			0x12

#define PKT3_SET_SH_REG				0x76
#define PACKET3_SET_SH_REG_START		0x00002c00

#define PKT3_SET_SH_REG_INDEX			0x9B

#define PACKET3_DISPATCH_DIRECT			0x15
#define PACKET3_EVENT_WRITE			0x46
#define PACKET3_ACQUIRE_MEM			0x58
3005324fb0dSmrg#define PACKET3_SET_CONTEXT_REG 0x69 3015324fb0dSmrg#define PACKET3_SET_UCONFIG_REG 0x79 3025324fb0dSmrg#define PACKET3_DRAW_INDEX_AUTO 0x2D 30300a23bdaSmrg/* gfx 8 */ 30400a23bdaSmrg#define mmCOMPUTE_PGM_LO 0x2e0c 30500a23bdaSmrg#define mmCOMPUTE_PGM_RSRC1 0x2e12 30600a23bdaSmrg#define mmCOMPUTE_TMPRING_SIZE 0x2e18 30700a23bdaSmrg#define mmCOMPUTE_USER_DATA_0 0x2e40 30800a23bdaSmrg#define mmCOMPUTE_USER_DATA_1 0x2e41 30900a23bdaSmrg#define mmCOMPUTE_RESOURCE_LIMITS 0x2e15 31000a23bdaSmrg#define mmCOMPUTE_NUM_THREAD_X 0x2e07 31100a23bdaSmrg 31200a23bdaSmrg 31300a23bdaSmrg 31400a23bdaSmrg#define SWAP_32(num) (((num & 0xff000000) >> 24) | \ 31500a23bdaSmrg ((num & 0x0000ff00) << 8) | \ 31600a23bdaSmrg ((num & 0x00ff0000) >> 8) | \ 31700a23bdaSmrg ((num & 0x000000ff) << 24)) 31800a23bdaSmrg 31900a23bdaSmrg 32000a23bdaSmrg/* Shader code 32100a23bdaSmrg * void main() 32200a23bdaSmrg{ 32300a23bdaSmrg 32400a23bdaSmrg float x = some_input; 32500a23bdaSmrg for (unsigned i = 0; i < 1000000; i++) 32600a23bdaSmrg x = sin(x); 32700a23bdaSmrg 32800a23bdaSmrg u[0] = 42u; 32900a23bdaSmrg} 33000a23bdaSmrg*/ 33100a23bdaSmrg 33200a23bdaSmrgstatic uint32_t shader_bin[] = { 33300a23bdaSmrg SWAP_32(0x800082be), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0x040085bf), 33400a23bdaSmrg SWAP_32(0x02810281), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0xfcff84bf), 33500a23bdaSmrg SWAP_32(0xff0083be), SWAP_32(0x00f00000), SWAP_32(0xc10082be), SWAP_32(0xaa02007e), 33600a23bdaSmrg SWAP_32(0x000070e0), SWAP_32(0x00000080), SWAP_32(0x000081bf) 33700a23bdaSmrg}; 33800a23bdaSmrg 33900a23bdaSmrg#define CODE_OFFSET 512 34000a23bdaSmrg#define DATA_OFFSET 1024 34100a23bdaSmrg 3425324fb0dSmrgenum cs_type { 3435324fb0dSmrg CS_BUFFERCLEAR, 3449bd392adSmrg CS_BUFFERCOPY, 3459bd392adSmrg CS_HANG, 3469bd392adSmrg CS_HANG_SLOW 3475324fb0dSmrg}; 3485324fb0dSmrg 3495324fb0dSmrgstatic const uint32_t bufferclear_cs_shader_gfx9[] = { 3504babd585Smrg 0x260000ff, 0x000003ff, 0xd1fd0000, 
0x04010c08, 3514babd585Smrg 0x7e020280, 0x7e040204, 0x7e060205, 0x7e080206, 3524babd585Smrg 0x7e0a0207, 0xe01c2000, 0x80000200, 0xbf8c0000, 3534babd585Smrg 0xbf810000 3545324fb0dSmrg}; 3555324fb0dSmrg 3560ed5401bSmrgstatic const uint32_t bufferclear_cs_shader_gfx10[] = { 3570ed5401bSmrg 0xD7460004, 0x04010C08, 0x7E000204, 0x7E020205, 3580ed5401bSmrg 0x7E040206, 0x7E060207, 0xE01C2000, 0x80000004, 3590ed5401bSmrg 0xBF810000 3600ed5401bSmrg}; 3610ed5401bSmrg 3625324fb0dSmrgstatic const uint32_t bufferclear_cs_shader_registers_gfx9[][2] = { 3635324fb0dSmrg {0x2e12, 0x000C0041}, //{ mmCOMPUTE_PGM_RSRC1, 0x000C0041 }, 3645324fb0dSmrg {0x2e13, 0x00000090}, //{ mmCOMPUTE_PGM_RSRC2, 0x00000090 }, 3655324fb0dSmrg {0x2e07, 0x00000040}, //{ mmCOMPUTE_NUM_THREAD_X, 0x00000040 }, 3665324fb0dSmrg {0x2e08, 0x00000001}, //{ mmCOMPUTE_NUM_THREAD_Y, 0x00000001 }, 3675324fb0dSmrg {0x2e09, 0x00000001}, //{ mmCOMPUTE_NUM_THREAD_Z, 0x00000001 } 3685324fb0dSmrg}; 3695324fb0dSmrg 3705324fb0dSmrgstatic const uint32_t bufferclear_cs_shader_registers_num_gfx9 = 5; 3715324fb0dSmrg 3725324fb0dSmrgstatic const uint32_t buffercopy_cs_shader_gfx9[] = { 3734babd585Smrg 0x260000ff, 0x000003ff, 0xd1fd0000, 0x04010c08, 3744babd585Smrg 0x7e020280, 0xe00c2000, 0x80000200, 0xbf8c0f70, 3754babd585Smrg 0xe01c2000, 0x80010200, 0xbf810000 3765324fb0dSmrg}; 3775324fb0dSmrg 3780ed5401bSmrgstatic const uint32_t buffercopy_cs_shader_gfx10[] = { 3790ed5401bSmrg 0xD7460001, 0x04010C08, 0xE00C2000, 0x80000201, 3800ed5401bSmrg 0xBF8C3F70, 0xE01C2000, 0x80010201, 0xBF810000 3810ed5401bSmrg}; 3820ed5401bSmrg 3835324fb0dSmrgstatic const uint32_t preamblecache_gfx9[] = { 3845324fb0dSmrg 0xc0026900, 0x81, 0x80000000, 0x40004000, 0xc0026900, 0x8c, 0xaa99aaaa, 0x0, 3855324fb0dSmrg 0xc0026900, 0x90, 0x80000000, 0x40004000, 0xc0026900, 0x94, 0x80000000, 0x40004000, 3865324fb0dSmrg 0xc0026900, 0xb4, 0x0, 0x3f800000, 0xc0016900, 0x103, 0x0, 3875324fb0dSmrg 0xc0016900, 0x208, 0x0, 0xc0016900, 0x290, 0x0, 3885324fb0dSmrg 
0xc0016900, 0x2a1, 0x0, 0xc0026900, 0x2ad, 0x0, 0x0, 3895324fb0dSmrg 0xc0016900, 0x2d5, 0x10000, 0xc0016900, 0x2dc, 0x0, 3905324fb0dSmrg 0xc0066900, 0x2de, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xc0026900, 0x2e5, 0x0, 0x0, 3915324fb0dSmrg 0xc0056900, 0x2f9, 0x5, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 39288f8a8d2Smrg 0xc0036900, 0x311, 0x3, 0, 0x100000, 0xc0026900, 0x316, 0x1e, 0x20, 3935324fb0dSmrg 0xc0016900, 0x349, 0x0, 0xc0016900, 0x358, 0x0, 0xc0016900, 0x367, 0x0, 3945324fb0dSmrg 0xc0016900, 0x376, 0x0, 0xc0016900, 0x385, 0x0, 0xc0016900, 0x19, 0x0, 3955324fb0dSmrg 0xc0056900, 0xe8, 0x0, 0x0, 0x0, 0x0, 0x0, 3965324fb0dSmrg 0xc0076900, 0x1e1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 3975324fb0dSmrg 0xc0026900, 0x204, 0x90000, 0x4, 0xc0046900, 0x20c, 0x0, 0x0, 0x0, 0x0, 3985324fb0dSmrg 0xc0016900, 0x2b2, 0x0, 0xc0026900, 0x30e, 0xffffffff, 0xffffffff, 39988f8a8d2Smrg 0xc0016900, 0x314, 0x0, 0xc0016900, 0x2a6, 0, 0xc0016900, 0x210, 0, 40088f8a8d2Smrg 0xc0002f00, 0x1, 0xc0016900, 0x1, 0x1, 4015324fb0dSmrg 0xc0016900, 0x18, 0x2, 0xc0016900, 0x206, 0x300, 0xc0017900, 0x20000243, 0x0, 4025324fb0dSmrg 0xc0017900, 0x248, 0xffffffff, 0xc0017900, 0x249, 0x0, 0xc0017900, 0x24a, 0x0, 4035324fb0dSmrg 0xc0017900, 0x24b, 0x0 4045324fb0dSmrg}; 4055324fb0dSmrg 4060ed5401bSmrgstatic const uint32_t preamblecache_gfx10[] = { 4070ed5401bSmrg 0xc0026900, 0x81, 0x80000000, 0x40004000, 0xc0026900, 0x8c, 0xaa99aaaa, 0x0, 4080ed5401bSmrg 0xc0026900, 0x90, 0x80000000, 0x40004000, 0xc0026900, 0x94, 0x80000000, 0x40004000, 4090ed5401bSmrg 0xc0026900, 0xb4, 0x0, 0x3f800000, 0xc0016900, 0x103, 0x0, 4100ed5401bSmrg 0xc0016900, 0x208, 0x0, 0xc0016900, 0x290, 0x0, 4110ed5401bSmrg 0xc0016900, 0x2a1, 0x0, 0xc0026900, 0x2ad, 0x0, 0x0, 4120ed5401bSmrg 0xc0016900, 0x2d5, 0x10000, 0xc0016900, 0x2dc, 0x0, 4130ed5401bSmrg 0xc0066900, 0x2de, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xc0026900, 0x2e5, 0x0, 0x0, 4140ed5401bSmrg 0xc0056900, 0x2f9, 0x5, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 4150ed5401bSmrg 
0xc0046900, 0x310, 0, 0x3, 0, 0x100000, 0xc0026900, 0x316, 0xe, 0x20, 4160ed5401bSmrg 0xc0016900, 0x349, 0x0, 0xc0016900, 0x358, 0x0, 0xc0016900, 0x367, 0x0, 4170ed5401bSmrg 0xc0016900, 0x376, 0x0, 0xc0016900, 0x385, 0x0, 0xc0016900, 0x6, 0x0, 4180ed5401bSmrg 0xc0056900, 0xe8, 0x0, 0x0, 0x0, 0x0, 0x0, 4190ed5401bSmrg 0xc0076900, 0x1e1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 4200ed5401bSmrg 0xc0026900, 0x204, 0x90000, 0x4, 0xc0046900, 0x20c, 0x0, 0x0, 0x0, 0x0, 4210ed5401bSmrg 0xc0016900, 0x2b2, 0x0, 0xc0026900, 0x30e, 0xffffffff, 0xffffffff, 4220ed5401bSmrg 0xc0016900, 0x314, 0x0, 0xc0016900, 0x10a, 0, 0xc0016900, 0x2a6, 0, 0xc0016900, 0x210, 0, 4230ed5401bSmrg 0xc0016900, 0x2db, 0, 0xc0016900, 0x1d4, 0, 0xc0002f00, 0x1, 0xc0016900, 0x1, 0x1, 0xc0016900, 0xe, 0x2, 4240ed5401bSmrg 0xc0016900, 0x206, 0x300, 0xc0016900, 0x212, 0x200, 0xc0017900, 0x7b, 0x20, 0xc0017a00, 0x20000243, 0x0, 4250ed5401bSmrg 0xc0017900, 0x249, 0, 0xc0017900, 0x24a, 0, 0xc0017900, 0x24b, 0, 0xc0017900, 0x259, 0xffffffff, 4260ed5401bSmrg 0xc0017900, 0x25f, 0, 0xc0017900, 0x260, 0, 0xc0017900, 0x262, 0, 4270ed5401bSmrg 0xc0017600, 0x45, 0x0, 0xc0017600, 0x6, 0x0, 4280ed5401bSmrg 0xc0067600, 0x70, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 4290ed5401bSmrg 0xc0067600, 0x30, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 4300ed5401bSmrg}; 4310ed5401bSmrg 4325324fb0dSmrgenum ps_type { 4335324fb0dSmrg PS_CONST, 4349bd392adSmrg PS_TEX, 4359bd392adSmrg PS_HANG, 4369bd392adSmrg PS_HANG_SLOW 4375324fb0dSmrg}; 4385324fb0dSmrg 4395324fb0dSmrgstatic const uint32_t ps_const_shader_gfx9[] = { 4405324fb0dSmrg 0x7E000200, 0x7E020201, 0x7E040202, 0x7E060203, 4415324fb0dSmrg 0xD2960000, 0x00020300, 0xD2960001, 0x00020702, 4425324fb0dSmrg 0xC4001C0F, 0x00000100, 0xBF810000 4435324fb0dSmrg}; 4445324fb0dSmrg 4455324fb0dSmrgstatic const uint32_t ps_const_shader_patchinfo_code_size_gfx9 = 6; 4465324fb0dSmrg 4475324fb0dSmrgstatic const uint32_t ps_const_shader_patchinfo_code_gfx9[][10][6] = { 4485324fb0dSmrg {{ 0xBF800000, 0xBF800000, 0xBF800000, 
0xBF800000, 0xC4001890, 0x00000000 }, 4495324fb0dSmrg { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001801, 0x00000000 }, 4505324fb0dSmrg { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000100 }, 4515324fb0dSmrg { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000300 }, 4525324fb0dSmrg { 0xD2960000, 0x00020300, 0xD2960001, 0x00020702, 0xC4001C0F, 0x00000100 }, 4535324fb0dSmrg { 0xD2950000, 0x00020300, 0xD2950001, 0x00020702, 0xC4001C0F, 0x00000100 }, 4545324fb0dSmrg { 0xD2940000, 0x00020300, 0xD2940001, 0x00020702, 0xC4001C0F, 0x00000100 }, 4555324fb0dSmrg { 0xD2970000, 0x00020300, 0xD2970001, 0x00020702, 0xC4001C0F, 0x00000100 }, 4565324fb0dSmrg { 0xD2980000, 0x00020300, 0xD2980001, 0x00020702, 0xC4001C0F, 0x00000100 }, 4575324fb0dSmrg { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC400180F, 0x03020100 } 4585324fb0dSmrg } 4595324fb0dSmrg}; 4605324fb0dSmrg 4615324fb0dSmrgstatic const uint32_t ps_const_shader_patchinfo_offset_gfx9[] = { 4625324fb0dSmrg 0x00000004 4635324fb0dSmrg}; 4645324fb0dSmrg 4655324fb0dSmrgstatic const uint32_t ps_num_sh_registers_gfx9 = 2; 4665324fb0dSmrg 4675324fb0dSmrgstatic const uint32_t ps_const_sh_registers_gfx9[][2] = { 4685324fb0dSmrg {0x2C0A, 0x000C0040},//{ mmSPI_SHADER_PGM_RSRC1_PS, 0x000C0040 }, 4695324fb0dSmrg {0x2C0B, 0x00000008}, //{ mmSPI_SHADER_PGM_RSRC2_PS, 0x00000008 } 4705324fb0dSmrg}; 4715324fb0dSmrg 4725324fb0dSmrgstatic const uint32_t ps_num_context_registers_gfx9 = 7; 4735324fb0dSmrg 4745324fb0dSmrgstatic const uint32_t ps_const_context_reg_gfx9[][2] = { 4755324fb0dSmrg {0xA1B4, 0x00000002}, //{ mmSPI_PS_INPUT_ADDR, 0x00000002 }, 4765324fb0dSmrg {0xA1B6, 0x00000000}, //{ mmSPI_PS_IN_CONTROL, 0x00000000 }, 4775324fb0dSmrg {0xA08F, 0x0000000F}, //{ mmCB_SHADER_MASK, 0x0000000F }, 4785324fb0dSmrg {0xA203, 0x00000010}, //{ mmDB_SHADER_CONTROL, 0x00000010 }, 4795324fb0dSmrg {0xA1C4, 0x00000000}, //{ mmSPI_SHADER_Z_FORMAT, 0x00000000 }, 4805324fb0dSmrg {0xA1B8, 0x00000000}, //{ 
mmSPI_BARYC_CNTL, 0x00000000 /* Always 0 for now */}, 4815324fb0dSmrg {0xA1C5, 0x00000004}, //{ mmSPI_SHADER_COL_FORMAT, 0x00000004 } 4825324fb0dSmrg}; 4835324fb0dSmrg 4840ed5401bSmrgstatic const uint32_t ps_const_shader_gfx10[] = { 4850ed5401bSmrg 0x7E000200, 0x7E020201, 0x7E040202, 0x7E060203, 4860ed5401bSmrg 0x5E000300, 0x5E020702, 0xBF800000, 0xBF800000, 4870ed5401bSmrg 0xF8001C0F, 0x00000100, 0xBF810000 4880ed5401bSmrg}; 4890ed5401bSmrg 4900ed5401bSmrgstatic const uint32_t ps_const_shader_patchinfo_code_size_gfx10 = 6; 4910ed5401bSmrg 4920ed5401bSmrgstatic const uint32_t ps_const_shader_patchinfo_code_gfx10[][10][6] = { 4930ed5401bSmrg {{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8001890, 0x00000000 }, 4940ed5401bSmrg { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8001801, 0x00000000 }, 4950ed5401bSmrg { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8001803, 0x00000100 }, 4960ed5401bSmrg { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8001803, 0x00000300 }, 4970ed5401bSmrg { 0x5E000300, 0x5E020702, 0xBF800000, 0xBF800000, 0xF8001C0F, 0x00000100 }, 4980ed5401bSmrg { 0xD7690000, 0x00020300, 0xD7690001, 0x00020702, 0xF8001C0F, 0x00000100 }, 4990ed5401bSmrg { 0xD7680000, 0x00020300, 0xD7680001, 0x00020702, 0xF8001C0F, 0x00000100 }, 5000ed5401bSmrg { 0xD76A0000, 0x00020300, 0xD76A0001, 0x00020702, 0xF8001C0F, 0x00000100 }, 5010ed5401bSmrg { 0xD76B0000, 0x00020300, 0xD76B0001, 0x00020702, 0xF8001C0F, 0x00000100 }, 5020ed5401bSmrg { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF800180F, 0x03020100 } 5030ed5401bSmrg } 5040ed5401bSmrg}; 5050ed5401bSmrg 5060ed5401bSmrgstatic const uint32_t ps_const_shader_patchinfo_offset_gfx10[] = { 5070ed5401bSmrg 0x00000004 5080ed5401bSmrg}; 5090ed5401bSmrg 5100ed5401bSmrgstatic const uint32_t ps_num_sh_registers_gfx10 = 2; 5110ed5401bSmrg 5120ed5401bSmrgstatic const uint32_t ps_const_sh_registers_gfx10[][2] = { 5130ed5401bSmrg {0x2C0A, 0x000C0000},//{ mmSPI_SHADER_PGM_RSRC1_PS, 0x000C0000 }, 
5140ed5401bSmrg {0x2C0B, 0x00000008}, //{ mmSPI_SHADER_PGM_RSRC2_PS, 0x00000008 } 5150ed5401bSmrg}; 5160ed5401bSmrg 5175324fb0dSmrgstatic const uint32_t ps_tex_shader_gfx9[] = { 5185324fb0dSmrg 0xBEFC000C, 0xBE8E017E, 0xBEFE077E, 0xD4180000, 5195324fb0dSmrg 0xD4190001, 0xD41C0100, 0xD41D0101, 0xF0800F00, 5205324fb0dSmrg 0x00400206, 0xBEFE010E, 0xBF8C0F70, 0xD2960000, 5215324fb0dSmrg 0x00020702, 0xD2960001, 0x00020B04, 0xC4001C0F, 5225324fb0dSmrg 0x00000100, 0xBF810000 5235324fb0dSmrg}; 5245324fb0dSmrg 5255324fb0dSmrgstatic const uint32_t ps_tex_shader_patchinfo_offset_gfx9[] = { 5265324fb0dSmrg 0x0000000B 5275324fb0dSmrg}; 5285324fb0dSmrg 5295324fb0dSmrgstatic const uint32_t ps_tex_shader_patchinfo_code_size_gfx9 = 6; 5305324fb0dSmrg 5315324fb0dSmrgstatic const uint32_t ps_tex_shader_patchinfo_code_gfx9[][10][6] = { 5325324fb0dSmrg {{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001890, 0x00000000 }, 5335324fb0dSmrg { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001801, 0x00000002 }, 5345324fb0dSmrg { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000302 }, 5355324fb0dSmrg { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000502 }, 5365324fb0dSmrg { 0xD2960000, 0x00020702, 0xD2960001, 0x00020B04, 0xC4001C0F, 0x00000100 }, 5375324fb0dSmrg { 0xD2950000, 0x00020702, 0xD2950001, 0x00020B04, 0xC4001C0F, 0x00000100 }, 5385324fb0dSmrg { 0xD2940000, 0x00020702, 0xD2940001, 0x00020B04, 0xC4001C0F, 0x00000100 }, 5395324fb0dSmrg { 0xD2970000, 0x00020702, 0xD2970001, 0x00020B04, 0xC4001C0F, 0x00000100 }, 5405324fb0dSmrg { 0xD2980000, 0x00020702, 0xD2980001, 0x00020B04, 0xC4001C0F, 0x00000100 }, 5415324fb0dSmrg { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC400180F, 0x05040302 } 5425324fb0dSmrg } 5435324fb0dSmrg}; 5445324fb0dSmrg 5455324fb0dSmrgstatic const uint32_t ps_tex_sh_registers_gfx9[][2] = { 5465324fb0dSmrg {0x2C0A, 0x000C0081},//{ mmSPI_SHADER_PGM_RSRC1_PS, 0x000C0081 }, 5475324fb0dSmrg {0x2C0B, 0x00000018}, //{ 
mmSPI_SHADER_PGM_RSRC2_PS, 0x00000018 } 5485324fb0dSmrg}; 5495324fb0dSmrg 5505324fb0dSmrgstatic const uint32_t ps_tex_context_reg_gfx9[][2] = { 5515324fb0dSmrg {0xA1B4, 0x00000002}, //{ mmSPI_PS_INPUT_ADDR, 0x00000002 }, 5525324fb0dSmrg {0xA1B6, 0x00000001}, //{ mmSPI_PS_IN_CONTROL, 0x00000001 }, 5535324fb0dSmrg {0xA08F, 0x0000000F}, //{ mmCB_SHADER_MASK, 0x0000000F }, 5545324fb0dSmrg {0xA203, 0x00000010}, //{ mmDB_SHADER_CONTROL, 0x00000010 }, 5555324fb0dSmrg {0xA1C4, 0x00000000}, //{ mmSPI_SHADER_Z_FORMAT, 0x00000000 }, 5565324fb0dSmrg {0xA1B8, 0x00000000}, //{ mmSPI_BARYC_CNTL, 0x00000000 /* Always 0 for now */}, 5575324fb0dSmrg {0xA1C5, 0x00000004}, //{ mmSPI_SHADER_COL_FORMAT, 0x00000004 } 5585324fb0dSmrg}; 5595324fb0dSmrg 5600ed5401bSmrgstatic const uint32_t ps_tex_shader_gfx10[] = { 5610ed5401bSmrg 0xBEFC030C, 0xBE8E047E, 0xBEFE0A7E, 0xC8080000, 5620ed5401bSmrg 0xC80C0100, 0xC8090001, 0xC80D0101, 0xF0800F0A, 5630ed5401bSmrg 0x00400402, 0x00000003, 0xBEFE040E, 0xBF8C0F70, 5640ed5401bSmrg 0x5E000B04, 0x5E020F06, 0xBF800000, 0xBF800000, 5650ed5401bSmrg 0xF8001C0F, 0x00000100, 0xBF810000 5660ed5401bSmrg}; 5670ed5401bSmrg 5680ed5401bSmrgstatic const uint32_t ps_tex_shader_patchinfo_offset_gfx10[] = { 5690ed5401bSmrg 0x0000000C 5700ed5401bSmrg}; 5710ed5401bSmrg 5720ed5401bSmrgstatic const uint32_t ps_tex_shader_patchinfo_code_size_gfx10 = 6; 5730ed5401bSmrg 5740ed5401bSmrgstatic const uint32_t ps_tex_shader_patchinfo_code_gfx10[][10][6] = { 5750ed5401bSmrg {{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8001890, 0x00000000 }, 5760ed5401bSmrg { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8001801, 0x00000004 }, 5770ed5401bSmrg { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8001803, 0x00000504 }, 5780ed5401bSmrg { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8001803, 0x00000704 }, 5790ed5401bSmrg { 0x5E000B04, 0x5E020F06, 0xBF800000, 0xBF800000, 0xF8001C0F, 0x00000100 }, 5800ed5401bSmrg { 0xD7690000, 0x00020B04, 0xD7690001, 0x00020F06, 
0xF8001C0F, 0x00000100 }, 5810ed5401bSmrg { 0xD7680000, 0x00020B04, 0xD7680001, 0x00020F06, 0xF8001C0F, 0x00000100 }, 5820ed5401bSmrg { 0xD76A0000, 0x00020B04, 0xD76A0001, 0x00020F06, 0xF8001C0F, 0x00000100 }, 5830ed5401bSmrg { 0xD76B0000, 0x00020B04, 0xD76B0001, 0x00020F06, 0xF8001C0F, 0x00000100 }, 5840ed5401bSmrg { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF800180F, 0x07060504 } 5850ed5401bSmrg } 5860ed5401bSmrg}; 5870ed5401bSmrg 5885324fb0dSmrgstatic const uint32_t vs_RectPosTexFast_shader_gfx9[] = { 5895324fb0dSmrg 0x7E000B00, 0x020000F3, 0xD042000A, 0x00010100, 5905324fb0dSmrg 0x7E020202, 0x7E040200, 0x020000F3, 0x7E060206, 5915324fb0dSmrg 0x7E080204, 0xD1000001, 0x002A0302, 0x7C840080, 5925324fb0dSmrg 0x7E000200, 0x7E040203, 0x7E0A0201, 0xD1000003, 5935324fb0dSmrg 0x002A0704, 0x7E0C0207, 0x7E0E0205, 0x00000101, 5945324fb0dSmrg 0x00020505, 0x7E040208, 0x7E0A02F2, 0x00060903, 5955324fb0dSmrg 0x00080D07, 0x7E0C0209, 0xC40008CF, 0x05020100, 5965324fb0dSmrg 0xC400020F, 0x05060403, 0xBF810000 5975324fb0dSmrg}; 5985324fb0dSmrg 5990ed5401bSmrgstatic const uint32_t vs_RectPosTexFast_shader_gfx10[] = { 6000ed5401bSmrg 0x7E000B00, 0x060000F3, 0x7E020202, 0x7E040206, 6010ed5401bSmrg 0x7C040080, 0x060000F3, 0xD5010001, 0x01AA0200, 6020ed5401bSmrg 0x7E060203, 0xD5010002, 0x01AA0404, 0x7E080207, 6030ed5401bSmrg 0x7C040080, 0xD5010000, 0x01A80101, 0xD5010001, 6040ed5401bSmrg 0x01AA0601, 0x7E060208, 0x7E0A02F2, 0xD5010002, 6050ed5401bSmrg 0x01A80902, 0xD5010004, 0x01AA0805, 0x7E0C0209, 6060ed5401bSmrg 0xF80008CF, 0x05030100, 0xF800020F, 0x05060402, 6070ed5401bSmrg 0xBF810000 6080ed5401bSmrg}; 6090ed5401bSmrg 6105324fb0dSmrgstatic const uint32_t cached_cmd_gfx9[] = { 6115324fb0dSmrg 0xc0016900, 0x0, 0x0, 0xc0026900, 0x3, 0x2a, 0x0, 6125324fb0dSmrg 0xc0046900, 0xa, 0x0, 0x0, 0x0, 0x200020, 6135324fb0dSmrg 0xc0016900, 0x83, 0xffff, 0xc0026900, 0x8e, 0xf, 0xf, 6149bd392adSmrg 0xc0056900, 0x105, 0x0, 0x0, 0x0, 0x0, 0x12, 6155324fb0dSmrg 0xc0026900, 0x10b, 0x0, 0x0, 
0xc0016900, 0x1e0, 0x0, 6165324fb0dSmrg 0xc0036900, 0x200, 0x0, 0x10000, 0xcc0011, 6175324fb0dSmrg 0xc0026900, 0x292, 0x20, 0x60201b8, 6185324fb0dSmrg 0xc0026900, 0x2b0, 0x0, 0x0, 0xc0016900, 0x2f8, 0x0 6195324fb0dSmrg}; 62000a23bdaSmrg 6210ed5401bSmrgstatic const uint32_t cached_cmd_gfx10[] = { 6220ed5401bSmrg 0xc0016900, 0x0, 0x0, 0xc0026900, 0x3, 0x2a, 0x0, 6230ed5401bSmrg 0xc0046900, 0xa, 0x0, 0x0, 0x0, 0x200020, 6240ed5401bSmrg 0xc0016900, 0x83, 0xffff, 0xc0026900, 0x8e, 0xf, 0xf, 6250ed5401bSmrg 0xc0056900, 0x105, 0x0, 0x0, 0x0, 0x0, 0x18, 6260ed5401bSmrg 0xc0026900, 0x10b, 0x0, 0x0, 0xc0016900, 0x1e0, 0x0, 6270ed5401bSmrg 0xc0036900, 0x200, 0x0, 0x10000, 0xcc0011, 6280ed5401bSmrg 0xc0026900, 0x292, 0x20, 0x6020000, 6290ed5401bSmrg 0xc0026900, 0x2b0, 0x0, 0x0, 0xc0016900, 0x2f8, 0x0 6300ed5401bSmrg}; 6310ed5401bSmrg 6329bd392adSmrgunsigned int memcpy_ps_hang[] = { 6339bd392adSmrg 0xFFFFFFFF, 0xBEFE0A7E, 0xBEFC0304, 0xC0C20100, 6349bd392adSmrg 0xC0800300, 0xC8080000, 0xC80C0100, 0xC8090001, 6359bd392adSmrg 0xC80D0101, 0xBF8C007F, 0xF0800F00, 0x00010002, 6369bd392adSmrg 0xBEFE040C, 0xBF8C0F70, 0xBF800000, 0xBF800000, 6379bd392adSmrg 0xF800180F, 0x03020100, 0xBF810000 6389bd392adSmrg}; 6399bd392adSmrg 6409bd392adSmrgstruct amdgpu_test_shader { 6419bd392adSmrg uint32_t *shader; 6429bd392adSmrg uint32_t header_length; 6439bd392adSmrg uint32_t body_length; 6449bd392adSmrg uint32_t foot_length; 6459bd392adSmrg}; 6469bd392adSmrg 6479bd392adSmrgunsigned int memcpy_cs_hang_slow_ai_codes[] = { 6489bd392adSmrg 0xd1fd0000, 0x04010c08, 0xe00c2000, 0x80000100, 6499bd392adSmrg 0xbf8c0f70, 0xe01c2000, 0x80010100, 0xbf810000 6509bd392adSmrg}; 6519bd392adSmrg 6529bd392adSmrgstruct amdgpu_test_shader memcpy_cs_hang_slow_ai = { 6539bd392adSmrg memcpy_cs_hang_slow_ai_codes, 6549bd392adSmrg 4, 6559bd392adSmrg 3, 6569bd392adSmrg 1 6579bd392adSmrg}; 6589bd392adSmrg 6599bd392adSmrgunsigned int memcpy_cs_hang_slow_rv_codes[] = { 6609bd392adSmrg 0x8e00860c, 0x32000000, 0xe00c2000, 
0x80010100, 6619bd392adSmrg 0xbf8c0f70, 0xe01c2000, 0x80020100, 0xbf810000 6629bd392adSmrg}; 6639bd392adSmrg 6649bd392adSmrgstruct amdgpu_test_shader memcpy_cs_hang_slow_rv = { 6659bd392adSmrg memcpy_cs_hang_slow_rv_codes, 6669bd392adSmrg 4, 6679bd392adSmrg 3, 6689bd392adSmrg 1 6699bd392adSmrg}; 6709bd392adSmrg 6710ed5401bSmrgunsigned int memcpy_cs_hang_slow_nv_codes[] = { 6720ed5401bSmrg 0xd7460000, 0x04010c08, 0xe00c2000, 0x80000100, 6730ed5401bSmrg 0xbf8c0f70, 0xe01ca000, 0x80010100, 0xbf810000 6740ed5401bSmrg}; 6750ed5401bSmrg 6760ed5401bSmrgstruct amdgpu_test_shader memcpy_cs_hang_slow_nv = { 6770ed5401bSmrg memcpy_cs_hang_slow_nv_codes, 6780ed5401bSmrg 4, 6790ed5401bSmrg 3, 6800ed5401bSmrg 1 6810ed5401bSmrg}; 6820ed5401bSmrg 6839bd392adSmrgunsigned int memcpy_ps_hang_slow_ai_codes[] = { 6849bd392adSmrg 0xbefc000c, 0xbe8e017e, 0xbefe077e, 0xd4080000, 6859bd392adSmrg 0xd4090001, 0xd40c0100, 0xd40d0101, 0xf0800f00, 6869bd392adSmrg 0x00400002, 0xbefe010e, 0xbf8c0f70, 0xbf800000, 6879bd392adSmrg 0xbf800000, 0xbf800000, 0xbf800000, 0xc400180f, 6889bd392adSmrg 0x03020100, 0xbf810000 6899bd392adSmrg}; 6909bd392adSmrg 6919bd392adSmrgstruct amdgpu_test_shader memcpy_ps_hang_slow_ai = { 6929bd392adSmrg memcpy_ps_hang_slow_ai_codes, 6939bd392adSmrg 7, 6949bd392adSmrg 2, 6959bd392adSmrg 9 6969bd392adSmrg}; 6979bd392adSmrg 6987cdc0497Smrgint amdgpu_bo_alloc_and_map_raw(amdgpu_device_handle dev, unsigned size, 6997cdc0497Smrg unsigned alignment, unsigned heap, uint64_t alloc_flags, 7007cdc0497Smrg uint64_t mapping_flags, amdgpu_bo_handle *bo, void **cpu, 7017cdc0497Smrg uint64_t *mc_address, 7027cdc0497Smrg amdgpu_va_handle *va_handle) 7037cdc0497Smrg{ 7047cdc0497Smrg struct amdgpu_bo_alloc_request request = {}; 7057cdc0497Smrg amdgpu_bo_handle buf_handle; 7067cdc0497Smrg amdgpu_va_handle handle; 7077cdc0497Smrg uint64_t vmc_addr; 7087cdc0497Smrg int r; 7097cdc0497Smrg 7107cdc0497Smrg request.alloc_size = size; 7117cdc0497Smrg request.phys_alignment = alignment; 
7127cdc0497Smrg request.preferred_heap = heap; 7137cdc0497Smrg request.flags = alloc_flags; 7147cdc0497Smrg 7157cdc0497Smrg r = amdgpu_bo_alloc(dev, &request, &buf_handle); 7167cdc0497Smrg if (r) 7177cdc0497Smrg return r; 7187cdc0497Smrg 7197cdc0497Smrg r = amdgpu_va_range_alloc(dev, 7207cdc0497Smrg amdgpu_gpu_va_range_general, 7217cdc0497Smrg size, alignment, 0, &vmc_addr, 7227cdc0497Smrg &handle, 0); 7237cdc0497Smrg if (r) 7247cdc0497Smrg goto error_va_alloc; 7257cdc0497Smrg 7267cdc0497Smrg r = amdgpu_bo_va_op_raw(dev, buf_handle, 0, ALIGN(size, getpagesize()), vmc_addr, 7277cdc0497Smrg AMDGPU_VM_PAGE_READABLE | 7287cdc0497Smrg AMDGPU_VM_PAGE_WRITEABLE | 7297cdc0497Smrg AMDGPU_VM_PAGE_EXECUTABLE | 7307cdc0497Smrg mapping_flags, 7317cdc0497Smrg AMDGPU_VA_OP_MAP); 7327cdc0497Smrg if (r) 7337cdc0497Smrg goto error_va_map; 7347cdc0497Smrg 7357cdc0497Smrg r = amdgpu_bo_cpu_map(buf_handle, cpu); 7367cdc0497Smrg if (r) 7377cdc0497Smrg goto error_cpu_map; 7387cdc0497Smrg 7397cdc0497Smrg *bo = buf_handle; 7407cdc0497Smrg *mc_address = vmc_addr; 7417cdc0497Smrg *va_handle = handle; 7427cdc0497Smrg 7437cdc0497Smrg return 0; 7447cdc0497Smrg 7457cdc0497Smrg error_cpu_map: 7467cdc0497Smrg amdgpu_bo_cpu_unmap(buf_handle); 7477cdc0497Smrg 7487cdc0497Smrg error_va_map: 7497cdc0497Smrg amdgpu_bo_va_op(buf_handle, 0, size, vmc_addr, 0, AMDGPU_VA_OP_UNMAP); 7507cdc0497Smrg 7517cdc0497Smrg error_va_alloc: 7527cdc0497Smrg amdgpu_bo_free(buf_handle); 7537cdc0497Smrg return r; 7547cdc0497Smrg} 7557cdc0497Smrg 7567cdc0497Smrg 7577cdc0497Smrg 75841687f09SmrgCU_BOOL suite_basic_tests_enable(void) 75941687f09Smrg{ 76041687f09Smrg 76141687f09Smrg if (amdgpu_device_initialize(drm_amdgpu[0], &major_version, 76241687f09Smrg &minor_version, &device_handle)) 76341687f09Smrg return CU_FALSE; 76441687f09Smrg 7654babd585Smrg 7664babd585Smrg family_id = device_handle->info.family_id; 7674babd585Smrg chip_id = device_handle->info.chip_external_rev; 7684babd585Smrg chip_rev = 
device_handle->info.chip_rev; 76941687f09Smrg 77041687f09Smrg if (amdgpu_device_deinitialize(device_handle)) 77141687f09Smrg return CU_FALSE; 77241687f09Smrg 7734babd585Smrg /* disable gfx engine basic test cases for some asics have no CPG */ 7744babd585Smrg if (asic_is_gfx_pipe_removed(family_id, chip_id, chip_rev)) { 77541687f09Smrg if (amdgpu_set_test_active("Basic Tests", 77641687f09Smrg "Command submission Test (GFX)", 77741687f09Smrg CU_FALSE)) 77841687f09Smrg fprintf(stderr, "test deactivation failed - %s\n", 77941687f09Smrg CU_get_error_msg()); 78041687f09Smrg 78141687f09Smrg if (amdgpu_set_test_active("Basic Tests", 78241687f09Smrg "Command submission Test (Multi-Fence)", 78341687f09Smrg CU_FALSE)) 78441687f09Smrg fprintf(stderr, "test deactivation failed - %s\n", 78541687f09Smrg CU_get_error_msg()); 78641687f09Smrg 78741687f09Smrg if (amdgpu_set_test_active("Basic Tests", 78841687f09Smrg "Sync dependency Test", 78941687f09Smrg CU_FALSE)) 79041687f09Smrg fprintf(stderr, "test deactivation failed - %s\n", 79141687f09Smrg CU_get_error_msg()); 79241687f09Smrg } 79341687f09Smrg 79441687f09Smrg return CU_TRUE; 79541687f09Smrg} 79641687f09Smrg 7973f012e29Smrgint suite_basic_tests_init(void) 7983f012e29Smrg{ 799d8807b2fSmrg struct amdgpu_gpu_info gpu_info = {0}; 8003f012e29Smrg int r; 8013f012e29Smrg 8023f012e29Smrg r = amdgpu_device_initialize(drm_amdgpu[0], &major_version, 8033f012e29Smrg &minor_version, &device_handle); 8043f012e29Smrg 805d8807b2fSmrg if (r) { 806037b3c26Smrg if ((r == -EACCES) && (errno == EACCES)) 807037b3c26Smrg printf("\n\nError:%s. 
" 808037b3c26Smrg "Hint:Try to run this test program as root.", 809037b3c26Smrg strerror(errno)); 8103f012e29Smrg return CUE_SINIT_FAILED; 811037b3c26Smrg } 812d8807b2fSmrg 813d8807b2fSmrg r = amdgpu_query_gpu_info(device_handle, &gpu_info); 814d8807b2fSmrg if (r) 815d8807b2fSmrg return CUE_SINIT_FAILED; 816d8807b2fSmrg 817d8807b2fSmrg family_id = gpu_info.family_id; 818d8807b2fSmrg 819d8807b2fSmrg return CUE_SUCCESS; 8203f012e29Smrg} 8213f012e29Smrg 8223f012e29Smrgint suite_basic_tests_clean(void) 8233f012e29Smrg{ 8243f012e29Smrg int r = amdgpu_device_deinitialize(device_handle); 8253f012e29Smrg 8263f012e29Smrg if (r == 0) 8273f012e29Smrg return CUE_SUCCESS; 8283f012e29Smrg else 8293f012e29Smrg return CUE_SCLEAN_FAILED; 8303f012e29Smrg} 8313f012e29Smrg 8323f012e29Smrgstatic void amdgpu_query_info_test(void) 8333f012e29Smrg{ 8343f012e29Smrg struct amdgpu_gpu_info gpu_info = {0}; 8353f012e29Smrg uint32_t version, feature; 8363f012e29Smrg int r; 8373f012e29Smrg 8383f012e29Smrg r = amdgpu_query_gpu_info(device_handle, &gpu_info); 8393f012e29Smrg CU_ASSERT_EQUAL(r, 0); 8403f012e29Smrg 8413f012e29Smrg r = amdgpu_query_firmware_version(device_handle, AMDGPU_INFO_FW_VCE, 0, 8423f012e29Smrg 0, &version, &feature); 8433f012e29Smrg CU_ASSERT_EQUAL(r, 0); 8443f012e29Smrg} 8453f012e29Smrg 8463f012e29Smrgstatic void amdgpu_command_submission_gfx_separate_ibs(void) 8473f012e29Smrg{ 8483f012e29Smrg amdgpu_context_handle context_handle; 8493f012e29Smrg amdgpu_bo_handle ib_result_handle, ib_result_ce_handle; 8503f012e29Smrg void *ib_result_cpu, *ib_result_ce_cpu; 8513f012e29Smrg uint64_t ib_result_mc_address, ib_result_ce_mc_address; 8523f012e29Smrg struct amdgpu_cs_request ibs_request = {0}; 8533f012e29Smrg struct amdgpu_cs_ib_info ib_info[2]; 8543f012e29Smrg struct amdgpu_cs_fence fence_status = {0}; 8553f012e29Smrg uint32_t *ptr; 8563f012e29Smrg uint32_t expired; 8573f012e29Smrg amdgpu_bo_list_handle bo_list; 8583f012e29Smrg amdgpu_va_handle va_handle, va_handle_ce; 
859d8807b2fSmrg int r, i = 0; 8603f012e29Smrg 8613f012e29Smrg r = amdgpu_cs_ctx_create(device_handle, &context_handle); 8623f012e29Smrg CU_ASSERT_EQUAL(r, 0); 8633f012e29Smrg 8643f012e29Smrg r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096, 8653f012e29Smrg AMDGPU_GEM_DOMAIN_GTT, 0, 8663f012e29Smrg &ib_result_handle, &ib_result_cpu, 8673f012e29Smrg &ib_result_mc_address, &va_handle); 8683f012e29Smrg CU_ASSERT_EQUAL(r, 0); 8693f012e29Smrg 8703f012e29Smrg r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096, 8713f012e29Smrg AMDGPU_GEM_DOMAIN_GTT, 0, 8723f012e29Smrg &ib_result_ce_handle, &ib_result_ce_cpu, 8733f012e29Smrg &ib_result_ce_mc_address, &va_handle_ce); 8743f012e29Smrg CU_ASSERT_EQUAL(r, 0); 8753f012e29Smrg 8763f012e29Smrg r = amdgpu_get_bo_list(device_handle, ib_result_handle, 8773f012e29Smrg ib_result_ce_handle, &bo_list); 8783f012e29Smrg CU_ASSERT_EQUAL(r, 0); 8793f012e29Smrg 8803f012e29Smrg memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info)); 8813f012e29Smrg 8823f012e29Smrg /* IT_SET_CE_DE_COUNTERS */ 8833f012e29Smrg ptr = ib_result_ce_cpu; 884d8807b2fSmrg if (family_id != AMDGPU_FAMILY_SI) { 885d8807b2fSmrg ptr[i++] = 0xc0008900; 886d8807b2fSmrg ptr[i++] = 0; 887d8807b2fSmrg } 888d8807b2fSmrg ptr[i++] = 0xc0008400; 889d8807b2fSmrg ptr[i++] = 1; 8903f012e29Smrg ib_info[0].ib_mc_address = ib_result_ce_mc_address; 891d8807b2fSmrg ib_info[0].size = i; 8923f012e29Smrg ib_info[0].flags = AMDGPU_IB_FLAG_CE; 8933f012e29Smrg 8943f012e29Smrg /* IT_WAIT_ON_CE_COUNTER */ 8953f012e29Smrg ptr = ib_result_cpu; 8963f012e29Smrg ptr[0] = 0xc0008600; 8973f012e29Smrg ptr[1] = 0x00000001; 8983f012e29Smrg ib_info[1].ib_mc_address = ib_result_mc_address; 8993f012e29Smrg ib_info[1].size = 2; 9003f012e29Smrg 9013f012e29Smrg ibs_request.ip_type = AMDGPU_HW_IP_GFX; 9023f012e29Smrg ibs_request.number_of_ibs = 2; 9033f012e29Smrg ibs_request.ibs = ib_info; 9043f012e29Smrg ibs_request.resources = bo_list; 9053f012e29Smrg ibs_request.fence_info.handle = NULL; 9063f012e29Smrg 
9073f012e29Smrg r = amdgpu_cs_submit(context_handle, 0,&ibs_request, 1); 9083f012e29Smrg 9093f012e29Smrg CU_ASSERT_EQUAL(r, 0); 9103f012e29Smrg 9113f012e29Smrg fence_status.context = context_handle; 9123f012e29Smrg fence_status.ip_type = AMDGPU_HW_IP_GFX; 9133f012e29Smrg fence_status.ip_instance = 0; 9143f012e29Smrg fence_status.fence = ibs_request.seq_no; 9153f012e29Smrg 9163f012e29Smrg r = amdgpu_cs_query_fence_status(&fence_status, 9173f012e29Smrg AMDGPU_TIMEOUT_INFINITE, 9183f012e29Smrg 0, &expired); 9193f012e29Smrg CU_ASSERT_EQUAL(r, 0); 9203f012e29Smrg 9213f012e29Smrg r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle, 9223f012e29Smrg ib_result_mc_address, 4096); 9233f012e29Smrg CU_ASSERT_EQUAL(r, 0); 9243f012e29Smrg 9253f012e29Smrg r = amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce, 9263f012e29Smrg ib_result_ce_mc_address, 4096); 9273f012e29Smrg CU_ASSERT_EQUAL(r, 0); 9283f012e29Smrg 9293f012e29Smrg r = amdgpu_bo_list_destroy(bo_list); 9303f012e29Smrg CU_ASSERT_EQUAL(r, 0); 9313f012e29Smrg 9323f012e29Smrg r = amdgpu_cs_ctx_free(context_handle); 9333f012e29Smrg CU_ASSERT_EQUAL(r, 0); 9343f012e29Smrg 9353f012e29Smrg} 9363f012e29Smrg 9373f012e29Smrgstatic void amdgpu_command_submission_gfx_shared_ib(void) 9383f012e29Smrg{ 9393f012e29Smrg amdgpu_context_handle context_handle; 9403f012e29Smrg amdgpu_bo_handle ib_result_handle; 9413f012e29Smrg void *ib_result_cpu; 9423f012e29Smrg uint64_t ib_result_mc_address; 9433f012e29Smrg struct amdgpu_cs_request ibs_request = {0}; 9443f012e29Smrg struct amdgpu_cs_ib_info ib_info[2]; 9453f012e29Smrg struct amdgpu_cs_fence fence_status = {0}; 9463f012e29Smrg uint32_t *ptr; 9473f012e29Smrg uint32_t expired; 9483f012e29Smrg amdgpu_bo_list_handle bo_list; 9493f012e29Smrg amdgpu_va_handle va_handle; 950d8807b2fSmrg int r, i = 0; 9513f012e29Smrg 9523f012e29Smrg r = amdgpu_cs_ctx_create(device_handle, &context_handle); 9533f012e29Smrg CU_ASSERT_EQUAL(r, 0); 9543f012e29Smrg 9553f012e29Smrg r = 
amdgpu_bo_alloc_and_map(device_handle, 4096, 4096, 9563f012e29Smrg AMDGPU_GEM_DOMAIN_GTT, 0, 9573f012e29Smrg &ib_result_handle, &ib_result_cpu, 9583f012e29Smrg &ib_result_mc_address, &va_handle); 9593f012e29Smrg CU_ASSERT_EQUAL(r, 0); 9603f012e29Smrg 9613f012e29Smrg r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL, 9623f012e29Smrg &bo_list); 9633f012e29Smrg CU_ASSERT_EQUAL(r, 0); 9643f012e29Smrg 9653f012e29Smrg memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info)); 9663f012e29Smrg 9673f012e29Smrg /* IT_SET_CE_DE_COUNTERS */ 9683f012e29Smrg ptr = ib_result_cpu; 969d8807b2fSmrg if (family_id != AMDGPU_FAMILY_SI) { 970d8807b2fSmrg ptr[i++] = 0xc0008900; 971d8807b2fSmrg ptr[i++] = 0; 972d8807b2fSmrg } 973d8807b2fSmrg ptr[i++] = 0xc0008400; 974d8807b2fSmrg ptr[i++] = 1; 9753f012e29Smrg ib_info[0].ib_mc_address = ib_result_mc_address; 976d8807b2fSmrg ib_info[0].size = i; 9773f012e29Smrg ib_info[0].flags = AMDGPU_IB_FLAG_CE; 9783f012e29Smrg 9793f012e29Smrg ptr = (uint32_t *)ib_result_cpu + 4; 9803f012e29Smrg ptr[0] = 0xc0008600; 9813f012e29Smrg ptr[1] = 0x00000001; 9823f012e29Smrg ib_info[1].ib_mc_address = ib_result_mc_address + 16; 9833f012e29Smrg ib_info[1].size = 2; 9843f012e29Smrg 9853f012e29Smrg ibs_request.ip_type = AMDGPU_HW_IP_GFX; 9863f012e29Smrg ibs_request.number_of_ibs = 2; 9873f012e29Smrg ibs_request.ibs = ib_info; 9883f012e29Smrg ibs_request.resources = bo_list; 9893f012e29Smrg ibs_request.fence_info.handle = NULL; 9903f012e29Smrg 9913f012e29Smrg r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1); 9923f012e29Smrg 9933f012e29Smrg CU_ASSERT_EQUAL(r, 0); 9943f012e29Smrg 9953f012e29Smrg fence_status.context = context_handle; 9963f012e29Smrg fence_status.ip_type = AMDGPU_HW_IP_GFX; 9973f012e29Smrg fence_status.ip_instance = 0; 9983f012e29Smrg fence_status.fence = ibs_request.seq_no; 9993f012e29Smrg 10003f012e29Smrg r = amdgpu_cs_query_fence_status(&fence_status, 10013f012e29Smrg AMDGPU_TIMEOUT_INFINITE, 10023f012e29Smrg 0, &expired); 
10033f012e29Smrg CU_ASSERT_EQUAL(r, 0); 10043f012e29Smrg 10053f012e29Smrg r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle, 10063f012e29Smrg ib_result_mc_address, 4096); 10073f012e29Smrg CU_ASSERT_EQUAL(r, 0); 10083f012e29Smrg 10093f012e29Smrg r = amdgpu_bo_list_destroy(bo_list); 10103f012e29Smrg CU_ASSERT_EQUAL(r, 0); 10113f012e29Smrg 10123f012e29Smrg r = amdgpu_cs_ctx_free(context_handle); 10133f012e29Smrg CU_ASSERT_EQUAL(r, 0); 10143f012e29Smrg} 10153f012e29Smrg 10163f012e29Smrgstatic void amdgpu_command_submission_gfx_cp_write_data(void) 10173f012e29Smrg{ 10183f012e29Smrg amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_GFX); 10193f012e29Smrg} 10203f012e29Smrg 10213f012e29Smrgstatic void amdgpu_command_submission_gfx_cp_const_fill(void) 10223f012e29Smrg{ 10233f012e29Smrg amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_GFX); 10243f012e29Smrg} 10253f012e29Smrg 10263f012e29Smrgstatic void amdgpu_command_submission_gfx_cp_copy_data(void) 10273f012e29Smrg{ 10283f012e29Smrg amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_GFX); 10293f012e29Smrg} 10303f012e29Smrg 103100a23bdaSmrgstatic void amdgpu_bo_eviction_test(void) 103200a23bdaSmrg{ 103300a23bdaSmrg const int sdma_write_length = 1024; 103400a23bdaSmrg const int pm4_dw = 256; 103500a23bdaSmrg amdgpu_context_handle context_handle; 103600a23bdaSmrg amdgpu_bo_handle bo1, bo2, vram_max[2], gtt_max[2]; 103700a23bdaSmrg amdgpu_bo_handle *resources; 103800a23bdaSmrg uint32_t *pm4; 103900a23bdaSmrg struct amdgpu_cs_ib_info *ib_info; 104000a23bdaSmrg struct amdgpu_cs_request *ibs_request; 104100a23bdaSmrg uint64_t bo1_mc, bo2_mc; 104200a23bdaSmrg volatile unsigned char *bo1_cpu, *bo2_cpu; 104300a23bdaSmrg int i, j, r, loop1, loop2; 104400a23bdaSmrg uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC}; 104500a23bdaSmrg amdgpu_va_handle bo1_va_handle, bo2_va_handle; 104600a23bdaSmrg struct amdgpu_heap_info vram_info, gtt_info; 104700a23bdaSmrg 104800a23bdaSmrg pm4 = calloc(pm4_dw, 
sizeof(*pm4)); 104900a23bdaSmrg CU_ASSERT_NOT_EQUAL(pm4, NULL); 105000a23bdaSmrg 105100a23bdaSmrg ib_info = calloc(1, sizeof(*ib_info)); 105200a23bdaSmrg CU_ASSERT_NOT_EQUAL(ib_info, NULL); 105300a23bdaSmrg 105400a23bdaSmrg ibs_request = calloc(1, sizeof(*ibs_request)); 105500a23bdaSmrg CU_ASSERT_NOT_EQUAL(ibs_request, NULL); 105600a23bdaSmrg 105700a23bdaSmrg r = amdgpu_cs_ctx_create(device_handle, &context_handle); 105800a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 105900a23bdaSmrg 106000a23bdaSmrg /* prepare resource */ 106100a23bdaSmrg resources = calloc(4, sizeof(amdgpu_bo_handle)); 106200a23bdaSmrg CU_ASSERT_NOT_EQUAL(resources, NULL); 106300a23bdaSmrg 106400a23bdaSmrg r = amdgpu_query_heap_info(device_handle, AMDGPU_GEM_DOMAIN_VRAM, 106500a23bdaSmrg 0, &vram_info); 106600a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 106700a23bdaSmrg 106800a23bdaSmrg r = amdgpu_bo_alloc_wrap(device_handle, vram_info.max_allocation, 4096, 106900a23bdaSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, &vram_max[0]); 107000a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 107100a23bdaSmrg r = amdgpu_bo_alloc_wrap(device_handle, vram_info.max_allocation, 4096, 107200a23bdaSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, &vram_max[1]); 107300a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 107400a23bdaSmrg 10754babd585Smrg r = amdgpu_query_heap_info(device_handle, AMDGPU_GEM_DOMAIN_GTT, 10764babd585Smrg 0, >t_info); 10774babd585Smrg CU_ASSERT_EQUAL(r, 0); 10784babd585Smrg 107900a23bdaSmrg r = amdgpu_bo_alloc_wrap(device_handle, gtt_info.max_allocation, 4096, 108000a23bdaSmrg AMDGPU_GEM_DOMAIN_GTT, 0, >t_max[0]); 108100a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 108200a23bdaSmrg r = amdgpu_bo_alloc_wrap(device_handle, gtt_info.max_allocation, 4096, 108300a23bdaSmrg AMDGPU_GEM_DOMAIN_GTT, 0, >t_max[1]); 108400a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 108500a23bdaSmrg 108600a23bdaSmrg 108700a23bdaSmrg 108800a23bdaSmrg loop1 = loop2 = 0; 108900a23bdaSmrg /* run 9 circle to test all mapping combination */ 109000a23bdaSmrg while(loop1 < 2) { 109100a23bdaSmrg while(loop2 < 2) { 109200a23bdaSmrg 
/* allocate UC bo1for sDMA use */ 109300a23bdaSmrg r = amdgpu_bo_alloc_and_map(device_handle, 109400a23bdaSmrg sdma_write_length, 4096, 109500a23bdaSmrg AMDGPU_GEM_DOMAIN_GTT, 109600a23bdaSmrg gtt_flags[loop1], &bo1, 109700a23bdaSmrg (void**)&bo1_cpu, &bo1_mc, 109800a23bdaSmrg &bo1_va_handle); 109900a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 110000a23bdaSmrg 110100a23bdaSmrg /* set bo1 */ 110200a23bdaSmrg memset((void*)bo1_cpu, 0xaa, sdma_write_length); 110300a23bdaSmrg 110400a23bdaSmrg /* allocate UC bo2 for sDMA use */ 110500a23bdaSmrg r = amdgpu_bo_alloc_and_map(device_handle, 110600a23bdaSmrg sdma_write_length, 4096, 110700a23bdaSmrg AMDGPU_GEM_DOMAIN_GTT, 110800a23bdaSmrg gtt_flags[loop2], &bo2, 110900a23bdaSmrg (void**)&bo2_cpu, &bo2_mc, 111000a23bdaSmrg &bo2_va_handle); 111100a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 111200a23bdaSmrg 111300a23bdaSmrg /* clear bo2 */ 111400a23bdaSmrg memset((void*)bo2_cpu, 0, sdma_write_length); 111500a23bdaSmrg 111600a23bdaSmrg resources[0] = bo1; 111700a23bdaSmrg resources[1] = bo2; 111800a23bdaSmrg resources[2] = vram_max[loop2]; 111900a23bdaSmrg resources[3] = gtt_max[loop2]; 112000a23bdaSmrg 112100a23bdaSmrg /* fulfill PM4: test DMA copy linear */ 112200a23bdaSmrg i = j = 0; 112300a23bdaSmrg if (family_id == AMDGPU_FAMILY_SI) { 112400a23bdaSmrg pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI, 0, 0, 0, 112500a23bdaSmrg sdma_write_length); 112600a23bdaSmrg pm4[i++] = 0xffffffff & bo2_mc; 112700a23bdaSmrg pm4[i++] = 0xffffffff & bo1_mc; 112800a23bdaSmrg pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32; 112900a23bdaSmrg pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32; 113000a23bdaSmrg } else { 113100a23bdaSmrg pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0); 113200a23bdaSmrg if (family_id >= AMDGPU_FAMILY_AI) 113300a23bdaSmrg pm4[i++] = sdma_write_length - 1; 113400a23bdaSmrg else 113500a23bdaSmrg pm4[i++] = sdma_write_length; 113600a23bdaSmrg pm4[i++] = 0; 113700a23bdaSmrg pm4[i++] = 0xffffffff & bo1_mc; 113800a23bdaSmrg 
pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32; 113900a23bdaSmrg pm4[i++] = 0xffffffff & bo2_mc; 114000a23bdaSmrg pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32; 114100a23bdaSmrg } 114200a23bdaSmrg 114300a23bdaSmrg amdgpu_test_exec_cs_helper(context_handle, 114400a23bdaSmrg AMDGPU_HW_IP_DMA, 0, 114500a23bdaSmrg i, pm4, 114600a23bdaSmrg 4, resources, 114700a23bdaSmrg ib_info, ibs_request); 114800a23bdaSmrg 114900a23bdaSmrg /* verify if SDMA test result meets with expected */ 115000a23bdaSmrg i = 0; 115100a23bdaSmrg while(i < sdma_write_length) { 115200a23bdaSmrg CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa); 115300a23bdaSmrg } 115400a23bdaSmrg r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc, 115500a23bdaSmrg sdma_write_length); 115600a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 115700a23bdaSmrg r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc, 115800a23bdaSmrg sdma_write_length); 115900a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 116000a23bdaSmrg loop2++; 116100a23bdaSmrg } 116200a23bdaSmrg loop2 = 0; 116300a23bdaSmrg loop1++; 116400a23bdaSmrg } 116500a23bdaSmrg amdgpu_bo_free(vram_max[0]); 116600a23bdaSmrg amdgpu_bo_free(vram_max[1]); 116700a23bdaSmrg amdgpu_bo_free(gtt_max[0]); 116800a23bdaSmrg amdgpu_bo_free(gtt_max[1]); 116900a23bdaSmrg /* clean resources */ 117000a23bdaSmrg free(resources); 117100a23bdaSmrg free(ibs_request); 117200a23bdaSmrg free(ib_info); 117300a23bdaSmrg free(pm4); 117400a23bdaSmrg 117500a23bdaSmrg /* end of test */ 117600a23bdaSmrg r = amdgpu_cs_ctx_free(context_handle); 117700a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 117800a23bdaSmrg} 117900a23bdaSmrg 118000a23bdaSmrg 11813f012e29Smrgstatic void amdgpu_command_submission_gfx(void) 11823f012e29Smrg{ 11833f012e29Smrg /* write data using the CP */ 11843f012e29Smrg amdgpu_command_submission_gfx_cp_write_data(); 11853f012e29Smrg /* const fill using the CP */ 11863f012e29Smrg amdgpu_command_submission_gfx_cp_const_fill(); 11873f012e29Smrg /* copy data using the CP */ 11883f012e29Smrg 
amdgpu_command_submission_gfx_cp_copy_data(); 11893f012e29Smrg /* separate IB buffers for multi-IB submission */ 11903f012e29Smrg amdgpu_command_submission_gfx_separate_ibs(); 11913f012e29Smrg /* shared IB buffer for multi-IB submission */ 11923f012e29Smrg amdgpu_command_submission_gfx_shared_ib(); 11933f012e29Smrg} 11943f012e29Smrg 11953f012e29Smrgstatic void amdgpu_semaphore_test(void) 11963f012e29Smrg{ 11973f012e29Smrg amdgpu_context_handle context_handle[2]; 11983f012e29Smrg amdgpu_semaphore_handle sem; 11993f012e29Smrg amdgpu_bo_handle ib_result_handle[2]; 12003f012e29Smrg void *ib_result_cpu[2]; 12013f012e29Smrg uint64_t ib_result_mc_address[2]; 12023f012e29Smrg struct amdgpu_cs_request ibs_request[2] = {0}; 12033f012e29Smrg struct amdgpu_cs_ib_info ib_info[2] = {0}; 12043f012e29Smrg struct amdgpu_cs_fence fence_status = {0}; 12053f012e29Smrg uint32_t *ptr; 12063f012e29Smrg uint32_t expired; 1207d8807b2fSmrg uint32_t sdma_nop, gfx_nop; 12083f012e29Smrg amdgpu_bo_list_handle bo_list[2]; 12093f012e29Smrg amdgpu_va_handle va_handle[2]; 12103f012e29Smrg int r, i; 12114babd585Smrg struct amdgpu_gpu_info gpu_info = {0}; 12124babd585Smrg unsigned gc_ip_type; 12134babd585Smrg 12144babd585Smrg r = amdgpu_query_gpu_info(device_handle, &gpu_info); 12154babd585Smrg CU_ASSERT_EQUAL(r, 0); 12164babd585Smrg 12174babd585Smrg gc_ip_type = (asic_is_gfx_pipe_removed(family_id, chip_id, chip_rev)) ? 
12184babd585Smrg AMDGPU_HW_IP_COMPUTE : AMDGPU_HW_IP_GFX; 12193f012e29Smrg 1220d8807b2fSmrg if (family_id == AMDGPU_FAMILY_SI) { 1221d8807b2fSmrg sdma_nop = SDMA_PACKET_SI(SDMA_NOP_SI, 0, 0, 0, 0); 1222d8807b2fSmrg gfx_nop = GFX_COMPUTE_NOP_SI; 1223d8807b2fSmrg } else { 1224d8807b2fSmrg sdma_nop = SDMA_PKT_HEADER_OP(SDMA_NOP); 1225d8807b2fSmrg gfx_nop = GFX_COMPUTE_NOP; 1226d8807b2fSmrg } 1227d8807b2fSmrg 12283f012e29Smrg r = amdgpu_cs_create_semaphore(&sem); 12293f012e29Smrg CU_ASSERT_EQUAL(r, 0); 12303f012e29Smrg for (i = 0; i < 2; i++) { 12313f012e29Smrg r = amdgpu_cs_ctx_create(device_handle, &context_handle[i]); 12323f012e29Smrg CU_ASSERT_EQUAL(r, 0); 12333f012e29Smrg 12343f012e29Smrg r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096, 12353f012e29Smrg AMDGPU_GEM_DOMAIN_GTT, 0, 12363f012e29Smrg &ib_result_handle[i], &ib_result_cpu[i], 12373f012e29Smrg &ib_result_mc_address[i], &va_handle[i]); 12383f012e29Smrg CU_ASSERT_EQUAL(r, 0); 12393f012e29Smrg 12403f012e29Smrg r = amdgpu_get_bo_list(device_handle, ib_result_handle[i], 12413f012e29Smrg NULL, &bo_list[i]); 12423f012e29Smrg CU_ASSERT_EQUAL(r, 0); 12433f012e29Smrg } 12443f012e29Smrg 12453f012e29Smrg /* 1. 
same context different engine */ 12463f012e29Smrg ptr = ib_result_cpu[0]; 1247d8807b2fSmrg ptr[0] = sdma_nop; 12483f012e29Smrg ib_info[0].ib_mc_address = ib_result_mc_address[0]; 12493f012e29Smrg ib_info[0].size = 1; 12503f012e29Smrg 12513f012e29Smrg ibs_request[0].ip_type = AMDGPU_HW_IP_DMA; 12523f012e29Smrg ibs_request[0].number_of_ibs = 1; 12533f012e29Smrg ibs_request[0].ibs = &ib_info[0]; 12543f012e29Smrg ibs_request[0].resources = bo_list[0]; 12553f012e29Smrg ibs_request[0].fence_info.handle = NULL; 12563f012e29Smrg r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request[0], 1); 12573f012e29Smrg CU_ASSERT_EQUAL(r, 0); 12583f012e29Smrg r = amdgpu_cs_signal_semaphore(context_handle[0], AMDGPU_HW_IP_DMA, 0, 0, sem); 12593f012e29Smrg CU_ASSERT_EQUAL(r, 0); 12603f012e29Smrg 12614babd585Smrg r = amdgpu_cs_wait_semaphore(context_handle[0], gc_ip_type, 0, 0, sem); 12623f012e29Smrg CU_ASSERT_EQUAL(r, 0); 12633f012e29Smrg ptr = ib_result_cpu[1]; 1264d8807b2fSmrg ptr[0] = gfx_nop; 12653f012e29Smrg ib_info[1].ib_mc_address = ib_result_mc_address[1]; 12663f012e29Smrg ib_info[1].size = 1; 12673f012e29Smrg 12684babd585Smrg ibs_request[1].ip_type = gc_ip_type; 12693f012e29Smrg ibs_request[1].number_of_ibs = 1; 12703f012e29Smrg ibs_request[1].ibs = &ib_info[1]; 12713f012e29Smrg ibs_request[1].resources = bo_list[1]; 12723f012e29Smrg ibs_request[1].fence_info.handle = NULL; 12733f012e29Smrg 12743f012e29Smrg r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request[1], 1); 12753f012e29Smrg CU_ASSERT_EQUAL(r, 0); 12763f012e29Smrg 12773f012e29Smrg fence_status.context = context_handle[0]; 12784babd585Smrg fence_status.ip_type = gc_ip_type; 12793f012e29Smrg fence_status.ip_instance = 0; 12803f012e29Smrg fence_status.fence = ibs_request[1].seq_no; 12813f012e29Smrg r = amdgpu_cs_query_fence_status(&fence_status, 12823f012e29Smrg 500000000, 0, &expired); 12833f012e29Smrg CU_ASSERT_EQUAL(r, 0); 12843f012e29Smrg CU_ASSERT_EQUAL(expired, true); 12853f012e29Smrg 12863f012e29Smrg /* 2. 
same engine different context */ 12873f012e29Smrg ptr = ib_result_cpu[0]; 1288d8807b2fSmrg ptr[0] = gfx_nop; 12893f012e29Smrg ib_info[0].ib_mc_address = ib_result_mc_address[0]; 12903f012e29Smrg ib_info[0].size = 1; 12913f012e29Smrg 12924babd585Smrg ibs_request[0].ip_type = gc_ip_type; 12933f012e29Smrg ibs_request[0].number_of_ibs = 1; 12943f012e29Smrg ibs_request[0].ibs = &ib_info[0]; 12953f012e29Smrg ibs_request[0].resources = bo_list[0]; 12963f012e29Smrg ibs_request[0].fence_info.handle = NULL; 12973f012e29Smrg r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request[0], 1); 12983f012e29Smrg CU_ASSERT_EQUAL(r, 0); 12994babd585Smrg r = amdgpu_cs_signal_semaphore(context_handle[0], gc_ip_type, 0, 0, sem); 13003f012e29Smrg CU_ASSERT_EQUAL(r, 0); 13013f012e29Smrg 13024babd585Smrg r = amdgpu_cs_wait_semaphore(context_handle[1], gc_ip_type, 0, 0, sem); 13033f012e29Smrg CU_ASSERT_EQUAL(r, 0); 13043f012e29Smrg ptr = ib_result_cpu[1]; 1305d8807b2fSmrg ptr[0] = gfx_nop; 13063f012e29Smrg ib_info[1].ib_mc_address = ib_result_mc_address[1]; 13073f012e29Smrg ib_info[1].size = 1; 13083f012e29Smrg 13094babd585Smrg ibs_request[1].ip_type = gc_ip_type; 13103f012e29Smrg ibs_request[1].number_of_ibs = 1; 13113f012e29Smrg ibs_request[1].ibs = &ib_info[1]; 13123f012e29Smrg ibs_request[1].resources = bo_list[1]; 13133f012e29Smrg ibs_request[1].fence_info.handle = NULL; 13143f012e29Smrg r = amdgpu_cs_submit(context_handle[1], 0,&ibs_request[1], 1); 13153f012e29Smrg 13163f012e29Smrg CU_ASSERT_EQUAL(r, 0); 13173f012e29Smrg 13183f012e29Smrg fence_status.context = context_handle[1]; 13194babd585Smrg fence_status.ip_type = gc_ip_type; 13203f012e29Smrg fence_status.ip_instance = 0; 13213f012e29Smrg fence_status.fence = ibs_request[1].seq_no; 13223f012e29Smrg r = amdgpu_cs_query_fence_status(&fence_status, 13233f012e29Smrg 500000000, 0, &expired); 13243f012e29Smrg CU_ASSERT_EQUAL(r, 0); 13253f012e29Smrg CU_ASSERT_EQUAL(expired, true); 1326d8807b2fSmrg 13273f012e29Smrg for (i = 0; i < 2; i++) { 
13283f012e29Smrg r = amdgpu_bo_unmap_and_free(ib_result_handle[i], va_handle[i], 13293f012e29Smrg ib_result_mc_address[i], 4096); 13303f012e29Smrg CU_ASSERT_EQUAL(r, 0); 13313f012e29Smrg 13323f012e29Smrg r = amdgpu_bo_list_destroy(bo_list[i]); 13333f012e29Smrg CU_ASSERT_EQUAL(r, 0); 13343f012e29Smrg 13353f012e29Smrg r = amdgpu_cs_ctx_free(context_handle[i]); 13363f012e29Smrg CU_ASSERT_EQUAL(r, 0); 13373f012e29Smrg } 13383f012e29Smrg 13393f012e29Smrg r = amdgpu_cs_destroy_semaphore(sem); 13403f012e29Smrg CU_ASSERT_EQUAL(r, 0); 13413f012e29Smrg} 13423f012e29Smrg

/*
 * Report whether bit @ring of @rings_mask is set.
 *
 * Ring indices outside 0..31 are reported as absent, so a loop of the form
 * "for (ring = 0; cs_ring_present(mask, ring); ring++)" always terminates and
 * never shifts by the width of the type. As before, such a loop stops at the
 * first clear bit.
 */
static bool cs_ring_present(uint32_t rings_mask, int ring)
{
	if (ring < 0 || ring >= 32)
		return false;
	return (rings_mask >> ring) & 1u;
}

/*
 * Submit one NOP IB to each available compute ring in turn and wait for the
 * fence of every submission.
 *
 * CUnit's CU_ASSERT_* macros do not stop the test, so each setup step that
 * produces a handle used later is followed by an explicit bail-out.
 */
static void amdgpu_command_submission_compute_nop(void)
{
	enum { NOP_IB_DW = 16 };	/* IB size handed to the CS, in dwords */
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle ib_result_handle;
	void *ib_result_cpu;
	uint64_t ib_result_mc_address;
	struct amdgpu_cs_request ibs_request;
	struct amdgpu_cs_ib_info ib_info;
	struct amdgpu_cs_fence fence_status;
	uint32_t *ptr;
	uint32_t expired = 0;
	int r, instance;
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle;
	/* zeroed so that a failed query leaves no rings to iterate over */
	struct drm_amdgpu_info_hw_ip info = {0};

	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_COMPUTE, 0, &info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);
	if (r)
		return;

	for (instance = 0; cs_ring_present(info.available_rings, instance);
	     instance++) {
		r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
					    AMDGPU_GEM_DOMAIN_GTT, 0,
					    &ib_result_handle, &ib_result_cpu,
					    &ib_result_mc_address, &va_handle);
		CU_ASSERT_EQUAL(r, 0);
		if (r)
			break;

		r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
				       &bo_list);
		CU_ASSERT_EQUAL(r, 0);
		if (r) {
			r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
						     ib_result_mc_address, 4096);
			CU_ASSERT_EQUAL(r, 0);
			break;
		}

		/*
		 * Clear every dword that is submitted (the original cleared
		 * only 16 bytes of the 16-dword IB), then write the NOP
		 * header with the same count field (14) as before.
		 */
		ptr = ib_result_cpu;
		memset(ptr, 0, NOP_IB_DW * sizeof(*ptr));
		ptr[0] = PACKET3(PACKET3_NOP, NOP_IB_DW - 2);

		memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
		ib_info.ib_mc_address = ib_result_mc_address;
		ib_info.size = NOP_IB_DW;

		memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
		ibs_request.ip_type = AMDGPU_HW_IP_COMPUTE;
		ibs_request.ring = instance;
		ibs_request.number_of_ibs = 1;
		ibs_request.ibs = &ib_info;
		ibs_request.resources = bo_list;
		ibs_request.fence_info.handle = NULL;

		memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
		r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
		CU_ASSERT_EQUAL(r, 0);

		/* only wait when the submission produced a sequence number */
		if (r == 0) {
			fence_status.context = context_handle;
			fence_status.ip_type = AMDGPU_HW_IP_COMPUTE;
			fence_status.ip_instance = 0;
			fence_status.ring = instance;
			fence_status.fence = ibs_request.seq_no;

			r = amdgpu_cs_query_fence_status(&fence_status,
							 AMDGPU_TIMEOUT_INFINITE,
							 0, &expired);
			CU_ASSERT_EQUAL(r, 0);
		}

		r = amdgpu_bo_list_destroy(bo_list);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
					     ib_result_mc_address, 4096);
		CU_ASSERT_EQUAL(r, 0);
	}

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

/* Run the write-linear helper on the compute IP. */
static void amdgpu_command_submission_compute_cp_write_data(void)
{
	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_COMPUTE);
}

/* Run the const-fill helper on the compute IP. */
static void amdgpu_command_submission_compute_cp_const_fill(void)
{
	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_COMPUTE);
}

/* Run the copy-linear helper on the compute IP. */
static void amdgpu_command_submission_compute_cp_copy_data(void)
{
	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_COMPUTE);
}

/* Compute command-submission test: runs the four compute sub-tests in order. */
static void amdgpu_command_submission_compute(void)
{
	/* write data using the CP */
	amdgpu_command_submission_compute_cp_write_data();
	/* const fill using the CP */
	amdgpu_command_submission_compute_cp_const_fill();
	/* copy data using the CP */
	amdgpu_command_submission_compute_cp_copy_data();
	/* nop test */
	amdgpu_command_submission_compute_nop();
}

/*
 * Copy a caller-built PM4 stream into a freshly allocated IB, submit it on
 * the given IP/ring and wait until the IB has completed.
 *
 * The caller creates and releases pm4_src, resources, ib_info and
 * ibs_request. This helper fills in ib_info (address, size, and the secure
 * flag when @secure is set) and ibs_request (ip_type, ring, ibs, resources),
 * builds a temporary BO list from @resources plus the IB itself, and destroys
 * that list again before returning.
 *
 * @pm4_dw is the stream length in dwords and must fit the 4096-byte IB, i.e.
 * 0..1024. Because CU_ASSERT_* does not abort, invalid arguments and failed
 * setup steps are reported through CUnit and then the helper returns instead
 * of touching invalid memory.
 */
void
amdgpu_test_exec_cs_helper_raw(amdgpu_device_handle device_handle,
			       amdgpu_context_handle context_handle,
			       unsigned ip_type, int instance, int pm4_dw,
			       uint32_t *pm4_src, int res_cnt,
			       amdgpu_bo_handle *resources,
			       struct amdgpu_cs_ib_info *ib_info,
			       struct amdgpu_cs_request *ibs_request,
			       bool secure)
{
	enum { IB_BYTES = 4096, IB_DWORDS = 1024 };
	int r, submit_r;
	uint32_t expired = 0;
	uint32_t *ring_ptr;
	amdgpu_bo_handle ib_result_handle;
	void *ib_result_cpu;
	uint64_t ib_result_mc_address;
	struct amdgpu_cs_fence fence_status = {0};
	amdgpu_bo_handle *all_res;
	amdgpu_va_handle va_handle;

	/* prepare CS */
	CU_ASSERT_NOT_EQUAL(pm4_src, NULL);
	CU_ASSERT_NOT_EQUAL(resources, NULL);
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
	CU_ASSERT_TRUE(pm4_dw >= 0 && pm4_dw <= IB_DWORDS);
	CU_ASSERT_TRUE(res_cnt >= 0);
	if (!pm4_src || !resources || !ib_info || !ibs_request ||
	    pm4_dw < 0 || pm4_dw > IB_DWORDS || res_cnt < 0)
		return;

	/* one extra slot for the IB's own BO; sized only after res_cnt is checked */
	all_res = alloca(sizeof(resources[0]) * (res_cnt + 1));

	/* allocate IB */
	r = amdgpu_bo_alloc_and_map(device_handle, IB_BYTES, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_handle, &ib_result_cpu,
				    &ib_result_mc_address, &va_handle);
	CU_ASSERT_EQUAL(r, 0);
	if (r)
		return;

	/* copy PM4 packet to ring from caller */
	ring_ptr = ib_result_cpu;
	memcpy(ring_ptr, pm4_src, pm4_dw * sizeof(*pm4_src));

	ib_info->ib_mc_address = ib_result_mc_address;
	ib_info->size = pm4_dw;
	if (secure)
		ib_info->flags |= AMDGPU_IB_FLAGS_SECURE;

	ibs_request->ip_type = ip_type;
	ibs_request->ring = instance;
	ibs_request->number_of_ibs = 1;
	ibs_request->ibs = ib_info;
	ibs_request->fence_info.handle = NULL;

	memcpy(all_res, resources, sizeof(resources[0]) * res_cnt);
	all_res[res_cnt] = ib_result_handle;

	r = amdgpu_bo_list_create(device_handle, res_cnt + 1, all_res,
				  NULL, &ibs_request->resources);
	CU_ASSERT_EQUAL(r, 0);
	if (r)
		goto free_ib;

	/* submit CS */
	submit_r = amdgpu_cs_submit(context_handle, 0, ibs_request, 1);
	CU_ASSERT_EQUAL(submit_r, 0);

	r = amdgpu_bo_list_destroy(ibs_request->resources);
	CU_ASSERT_EQUAL(r, 0);

	/* without a successful submit there is no valid seq_no to wait on */
	if (submit_r)
		goto free_ib;

	fence_status.ip_type = ip_type;
	fence_status.ip_instance = 0;
	fence_status.ring = ibs_request->ring;
	fence_status.context = context_handle;
	fence_status.fence = ibs_request->seq_no;

	/* wait for IB accomplished */
	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(expired, true);

free_ib:
	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
				     ib_result_mc_address, IB_BYTES);
	CU_ASSERT_EQUAL(r, 0);
}

/*
 * Convenience wrapper around amdgpu_test_exec_cs_helper_raw() that uses the
 * file-scope device_handle and a non-secure submission.
 */
static void
amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,
			   unsigned ip_type, int instance, int pm4_dw,
			   uint32_t *pm4_src, int res_cnt,
			   amdgpu_bo_handle *resources,
			   struct amdgpu_cs_ib_info *ib_info,
			   struct amdgpu_cs_request *ibs_request)
{
	amdgpu_test_exec_cs_helper_raw(device_handle, context_handle,
				       ip_type, instance, pm4_dw, pm4_src,
				       res_cnt, resources, ib_info,
				       ibs_request, false);
}

/*
 * Write-linear test for the DMA, GFX or compute IP given by @ip_type.
 *
 * For every available ring and for both GTT flag sets (default, then USWC) a
 * 128-dword BO is cleared, filled with 0xdeadbeaf by the engine, and checked:
 *  - non-secure: the CPU mapping must read back 0xdeadbeaf everywhere;
 *  - secure + GFX: a follow-up ATOMIC_MEM compare-swap packet is submitted;
 *  - secure + DMA: two SDMA atomic compare-swap packets are submitted and the
 *    first dword seen through the CPU mapping is compared before/after.
 * With @secure set both flag sets additionally get
 * AMDGPU_GEM_CREATE_ENCRYPTED and the write is submitted as a secure IB.
 * A secure run on any other IP type performs no verification.
 */
void
amdgpu_command_submission_write_linear_helper_with_secure(amdgpu_device_handle
							  device, unsigned
							  ip_type, bool secure)
{
	const int sdma_write_length = 128;
	const int pm4_dw = 256;
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo;
	amdgpu_bo_handle *resources;
	uint32_t *pm4;
	struct amdgpu_cs_ib_info *ib_info;
	struct amdgpu_cs_request *ibs_request;
	uint64_t bo_mc;
	volatile uint32_t *bo_cpu;
	uint32_t bo_cpu_origin;
	int i, j, r, loop, ring_id;
	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
	amdgpu_va_handle va_handle;
	/* zeroed so that a failed query leaves no rings to iterate over */
	struct drm_amdgpu_info_hw_ip hw_ip_info = {0};

	pm4 = calloc(pm4_dw, sizeof(*pm4));
	CU_ASSERT_NOT_EQUAL(pm4, NULL);

	ib_info = calloc(1, sizeof(*ib_info));
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);

	ibs_request = calloc(1, sizeof(*ibs_request));
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);

	/* prepare resource */
	resources = calloc(1, sizeof(amdgpu_bo_handle));
	CU_ASSERT_NOT_EQUAL(resources, NULL);

	if (!pm4 || !ib_info || !ibs_request || !resources)
		goto out_free;

	r = amdgpu_query_hw_ip_info(device, ip_type, 0, &hw_ip_info);
	CU_ASSERT_EQUAL(r, 0);

	for (i = 0; secure && (i < 2); i++)
		gtt_flags[i] |= AMDGPU_GEM_CREATE_ENCRYPTED;

	r = amdgpu_cs_ctx_create(device, &context_handle);
	CU_ASSERT_EQUAL(r, 0);
	if (r)
		goto out_free;

	for (ring_id = 0; cs_ring_present(hw_ip_info.available_rings, ring_id);
	     ring_id++) {
		for (loop = 0; loop < 2; loop++) {
			/* allocate the destination bo with this pass's GTT flags */
			r = amdgpu_bo_alloc_and_map(device,
						    sdma_write_length * sizeof(uint32_t),
						    4096, AMDGPU_GEM_DOMAIN_GTT,
						    gtt_flags[loop], &bo, (void**)&bo_cpu,
						    &bo_mc, &va_handle);
			CU_ASSERT_EQUAL(r, 0);
			if (r)
				goto out_ctx;

			/* clear bo */
			memset((void*)bo_cpu, 0, sdma_write_length * sizeof(uint32_t));

			resources[0] = bo;

			/* fill PM4: test DMA write-linear */
			i = j = 0;
			if (ip_type == AMDGPU_HW_IP_DMA) {
				if (family_id == AMDGPU_FAMILY_SI)
					pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0,
								  sdma_write_length);
				else
					pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
							       SDMA_WRITE_SUB_OPCODE_LINEAR,
							       secure ? SDMA_ATOMIC_TMZ(1) : 0);
				pm4[i++] = 0xfffffffc & bo_mc;
				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
				if (family_id >= AMDGPU_FAMILY_AI)
					pm4[i++] = sdma_write_length - 1;
				else if (family_id != AMDGPU_FAMILY_SI)
					pm4[i++] = sdma_write_length;
				while (j++ < sdma_write_length)
					pm4[i++] = 0xdeadbeaf;
			} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
				   (ip_type == AMDGPU_HW_IP_COMPUTE)) {
				pm4[i++] = PACKET3(PACKET3_WRITE_DATA, 2 + sdma_write_length);
				pm4[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
				pm4[i++] = 0xfffffffc & bo_mc;
				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
				while (j++ < sdma_write_length)
					pm4[i++] = 0xdeadbeaf;
			}

			amdgpu_test_exec_cs_helper_raw(device, context_handle,
						       ip_type, ring_id, i, pm4,
						       1, resources, ib_info,
						       ibs_request, secure);

			/* verify that the write result matches what is expected */
			i = 0;
			if (!secure) {
				while (i < sdma_write_length) {
					CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf);
				}
			} else if (ip_type == AMDGPU_HW_IP_GFX) {
				memset((void*)pm4, 0, pm4_dw * sizeof(uint32_t));
				pm4[i++] = PACKET3(PACKET3_ATOMIC_MEM, 7);
				/* atomic opcode for 32b w/ RTN and ATOMIC_SWAPCMP_RTN
				 * command, 1-loop_until_compare_satisfied.
				 * single_pass_atomic, 0-lru
				 * engine_sel, 0-micro_engine
				 */
				pm4[i++] = (TC_OP_ATOMIC_CMPSWAP_RTN_32 |
					    ATOMIC_MEM_COMMAND(1) |
					    ATOMIC_MEM_CACHEPOLICAY(0) |
					    ATOMIC_MEM_ENGINESEL(0));
				pm4[i++] = 0xfffffffc & bo_mc;
				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
				pm4[i++] = 0x12345678;
				pm4[i++] = 0x0;
				pm4[i++] = 0xdeadbeaf;
				pm4[i++] = 0x0;
				pm4[i++] = 0x100;
				amdgpu_test_exec_cs_helper_raw(device, context_handle,
							       ip_type, ring_id, i, pm4,
							       1, resources, ib_info,
							       ibs_request, true);
			} else if (ip_type == AMDGPU_HW_IP_DMA) {
				/* remember bo_cpu[0] so it can be compared afterwards */
				bo_cpu_origin = bo_cpu[0];
				memset((void*)pm4, 0, pm4_dw * sizeof(uint32_t));
				/* atomic opcode for 32b w/ RTN and ATOMIC_SWAPCMP_RTN
				 * loop, 1-loop_until_compare_satisfied.
				 * single_pass_atomic, 0-lru
				 */
				pm4[i++] = SDMA_PACKET(SDMA_OPCODE_ATOMIC,
						       0,
						       SDMA_ATOMIC_LOOP(1) |
						       SDMA_ATOMIC_TMZ(1) |
						       SDMA_ATOMIC_OPCODE(TC_OP_ATOMIC_CMPSWAP_RTN_32));
				pm4[i++] = 0xfffffffc & bo_mc;
				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
				pm4[i++] = 0x12345678;
				pm4[i++] = 0x0;
				pm4[i++] = 0xdeadbeaf;
				pm4[i++] = 0x0;
				pm4[i++] = 0x100;
				amdgpu_test_exec_cs_helper_raw(device, context_handle,
							       ip_type, ring_id, i, pm4,
							       1, resources, ib_info,
							       ibs_request, true);
				/* DMA's atomic behavior is unlike GFX's.
				 * If the compare data is not equal to the destination data:
				 * GFX loops again until the gfx timeout (system hang);
				 * DMA loops again until its timer expires and then sends
				 * an interrupt.
				 * So the test case can't use the interrupt mechanism and
				 * verifies through the CPU mapping instead: this first
				 * atomic is expected to change bo_cpu[0], the second one
				 * below is expected to leave it unchanged.
				 */
				CU_ASSERT_NOT_EQUAL(bo_cpu[0], bo_cpu_origin);

				/* compare again for the case of dest_data != cmp_data */
				i = 0;
				/* remember the value the first atomic left behind */
				bo_cpu_origin = bo_cpu[0];
				memset((void*)pm4, 0, pm4_dw * sizeof(uint32_t));
				pm4[i++] = SDMA_PACKET(SDMA_OPCODE_ATOMIC,
						       0,
						       SDMA_ATOMIC_LOOP(1) |
						       SDMA_ATOMIC_TMZ(1) |
						       SDMA_ATOMIC_OPCODE(TC_OP_ATOMIC_CMPSWAP_RTN_32));
				pm4[i++] = 0xfffffffc & bo_mc;
				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
				pm4[i++] = 0x87654321;
				pm4[i++] = 0x0;
				pm4[i++] = 0xdeadbeaf;
				pm4[i++] = 0x0;
				pm4[i++] = 0x100;
				amdgpu_test_exec_cs_helper_raw(device, context_handle,
							       ip_type, ring_id, i, pm4,
							       1, resources, ib_info,
							       ibs_request, true);
				/* here bo_cpu[0] should be unchanged, otherwise the test failed */
				CU_ASSERT_EQUAL(bo_cpu[0], bo_cpu_origin);
			}

			r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc,
						     sdma_write_length * sizeof(uint32_t));
			CU_ASSERT_EQUAL(r, 0);
		}
	}

out_ctx:
	/* end of test */
	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);

out_free:
	/* clean resources */
	free(resources);
	free(ibs_request);
	free(ib_info);
	free(pm4);
}

/* Non-secure write-linear test on the file-scope device_handle. */
static void amdgpu_command_submission_write_linear_helper(unsigned ip_type)
{
	amdgpu_command_submission_write_linear_helper_with_secure(device_handle,
								  ip_type,
								  false);
}

/* Run the write-linear helper on the DMA IP. */
static void amdgpu_command_submission_sdma_write_linear(void)
{
	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_DMA);
}

/*
 * Const-fill test for the DMA, GFX or compute IP given by @ip_type.
 *
 * For every available ring and for both GTT flag sets (default, then USWC) a
 * 1 MiB BO is cleared, filled with the dword 0xdeadbeaf by the engine, and
 * every dword is read back through the CPU mapping.
 */
static void amdgpu_command_submission_const_fill_helper(unsigned ip_type)
{
	const int sdma_write_length = 1024 * 1024;
	const int pm4_dw = 256;
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo;
	amdgpu_bo_handle *resources;
	uint32_t *pm4;
	struct amdgpu_cs_ib_info *ib_info;
	struct amdgpu_cs_request *ibs_request;
	uint64_t bo_mc;
	volatile uint32_t *bo_cpu;
	int i, r, loop, ring_id;
	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
	amdgpu_va_handle va_handle;
	/* zeroed so that a failed query leaves no rings to iterate over */
	struct drm_amdgpu_info_hw_ip hw_ip_info = {0};

	pm4 = calloc(pm4_dw, sizeof(*pm4));
	CU_ASSERT_NOT_EQUAL(pm4, NULL);

	ib_info = calloc(1, sizeof(*ib_info));
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);

	ibs_request = calloc(1, sizeof(*ibs_request));
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);

	/* prepare resource */
	resources = calloc(1, sizeof(amdgpu_bo_handle));
	CU_ASSERT_NOT_EQUAL(resources, NULL);

	if (!pm4 || !ib_info || !ibs_request || !resources)
		goto out_free;

	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);
	if (r)
		goto out_free;

	for (ring_id = 0; cs_ring_present(hw_ip_info.available_rings, ring_id);
	     ring_id++) {
		for (loop = 0; loop < 2; loop++) {
			/* allocate the destination bo with this pass's GTT flags */
			r = amdgpu_bo_alloc_and_map(device_handle,
						    sdma_write_length, 4096,
						    AMDGPU_GEM_DOMAIN_GTT,
						    gtt_flags[loop], &bo, (void**)&bo_cpu,
						    &bo_mc, &va_handle);
			CU_ASSERT_EQUAL(r, 0);
			if (r)
				goto out_ctx;

			/* clear bo */
			memset((void*)bo_cpu, 0, sdma_write_length);

			resources[0] = bo;

			/* fill PM4: test DMA const fill */
			i = 0;
			if (ip_type == AMDGPU_HW_IP_DMA) {
				if (family_id == AMDGPU_FAMILY_SI) {
					pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_CONSTANT_FILL_SI,
								  0, 0, 0,
								  sdma_write_length / 4);
					pm4[i++] = 0xfffffffc & bo_mc;
					pm4[i++] = 0xdeadbeaf;
					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 16;
				} else {
					pm4[i++] = SDMA_PACKET(SDMA_OPCODE_CONSTANT_FILL, 0,
							       SDMA_CONSTANT_FILL_EXTRA_SIZE(2));
					pm4[i++] = 0xffffffff & bo_mc;
					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
					pm4[i++] = 0xdeadbeaf;
					if (family_id >= AMDGPU_FAMILY_AI)
						pm4[i++] = sdma_write_length - 1;
					else
						pm4[i++] = sdma_write_length;
				}
			} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
				   (ip_type == AMDGPU_HW_IP_COMPUTE)) {
				if (family_id == AMDGPU_FAMILY_SI) {
					pm4[i++] = PACKET3(PACKET3_DMA_DATA_SI, 4);
					pm4[i++] = 0xdeadbeaf;
					pm4[i++] = PACKET3_DMA_DATA_SI_ENGINE(0) |
						   PACKET3_DMA_DATA_SI_DST_SEL(0) |
						   PACKET3_DMA_DATA_SI_SRC_SEL(2) |
						   PACKET3_DMA_DATA_SI_CP_SYNC;
					pm4[i++] = 0xffffffff & bo_mc;
					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
					pm4[i++] = sdma_write_length;
				} else {
					pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5);
					pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) |
						   PACKET3_DMA_DATA_DST_SEL(0) |
						   PACKET3_DMA_DATA_SRC_SEL(2) |
						   PACKET3_DMA_DATA_CP_SYNC;
					pm4[i++] = 0xdeadbeaf;
					pm4[i++] = 0;
					pm4[i++] = 0xfffffffc & bo_mc;
					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
					pm4[i++] = sdma_write_length;
				}
			}

			amdgpu_test_exec_cs_helper(context_handle,
						   ip_type, ring_id,
						   i, pm4,
						   1, resources,
						   ib_info, ibs_request);

			/* verify that the fill result matches what is expected */
			i = 0;
			while (i < (sdma_write_length / 4)) {
				CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf);
			}

			r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc,
						     sdma_write_length);
			CU_ASSERT_EQUAL(r, 0);
		}
	}

out_ctx:
	/* end of test */
	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);

out_free:
	/* clean resources */
	free(resources);
	free(ibs_request);
	free(ib_info);
	free(pm4);
}

/* Run the const-fill helper on the DMA IP. */
static void amdgpu_command_submission_sdma_const_fill(void)
{
	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_DMA);
}

/*
 * Copy-linear test for the DMA, GFX or compute IP given by @ip_type.
 *
 * For every available ring, a 1024-byte source BO filled with 0xaa is copied
 * by the engine into a cleared destination BO, and the destination is read
 * back through the CPU mapping. This is repeated for all four combinations of
 * the two GTT flag sets (default, USWC) on source and destination.
 */
static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type)
{
	const int sdma_write_length = 1024;
	const int pm4_dw = 256;
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo1, bo2;
	amdgpu_bo_handle *resources;
	uint32_t *pm4;
	struct amdgpu_cs_ib_info *ib_info;
	struct amdgpu_cs_request *ibs_request;
	uint64_t bo1_mc, bo2_mc;
	volatile unsigned char *bo1_cpu, *bo2_cpu;
	int i, r, loop1, loop2, ring_id;
	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
	amdgpu_va_handle bo1_va_handle, bo2_va_handle;
	/* zeroed so that a failed query leaves no rings to iterate over */
	struct drm_amdgpu_info_hw_ip hw_ip_info = {0};

	pm4 = calloc(pm4_dw, sizeof(*pm4));
	CU_ASSERT_NOT_EQUAL(pm4, NULL);

	ib_info = calloc(1, sizeof(*ib_info));
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);

	ibs_request = calloc(1, sizeof(*ibs_request));
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);

	/* prepare resource */
	resources = calloc(2, sizeof(amdgpu_bo_handle));
	CU_ASSERT_NOT_EQUAL(resources, NULL);

	if (!pm4 || !ib_info || !ibs_request || !resources)
		goto out_free;

	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);
	if (r)
		goto out_free;

	for (ring_id = 0; cs_ring_present(hw_ip_info.available_rings, ring_id);
	     ring_id++) {
		/*
		 * Run all 2 x 2 mapping combinations. The inner counter is
		 * restarted for every outer pass; previously it was set only
		 * once per ring, so the second outer pass never ran.
		 */
		for (loop1 = 0; loop1 < 2; loop1++) {
			for (loop2 = 0; loop2 < 2; loop2++) {
				/* allocate the source bo1 */
				r = amdgpu_bo_alloc_and_map(device_handle,
							    sdma_write_length, 4096,
							    AMDGPU_GEM_DOMAIN_GTT,
							    gtt_flags[loop1], &bo1,
							    (void**)&bo1_cpu, &bo1_mc,
							    &bo1_va_handle);
				CU_ASSERT_EQUAL(r, 0);
				if (r)
					goto out_ctx;

				/* set bo1 */
				memset((void*)bo1_cpu, 0xaa, sdma_write_length);

				/* allocate the destination bo2 */
				r = amdgpu_bo_alloc_and_map(device_handle,
							    sdma_write_length, 4096,
							    AMDGPU_GEM_DOMAIN_GTT,
							    gtt_flags[loop2], &bo2,
							    (void**)&bo2_cpu, &bo2_mc,
							    &bo2_va_handle);
				CU_ASSERT_EQUAL(r, 0);
				if (r) {
					r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle,
								     bo1_mc,
								     sdma_write_length);
					CU_ASSERT_EQUAL(r, 0);
					goto out_ctx;
				}

				/* clear bo2 */
				memset((void*)bo2_cpu, 0, sdma_write_length);

				resources[0] = bo1;
				resources[1] = bo2;

				/* fill PM4: test DMA copy linear */
				i = 0;
				if (ip_type == AMDGPU_HW_IP_DMA) {
					if (family_id == AMDGPU_FAMILY_SI) {
						pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI,
									  0, 0, 0,
									  sdma_write_length);
						pm4[i++] = 0xffffffff & bo2_mc;
						pm4[i++] = 0xffffffff & bo1_mc;
						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
						pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
					} else {
						pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY,
								       SDMA_COPY_SUB_OPCODE_LINEAR,
								       0);
						if (family_id >= AMDGPU_FAMILY_AI)
							pm4[i++] = sdma_write_length - 1;
						else
							pm4[i++] = sdma_write_length;
						pm4[i++] = 0;
						pm4[i++] = 0xffffffff & bo1_mc;
						pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
						pm4[i++] = 0xffffffff & bo2_mc;
						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
					}
				} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
					   (ip_type == AMDGPU_HW_IP_COMPUTE)) {
					if (family_id == AMDGPU_FAMILY_SI) {
						pm4[i++] = PACKET3(PACKET3_DMA_DATA_SI, 4);
						pm4[i++] = 0xfffffffc & bo1_mc;
						pm4[i++] = PACKET3_DMA_DATA_SI_ENGINE(0) |
							   PACKET3_DMA_DATA_SI_DST_SEL(0) |
							   PACKET3_DMA_DATA_SI_SRC_SEL(0) |
							   PACKET3_DMA_DATA_SI_CP_SYNC |
							   ((0xffff00000000 & bo1_mc) >> 32);
						pm4[i++] = 0xfffffffc & bo2_mc;
						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
						pm4[i++] = sdma_write_length;
					} else {
						pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5);
						pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) |
							   PACKET3_DMA_DATA_DST_SEL(0) |
							   PACKET3_DMA_DATA_SRC_SEL(0) |
							   PACKET3_DMA_DATA_CP_SYNC;
						pm4[i++] = 0xfffffffc & bo1_mc;
						pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
						pm4[i++] = 0xfffffffc & bo2_mc;
						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
						pm4[i++] = sdma_write_length;
					}
				}

				amdgpu_test_exec_cs_helper(context_handle,
							   ip_type, ring_id,
							   i, pm4,
							   2, resources,
							   ib_info, ibs_request);

				/* verify that the copy result matches what is expected */
				i = 0;
				while (i < sdma_write_length) {
					CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa);
				}
				r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc,
							     sdma_write_length);
				CU_ASSERT_EQUAL(r, 0);
				r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc,
							     sdma_write_length);
				CU_ASSERT_EQUAL(r, 0);
			}
		}
	}

out_ctx:
	/* end of test */
	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);

out_free:
	/* clean resources */
	free(resources);
	free(ibs_request);
	free(ib_info);
	free(pm4);
}

/* Run the copy-linear helper on the DMA IP. */
static void amdgpu_command_submission_sdma_copy_linear(void)
{
	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_DMA);
}

/* SDMA command-submission test: write-linear, const-fill, then copy-linear. */
static void amdgpu_command_submission_sdma(void)
{
	amdgpu_command_submission_sdma_write_linear();
	amdgpu_command_submission_sdma_const_fill();
	amdgpu_command_submission_sdma_copy_linear();
}

/*
 * Submit two identical GFX requests (each a CE IB plus a DE IB) with a single
 * amdgpu_cs_submit() call and wait on both resulting fences with
 * amdgpu_cs_wait_fences().
 *
 * @wait_all is passed straight through to amdgpu_cs_wait_fences().
 */
static void amdgpu_command_submission_multi_fence_wait_all(bool wait_all)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle ib_result_handle, ib_result_ce_handle;
	void *ib_result_cpu, *ib_result_ce_cpu;
	uint64_t ib_result_mc_address, ib_result_ce_mc_address;
	struct amdgpu_cs_request ibs_request[2] = {0};
	struct amdgpu_cs_ib_info ib_info[2];
	struct amdgpu_cs_fence fence_status[2] = {0};
	uint32_t *ptr;
	uint32_t expired = 0;
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle, va_handle_ce;
	int r;
	int i = 0, ib_cs_num = 2;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);
	if (r)
		return;

	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_handle, &ib_result_cpu,
				    &ib_result_mc_address, &va_handle);
	CU_ASSERT_EQUAL(r, 0);
	if (r)
		goto out_ctx;

	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_ce_handle, &ib_result_ce_cpu,
				    &ib_result_ce_mc_address, &va_handle_ce);
	CU_ASSERT_EQUAL(r, 0);
	if (r)
		goto out_ib;

	r = amdgpu_get_bo_list(device_handle, ib_result_handle,
			       ib_result_ce_handle, &bo_list);
	CU_ASSERT_EQUAL(r, 0);
	if (r)
		goto out_ce;

	memset(ib_info, 0, sizeof(ib_info));

	/* IT_SET_CE_DE_COUNTERS */
	ptr = ib_result_ce_cpu;
	if (family_id != AMDGPU_FAMILY_SI) {
		ptr[i++] = 0xc0008900;
		ptr[i++] = 0;
	}
	ptr[i++] = 0xc0008400;
	ptr[i++] = 1;
	ib_info[0].ib_mc_address = ib_result_ce_mc_address;
	ib_info[0].size = i;
	ib_info[0].flags = AMDGPU_IB_FLAG_CE;

	/* IT_WAIT_ON_CE_COUNTER */
	ptr = ib_result_cpu;
	ptr[0] = 0xc0008600;
	ptr[1] = 0x00000001;
	ib_info[1].ib_mc_address = ib_result_mc_address;
	ib_info[1].size = 2;

	/* both requests reference the same pair of IBs */
	for (i = 0; i < ib_cs_num; i++) {
		ibs_request[i].ip_type = AMDGPU_HW_IP_GFX;
		ibs_request[i].number_of_ibs = 2;
		ibs_request[i].ibs = ib_info;
		ibs_request[i].resources = bo_list;
		ibs_request[i].fence_info.handle = NULL;
	}

	r = amdgpu_cs_submit(context_handle, 0, ibs_request, ib_cs_num);
	CU_ASSERT_EQUAL(r, 0);

	/* only wait when the submission produced sequence numbers */
	if (r == 0) {
		for (i = 0; i < ib_cs_num; i++) {
			fence_status[i].context = context_handle;
			fence_status[i].ip_type = AMDGPU_HW_IP_GFX;
			fence_status[i].fence = ibs_request[i].seq_no;
		}

		r = amdgpu_cs_wait_fences(fence_status, ib_cs_num, wait_all,
					  AMDGPU_TIMEOUT_INFINITE,
					  &expired, NULL);
		CU_ASSERT_EQUAL(r, 0);
	}

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

out_ce:
	r = amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce,
				     ib_result_ce_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

out_ib:
	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
				     ib_result_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

out_ctx:
	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

/* Multi-fence test: once waiting for all fences, once for any of them. */
static void amdgpu_command_submission_multi_fence(void)
{
	amdgpu_command_submission_multi_fence_wait_all(true);
	amdgpu_command_submission_multi_fence_wait_all(false);
}

/*
 * Userptr test: wrap page-aligned user memory in a BO, map it into the GPU
 * VA space, let SDMA write 0xdeadbeaf into its first four dwords and read the
 * result back through the original pointer.
 *
 * A child process is forked right before the submission; it writes to its
 * copy of pm4 and exits immediately.
 * NOTE(review): presumably this exercises the userptr mapping across a
 * fork/copy-on-write event - confirm with the test's author.
 */
static void amdgpu_userptr_test(void)
{
	int i, r, j;
	uint32_t *pm4 = NULL;
	uint64_t bo_mc;
	void *ptr = NULL;
	int pm4_dw = 256;
	int sdma_write_length = 4;
	amdgpu_context_handle context_handle;
	struct amdgpu_cs_ib_info *ib_info;
	struct amdgpu_cs_request *ibs_request;
	amdgpu_bo_handle buf_handle;
	amdgpu_va_handle va_handle;
	pid_t child = -1;

	pm4 = calloc(pm4_dw, sizeof(*pm4));
	CU_ASSERT_NOT_EQUAL(pm4, NULL);

	ib_info = calloc(1, sizeof(*ib_info));
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);

	ibs_request = calloc(1, sizeof(*ibs_request));
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);

	if (!pm4 || !ib_info || !ibs_request)
		goto out_free;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);
	if (r)
		goto out_free;

	/* posix_memalign reports failure only through its return value */
	r = posix_memalign(&ptr, sysconf(_SC_PAGE_SIZE), BUFFER_SIZE);
	if (r)
		ptr = NULL;
	CU_ASSERT_NOT_EQUAL(ptr, NULL);
	if (!ptr)
		goto out_ctx;
	memset(ptr, 0, BUFFER_SIZE);

	r = amdgpu_create_bo_from_user_mem(device_handle,
					   ptr, BUFFER_SIZE, &buf_handle);
	CU_ASSERT_EQUAL(r, 0);
	if (r)
		goto out_mem;

	r = amdgpu_va_range_alloc(device_handle,
				  amdgpu_gpu_va_range_general,
				  BUFFER_SIZE, 1, 0, &bo_mc,
				  &va_handle, 0);
	CU_ASSERT_EQUAL(r, 0);
	if (r)
		goto out_bo;

	r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_MAP);
	CU_ASSERT_EQUAL(r, 0);
	if (r)
		goto out_va;

	j = i = 0;

	if (family_id == AMDGPU_FAMILY_SI)
		pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0,
					  sdma_write_length);
	else
		pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
				       SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
	pm4[i++] = 0xffffffff & bo_mc;
	pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
	if (family_id >= AMDGPU_FAMILY_AI)
		pm4[i++] = sdma_write_length - 1;
	else if (family_id != AMDGPU_FAMILY_SI)
		pm4[i++] = sdma_write_length;

	while (j++ < sdma_write_length)
		pm4[i++] = 0xdeadbeaf;

	child = fork();
	CU_ASSERT_TRUE(child >= 0);
	if (child == 0) {
		pm4[0] = 0x0;
		/*
		 * _exit() rather than exit(): the child must not flush the
		 * stdio buffers it inherited from the test runner or run the
		 * runner's atexit handlers.
		 */
		_exit(0);
	}

	amdgpu_test_exec_cs_helper(context_handle,
				   AMDGPU_HW_IP_DMA, 0,
				   i, pm4,
				   1, &buf_handle,
				   ib_info, ibs_request);
	i = 0;
	while (i < sdma_write_length) {
		CU_ASSERT_EQUAL(((int*)ptr)[i++], 0xdeadbeaf);
	}

	r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_UNMAP);
	CU_ASSERT_EQUAL(r, 0);
out_va:
	r = amdgpu_va_range_free(va_handle);
	CU_ASSERT_EQUAL(r, 0);
out_bo:
	r = amdgpu_bo_free(buf_handle);
	CU_ASSERT_EQUAL(r, 0);
out_mem:
	free(ptr);
out_ctx:
	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* reap the child only when one was actually created */
	if (child > 0)
		wait(NULL);
out_free:
	free(ibs_request);
	free(ib_info);
	free(pm4);
}

225400a23bdaSmrgstatic void amdgpu_sync_dependency_test(void) 225500a23bdaSmrg{ 225600a23bdaSmrg amdgpu_context_handle context_handle[2]; 225700a23bdaSmrg amdgpu_bo_handle ib_result_handle; 225800a23bdaSmrg void *ib_result_cpu; 225900a23bdaSmrg uint64_t ib_result_mc_address; 226000a23bdaSmrg struct amdgpu_cs_request ibs_request; 226100a23bdaSmrg struct amdgpu_cs_ib_info ib_info; 226200a23bdaSmrg struct amdgpu_cs_fence fence_status; 226300a23bdaSmrg
uint32_t expired; 226400a23bdaSmrg int i, j, r; 226500a23bdaSmrg amdgpu_bo_list_handle bo_list; 226600a23bdaSmrg amdgpu_va_handle va_handle; 226700a23bdaSmrg static uint32_t *ptr; 226800a23bdaSmrg uint64_t seq_no; 226900a23bdaSmrg 227000a23bdaSmrg r = amdgpu_cs_ctx_create(device_handle, &context_handle[0]); 227100a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 227200a23bdaSmrg r = amdgpu_cs_ctx_create(device_handle, &context_handle[1]); 227300a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 227400a23bdaSmrg 227500a23bdaSmrg r = amdgpu_bo_alloc_and_map(device_handle, 8192, 4096, 227600a23bdaSmrg AMDGPU_GEM_DOMAIN_GTT, 0, 227700a23bdaSmrg &ib_result_handle, &ib_result_cpu, 227800a23bdaSmrg &ib_result_mc_address, &va_handle); 227900a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 228000a23bdaSmrg 228100a23bdaSmrg r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL, 228200a23bdaSmrg &bo_list); 228300a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 228400a23bdaSmrg 228500a23bdaSmrg ptr = ib_result_cpu; 228600a23bdaSmrg i = 0; 228700a23bdaSmrg 228800a23bdaSmrg memcpy(ptr + CODE_OFFSET , shader_bin, sizeof(shader_bin)); 228900a23bdaSmrg 229000a23bdaSmrg /* Dispatch minimal init config and verify it's executed */ 229100a23bdaSmrg ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1); 229200a23bdaSmrg ptr[i++] = 0x80000000; 229300a23bdaSmrg ptr[i++] = 0x80000000; 229400a23bdaSmrg 229500a23bdaSmrg ptr[i++] = PACKET3(PKT3_CLEAR_STATE, 0); 229600a23bdaSmrg ptr[i++] = 0x80000000; 229700a23bdaSmrg 229800a23bdaSmrg 229900a23bdaSmrg /* Program compute regs */ 230000a23bdaSmrg ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2); 230100a23bdaSmrg ptr[i++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START; 230200a23bdaSmrg ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 8; 230300a23bdaSmrg ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 40; 230400a23bdaSmrg 230500a23bdaSmrg 230600a23bdaSmrg ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2); 230700a23bdaSmrg ptr[i++] = mmCOMPUTE_PGM_RSRC1 - PACKET3_SET_SH_REG_START; 230800a23bdaSmrg /* 230900a23bdaSmrg * 
002c0040 COMPUTE_PGM_RSRC1 <- VGPRS = 0 231000a23bdaSmrg SGPRS = 1 231100a23bdaSmrg PRIORITY = 0 231200a23bdaSmrg FLOAT_MODE = 192 (0xc0) 231300a23bdaSmrg PRIV = 0 231400a23bdaSmrg DX10_CLAMP = 1 231500a23bdaSmrg DEBUG_MODE = 0 231600a23bdaSmrg IEEE_MODE = 0 231700a23bdaSmrg BULKY = 0 231800a23bdaSmrg CDBG_USER = 0 231900a23bdaSmrg * 232000a23bdaSmrg */ 232100a23bdaSmrg ptr[i++] = 0x002c0040; 232200a23bdaSmrg 232300a23bdaSmrg 232400a23bdaSmrg /* 232500a23bdaSmrg * 00000010 COMPUTE_PGM_RSRC2 <- SCRATCH_EN = 0 232600a23bdaSmrg USER_SGPR = 8 232700a23bdaSmrg TRAP_PRESENT = 0 232800a23bdaSmrg TGID_X_EN = 0 232900a23bdaSmrg TGID_Y_EN = 0 233000a23bdaSmrg TGID_Z_EN = 0 233100a23bdaSmrg TG_SIZE_EN = 0 233200a23bdaSmrg TIDIG_COMP_CNT = 0 233300a23bdaSmrg EXCP_EN_MSB = 0 233400a23bdaSmrg LDS_SIZE = 0 233500a23bdaSmrg EXCP_EN = 0 233600a23bdaSmrg * 233700a23bdaSmrg */ 233800a23bdaSmrg ptr[i++] = 0x00000010; 233900a23bdaSmrg 234000a23bdaSmrg 234100a23bdaSmrg/* 234200a23bdaSmrg * 00000100 COMPUTE_TMPRING_SIZE <- WAVES = 256 (0x100) 234300a23bdaSmrg WAVESIZE = 0 234400a23bdaSmrg * 234500a23bdaSmrg */ 234600a23bdaSmrg ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1); 234700a23bdaSmrg ptr[i++] = mmCOMPUTE_TMPRING_SIZE - PACKET3_SET_SH_REG_START; 234800a23bdaSmrg ptr[i++] = 0x00000100; 234900a23bdaSmrg 235000a23bdaSmrg ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2); 235100a23bdaSmrg ptr[i++] = mmCOMPUTE_USER_DATA_0 - PACKET3_SET_SH_REG_START; 235200a23bdaSmrg ptr[i++] = 0xffffffff & (ib_result_mc_address + DATA_OFFSET * 4); 235300a23bdaSmrg ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32; 235400a23bdaSmrg 235500a23bdaSmrg ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1); 235600a23bdaSmrg ptr[i++] = mmCOMPUTE_RESOURCE_LIMITS - PACKET3_SET_SH_REG_START; 235700a23bdaSmrg ptr[i++] = 0; 235800a23bdaSmrg 235900a23bdaSmrg ptr[i++] = PACKET3(PKT3_SET_SH_REG, 3); 236000a23bdaSmrg ptr[i++] = mmCOMPUTE_NUM_THREAD_X - PACKET3_SET_SH_REG_START; 236100a23bdaSmrg ptr[i++] = 1; 
236200a23bdaSmrg ptr[i++] = 1; 236300a23bdaSmrg ptr[i++] = 1; 236400a23bdaSmrg 236500a23bdaSmrg 236600a23bdaSmrg /* Dispatch */ 236700a23bdaSmrg ptr[i++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 236800a23bdaSmrg ptr[i++] = 1; 236900a23bdaSmrg ptr[i++] = 1; 237000a23bdaSmrg ptr[i++] = 1; 237100a23bdaSmrg ptr[i++] = 0x00000045; /* DISPATCH DIRECT field */ 237200a23bdaSmrg 237300a23bdaSmrg 237400a23bdaSmrg while (i & 7) 237500a23bdaSmrg ptr[i++] = 0xffff1000; /* type3 nop packet */ 237600a23bdaSmrg 237700a23bdaSmrg memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info)); 237800a23bdaSmrg ib_info.ib_mc_address = ib_result_mc_address; 237900a23bdaSmrg ib_info.size = i; 238000a23bdaSmrg 238100a23bdaSmrg memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request)); 238200a23bdaSmrg ibs_request.ip_type = AMDGPU_HW_IP_GFX; 238300a23bdaSmrg ibs_request.ring = 0; 238400a23bdaSmrg ibs_request.number_of_ibs = 1; 238500a23bdaSmrg ibs_request.ibs = &ib_info; 238600a23bdaSmrg ibs_request.resources = bo_list; 238700a23bdaSmrg ibs_request.fence_info.handle = NULL; 238800a23bdaSmrg 238900a23bdaSmrg r = amdgpu_cs_submit(context_handle[1], 0,&ibs_request, 1); 239000a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 239100a23bdaSmrg seq_no = ibs_request.seq_no; 239200a23bdaSmrg 239300a23bdaSmrg 239400a23bdaSmrg 239500a23bdaSmrg /* Prepare second command with dependency on the first */ 239600a23bdaSmrg j = i; 239700a23bdaSmrg ptr[i++] = PACKET3(PACKET3_WRITE_DATA, 3); 239800a23bdaSmrg ptr[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM; 239900a23bdaSmrg ptr[i++] = 0xfffffffc & (ib_result_mc_address + DATA_OFFSET * 4); 240000a23bdaSmrg ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32; 240100a23bdaSmrg ptr[i++] = 99; 240200a23bdaSmrg 240300a23bdaSmrg while (i & 7) 240400a23bdaSmrg ptr[i++] = 0xffff1000; /* type3 nop packet */ 240500a23bdaSmrg 240600a23bdaSmrg memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info)); 240700a23bdaSmrg ib_info.ib_mc_address = ib_result_mc_address + j * 4; 
240800a23bdaSmrg ib_info.size = i - j; 240900a23bdaSmrg 241000a23bdaSmrg memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request)); 241100a23bdaSmrg ibs_request.ip_type = AMDGPU_HW_IP_GFX; 241200a23bdaSmrg ibs_request.ring = 0; 241300a23bdaSmrg ibs_request.number_of_ibs = 1; 241400a23bdaSmrg ibs_request.ibs = &ib_info; 241500a23bdaSmrg ibs_request.resources = bo_list; 241600a23bdaSmrg ibs_request.fence_info.handle = NULL; 241700a23bdaSmrg 241800a23bdaSmrg ibs_request.number_of_dependencies = 1; 241900a23bdaSmrg 242000a23bdaSmrg ibs_request.dependencies = calloc(1, sizeof(*ibs_request.dependencies)); 242100a23bdaSmrg ibs_request.dependencies[0].context = context_handle[1]; 242200a23bdaSmrg ibs_request.dependencies[0].ip_instance = 0; 242300a23bdaSmrg ibs_request.dependencies[0].ring = 0; 242400a23bdaSmrg ibs_request.dependencies[0].fence = seq_no; 242500a23bdaSmrg 242600a23bdaSmrg 242700a23bdaSmrg r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request, 1); 242800a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 242900a23bdaSmrg 243000a23bdaSmrg 243100a23bdaSmrg memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence)); 243200a23bdaSmrg fence_status.context = context_handle[0]; 243300a23bdaSmrg fence_status.ip_type = AMDGPU_HW_IP_GFX; 243400a23bdaSmrg fence_status.ip_instance = 0; 243500a23bdaSmrg fence_status.ring = 0; 243600a23bdaSmrg fence_status.fence = ibs_request.seq_no; 243700a23bdaSmrg 243800a23bdaSmrg r = amdgpu_cs_query_fence_status(&fence_status, 243900a23bdaSmrg AMDGPU_TIMEOUT_INFINITE,0, &expired); 244000a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 244100a23bdaSmrg 244200a23bdaSmrg /* Expect the second command to wait for shader to complete */ 244300a23bdaSmrg CU_ASSERT_EQUAL(ptr[DATA_OFFSET], 99); 244400a23bdaSmrg 244500a23bdaSmrg r = amdgpu_bo_list_destroy(bo_list); 244600a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 244700a23bdaSmrg 244800a23bdaSmrg r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle, 244900a23bdaSmrg ib_result_mc_address, 4096); 245000a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 
245100a23bdaSmrg 245200a23bdaSmrg r = amdgpu_cs_ctx_free(context_handle[0]); 245300a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 245400a23bdaSmrg r = amdgpu_cs_ctx_free(context_handle[1]); 245500a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 245600a23bdaSmrg 245700a23bdaSmrg free(ibs_request.dependencies); 24583f012e29Smrg} 24595324fb0dSmrg 24609bd392adSmrgstatic int amdgpu_dispatch_load_cs_shader_hang_slow(uint32_t *ptr, int family) 24619bd392adSmrg{ 24629bd392adSmrg struct amdgpu_test_shader *shader; 24639bd392adSmrg int i, loop = 0x10000; 24649bd392adSmrg 24659bd392adSmrg switch (family) { 24669bd392adSmrg case AMDGPU_FAMILY_AI: 24679bd392adSmrg shader = &memcpy_cs_hang_slow_ai; 24689bd392adSmrg break; 24699bd392adSmrg case AMDGPU_FAMILY_RV: 24709bd392adSmrg shader = &memcpy_cs_hang_slow_rv; 24719bd392adSmrg break; 24720ed5401bSmrg case AMDGPU_FAMILY_NV: 24730ed5401bSmrg shader = &memcpy_cs_hang_slow_nv; 24740ed5401bSmrg break; 24759bd392adSmrg default: 24769bd392adSmrg return -1; 24779bd392adSmrg break; 24789bd392adSmrg } 24799bd392adSmrg 24809bd392adSmrg memcpy(ptr, shader->shader, shader->header_length * sizeof(uint32_t)); 24819bd392adSmrg 24829bd392adSmrg for (i = 0; i < loop; i++) 24839bd392adSmrg memcpy(ptr + shader->header_length + shader->body_length * i, 24849bd392adSmrg shader->shader + shader->header_length, 24859bd392adSmrg shader->body_length * sizeof(uint32_t)); 24869bd392adSmrg 24879bd392adSmrg memcpy(ptr + shader->header_length + shader->body_length * loop, 24889bd392adSmrg shader->shader + shader->header_length + shader->body_length, 24899bd392adSmrg shader->foot_length * sizeof(uint32_t)); 24909bd392adSmrg 24919bd392adSmrg return 0; 24929bd392adSmrg} 24939bd392adSmrg 24945324fb0dSmrgstatic int amdgpu_dispatch_load_cs_shader(uint8_t *ptr, 24950ed5401bSmrg int cs_type, 24960ed5401bSmrg uint32_t version) 24975324fb0dSmrg{ 24985324fb0dSmrg uint32_t shader_size; 24995324fb0dSmrg const uint32_t *shader; 25005324fb0dSmrg 25015324fb0dSmrg switch (cs_type) { 25025324fb0dSmrg 
case CS_BUFFERCLEAR: 25030ed5401bSmrg if (version == 9) { 25040ed5401bSmrg shader = bufferclear_cs_shader_gfx9; 25050ed5401bSmrg shader_size = sizeof(bufferclear_cs_shader_gfx9); 25060ed5401bSmrg } else if (version == 10) { 25070ed5401bSmrg shader = bufferclear_cs_shader_gfx10; 25080ed5401bSmrg shader_size = sizeof(bufferclear_cs_shader_gfx10); 25090ed5401bSmrg } 25105324fb0dSmrg break; 25115324fb0dSmrg case CS_BUFFERCOPY: 25120ed5401bSmrg if (version == 9) { 25130ed5401bSmrg shader = buffercopy_cs_shader_gfx9; 25140ed5401bSmrg shader_size = sizeof(buffercopy_cs_shader_gfx9); 25150ed5401bSmrg } else if (version == 10) { 25160ed5401bSmrg shader = buffercopy_cs_shader_gfx10; 25170ed5401bSmrg shader_size = sizeof(buffercopy_cs_shader_gfx10); 25180ed5401bSmrg } 25195324fb0dSmrg break; 25209bd392adSmrg case CS_HANG: 25219bd392adSmrg shader = memcpy_ps_hang; 25229bd392adSmrg shader_size = sizeof(memcpy_ps_hang); 25239bd392adSmrg break; 25245324fb0dSmrg default: 25255324fb0dSmrg return -1; 25265324fb0dSmrg break; 25275324fb0dSmrg } 25285324fb0dSmrg 25295324fb0dSmrg memcpy(ptr, shader, shader_size); 25305324fb0dSmrg return 0; 25315324fb0dSmrg} 25325324fb0dSmrg 25330ed5401bSmrgstatic int amdgpu_dispatch_init(uint32_t *ptr, uint32_t ip_type, uint32_t version) 25345324fb0dSmrg{ 25355324fb0dSmrg int i = 0; 25365324fb0dSmrg 25375324fb0dSmrg /* Write context control and load shadowing register if necessary */ 25385324fb0dSmrg if (ip_type == AMDGPU_HW_IP_GFX) { 25395324fb0dSmrg ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1); 25405324fb0dSmrg ptr[i++] = 0x80000000; 25415324fb0dSmrg ptr[i++] = 0x80000000; 25425324fb0dSmrg } 25435324fb0dSmrg 25445324fb0dSmrg /* Issue commands to set default compute state. 
*/ 25455324fb0dSmrg /* clear mmCOMPUTE_START_Z - mmCOMPUTE_START_X */ 25465324fb0dSmrg ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 3); 25475324fb0dSmrg ptr[i++] = 0x204; 25485324fb0dSmrg i += 3; 254988f8a8d2Smrg 25505324fb0dSmrg /* clear mmCOMPUTE_TMPRING_SIZE */ 25515324fb0dSmrg ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1); 25525324fb0dSmrg ptr[i++] = 0x218; 25535324fb0dSmrg ptr[i++] = 0; 25545324fb0dSmrg 25550ed5401bSmrg /* Set new sh registers in GFX10 to 0 */ 25560ed5401bSmrg if (version == 10) { 25570ed5401bSmrg /* mmCOMPUTE_SHADER_CHKSUM */ 25580ed5401bSmrg ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1); 25590ed5401bSmrg ptr[i++] = 0x22a; 25600ed5401bSmrg ptr[i++] = 0; 25610ed5401bSmrg /* mmCOMPUTE_REQ_CTRL */ 25620ed5401bSmrg ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 6); 25630ed5401bSmrg ptr[i++] = 0x222; 25640ed5401bSmrg i += 6; 25650ed5401bSmrg /* mmCP_COHER_START_DELAY */ 25660ed5401bSmrg ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1); 25670ed5401bSmrg ptr[i++] = 0x7b; 25680ed5401bSmrg ptr[i++] = 0x20; 25690ed5401bSmrg } 25705324fb0dSmrg return i; 25715324fb0dSmrg} 25725324fb0dSmrg 25730ed5401bSmrgstatic int amdgpu_dispatch_write_cumask(uint32_t *ptr, uint32_t version) 25745324fb0dSmrg{ 25755324fb0dSmrg int i = 0; 25765324fb0dSmrg 25775324fb0dSmrg /* Issue commands to set cu mask used in current dispatch */ 25780ed5401bSmrg if (version == 9) { 25790ed5401bSmrg /* set mmCOMPUTE_STATIC_THREAD_MGMT_SE1 - mmCOMPUTE_STATIC_THREAD_MGMT_SE0 */ 25800ed5401bSmrg ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2); 25810ed5401bSmrg ptr[i++] = 0x216; 25820ed5401bSmrg ptr[i++] = 0xffffffff; 25830ed5401bSmrg ptr[i++] = 0xffffffff; 25840ed5401bSmrg /* set mmCOMPUTE_STATIC_THREAD_MGMT_SE3 - mmCOMPUTE_STATIC_THREAD_MGMT_SE2 */ 25850ed5401bSmrg ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2); 25860ed5401bSmrg ptr[i++] = 0x219; 25870ed5401bSmrg ptr[i++] = 0xffffffff; 25880ed5401bSmrg ptr[i++] = 0xffffffff; 25890ed5401bSmrg } else if (version == 10) { 25900ed5401bSmrg /* set 
mmCOMPUTE_STATIC_THREAD_MGMT_SE1 - mmCOMPUTE_STATIC_THREAD_MGMT_SE0 */ 25910ed5401bSmrg ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG_INDEX, 2); 25920ed5401bSmrg ptr[i++] = 0x30000216; 25930ed5401bSmrg ptr[i++] = 0xffffffff; 25940ed5401bSmrg ptr[i++] = 0xffffffff; 25950ed5401bSmrg /* set mmCOMPUTE_STATIC_THREAD_MGMT_SE3 - mmCOMPUTE_STATIC_THREAD_MGMT_SE2 */ 25960ed5401bSmrg ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG_INDEX, 2); 25970ed5401bSmrg ptr[i++] = 0x30000219; 25980ed5401bSmrg ptr[i++] = 0xffffffff; 25990ed5401bSmrg ptr[i++] = 0xffffffff; 26000ed5401bSmrg } 26015324fb0dSmrg 26025324fb0dSmrg return i; 26035324fb0dSmrg} 26045324fb0dSmrg 26050ed5401bSmrgstatic int amdgpu_dispatch_write2hw(uint32_t *ptr, uint64_t shader_addr, uint32_t version) 26065324fb0dSmrg{ 26075324fb0dSmrg int i, j; 26085324fb0dSmrg 26095324fb0dSmrg i = 0; 26105324fb0dSmrg 26115324fb0dSmrg /* Writes shader state to HW */ 26125324fb0dSmrg /* set mmCOMPUTE_PGM_HI - mmCOMPUTE_PGM_LO */ 26135324fb0dSmrg ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2); 26145324fb0dSmrg ptr[i++] = 0x20c; 26155324fb0dSmrg ptr[i++] = (shader_addr >> 8); 26165324fb0dSmrg ptr[i++] = (shader_addr >> 40); 26175324fb0dSmrg /* write sh regs*/ 26185324fb0dSmrg for (j = 0; j < bufferclear_cs_shader_registers_num_gfx9; j++) { 26195324fb0dSmrg ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1); 26205324fb0dSmrg /* - Gfx9ShRegBase */ 26215324fb0dSmrg ptr[i++] = bufferclear_cs_shader_registers_gfx9[j][0] - 0x2c00; 26225324fb0dSmrg ptr[i++] = bufferclear_cs_shader_registers_gfx9[j][1]; 26235324fb0dSmrg } 26245324fb0dSmrg 26250ed5401bSmrg if (version == 10) { 26260ed5401bSmrg /* mmCOMPUTE_PGM_RSRC3 */ 26270ed5401bSmrg ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1); 26280ed5401bSmrg ptr[i++] = 0x228; 26290ed5401bSmrg ptr[i++] = 0; 26300ed5401bSmrg } 26310ed5401bSmrg 26325324fb0dSmrg return i; 26335324fb0dSmrg} 26345324fb0dSmrg 26355324fb0dSmrgstatic void amdgpu_memset_dispatch_test(amdgpu_device_handle device_handle, 26365324fb0dSmrg 
uint32_t ip_type, 26370ed5401bSmrg uint32_t ring, 26380ed5401bSmrg uint32_t version) 26395324fb0dSmrg{ 26405324fb0dSmrg amdgpu_context_handle context_handle; 26415324fb0dSmrg amdgpu_bo_handle bo_dst, bo_shader, bo_cmd, resources[3]; 26425324fb0dSmrg volatile unsigned char *ptr_dst; 26435324fb0dSmrg void *ptr_shader; 26445324fb0dSmrg uint32_t *ptr_cmd; 26455324fb0dSmrg uint64_t mc_address_dst, mc_address_shader, mc_address_cmd; 26465324fb0dSmrg amdgpu_va_handle va_dst, va_shader, va_cmd; 26475324fb0dSmrg int i, r; 26485324fb0dSmrg int bo_dst_size = 16384; 26495324fb0dSmrg int bo_shader_size = 4096; 26505324fb0dSmrg int bo_cmd_size = 4096; 26515324fb0dSmrg struct amdgpu_cs_request ibs_request = {0}; 26525324fb0dSmrg struct amdgpu_cs_ib_info ib_info= {0}; 26535324fb0dSmrg amdgpu_bo_list_handle bo_list; 26545324fb0dSmrg struct amdgpu_cs_fence fence_status = {0}; 26555324fb0dSmrg uint32_t expired; 26565324fb0dSmrg 26575324fb0dSmrg r = amdgpu_cs_ctx_create(device_handle, &context_handle); 26585324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 26595324fb0dSmrg 26605324fb0dSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096, 26615324fb0dSmrg AMDGPU_GEM_DOMAIN_GTT, 0, 26625324fb0dSmrg &bo_cmd, (void **)&ptr_cmd, 26635324fb0dSmrg &mc_address_cmd, &va_cmd); 26645324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 26655324fb0dSmrg memset(ptr_cmd, 0, bo_cmd_size); 26665324fb0dSmrg 26675324fb0dSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096, 26685324fb0dSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, 26695324fb0dSmrg &bo_shader, &ptr_shader, 26705324fb0dSmrg &mc_address_shader, &va_shader); 26715324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 267288f8a8d2Smrg memset(ptr_shader, 0, bo_shader_size); 26735324fb0dSmrg 26740ed5401bSmrg r = amdgpu_dispatch_load_cs_shader(ptr_shader, CS_BUFFERCLEAR, version); 26755324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 26765324fb0dSmrg 26775324fb0dSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096, 26785324fb0dSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, 26795324fb0dSmrg &bo_dst, 
(void **)&ptr_dst, 26805324fb0dSmrg &mc_address_dst, &va_dst); 26815324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 26825324fb0dSmrg 26835324fb0dSmrg i = 0; 26840ed5401bSmrg i += amdgpu_dispatch_init(ptr_cmd + i, ip_type, version); 26855324fb0dSmrg 26865324fb0dSmrg /* Issue commands to set cu mask used in current dispatch */ 26870ed5401bSmrg i += amdgpu_dispatch_write_cumask(ptr_cmd + i, version); 26885324fb0dSmrg 26895324fb0dSmrg /* Writes shader state to HW */ 26900ed5401bSmrg i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader, version); 26915324fb0dSmrg 26925324fb0dSmrg /* Write constant data */ 26935324fb0dSmrg /* Writes the UAV constant data to the SGPRs. */ 26945324fb0dSmrg ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4); 26955324fb0dSmrg ptr_cmd[i++] = 0x240; 26965324fb0dSmrg ptr_cmd[i++] = mc_address_dst; 26975324fb0dSmrg ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000; 26985324fb0dSmrg ptr_cmd[i++] = 0x400; 26990ed5401bSmrg if (version == 9) 27000ed5401bSmrg ptr_cmd[i++] = 0x74fac; 27010ed5401bSmrg else if (version == 10) 27020ed5401bSmrg ptr_cmd[i++] = 0x1104bfac; 27035324fb0dSmrg 27045324fb0dSmrg /* Sets a range of pixel shader constants */ 27055324fb0dSmrg ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4); 27065324fb0dSmrg ptr_cmd[i++] = 0x244; 27075324fb0dSmrg ptr_cmd[i++] = 0x22222222; 27085324fb0dSmrg ptr_cmd[i++] = 0x22222222; 27095324fb0dSmrg ptr_cmd[i++] = 0x22222222; 27105324fb0dSmrg ptr_cmd[i++] = 0x22222222; 27115324fb0dSmrg 271288f8a8d2Smrg /* clear mmCOMPUTE_RESOURCE_LIMITS */ 271388f8a8d2Smrg ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1); 271488f8a8d2Smrg ptr_cmd[i++] = 0x215; 271588f8a8d2Smrg ptr_cmd[i++] = 0; 271688f8a8d2Smrg 27175324fb0dSmrg /* dispatch direct command */ 27185324fb0dSmrg ptr_cmd[i++] = PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3); 27195324fb0dSmrg ptr_cmd[i++] = 0x10; 27205324fb0dSmrg ptr_cmd[i++] = 1; 27215324fb0dSmrg ptr_cmd[i++] = 1; 27225324fb0dSmrg ptr_cmd[i++] = 1; 27235324fb0dSmrg 27245324fb0dSmrg while (i & 
7) 27255324fb0dSmrg ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */ 27265324fb0dSmrg 27275324fb0dSmrg resources[0] = bo_dst; 27285324fb0dSmrg resources[1] = bo_shader; 27295324fb0dSmrg resources[2] = bo_cmd; 27305324fb0dSmrg r = amdgpu_bo_list_create(device_handle, 3, resources, NULL, &bo_list); 27315324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 27325324fb0dSmrg 27335324fb0dSmrg ib_info.ib_mc_address = mc_address_cmd; 27345324fb0dSmrg ib_info.size = i; 27355324fb0dSmrg ibs_request.ip_type = ip_type; 27365324fb0dSmrg ibs_request.ring = ring; 27375324fb0dSmrg ibs_request.resources = bo_list; 27385324fb0dSmrg ibs_request.number_of_ibs = 1; 27395324fb0dSmrg ibs_request.ibs = &ib_info; 27405324fb0dSmrg ibs_request.fence_info.handle = NULL; 27415324fb0dSmrg 27425324fb0dSmrg /* submit CS */ 27435324fb0dSmrg r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1); 27445324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 27455324fb0dSmrg 27465324fb0dSmrg r = amdgpu_bo_list_destroy(bo_list); 27475324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 27485324fb0dSmrg 27495324fb0dSmrg fence_status.ip_type = ip_type; 27505324fb0dSmrg fence_status.ip_instance = 0; 27515324fb0dSmrg fence_status.ring = ring; 27525324fb0dSmrg fence_status.context = context_handle; 27535324fb0dSmrg fence_status.fence = ibs_request.seq_no; 27545324fb0dSmrg 27555324fb0dSmrg /* wait for IB accomplished */ 27565324fb0dSmrg r = amdgpu_cs_query_fence_status(&fence_status, 27575324fb0dSmrg AMDGPU_TIMEOUT_INFINITE, 27585324fb0dSmrg 0, &expired); 27595324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 27605324fb0dSmrg CU_ASSERT_EQUAL(expired, true); 27615324fb0dSmrg 27625324fb0dSmrg /* verify if memset test result meets with expected */ 27635324fb0dSmrg i = 0; 27645324fb0dSmrg while(i < bo_dst_size) { 27655324fb0dSmrg CU_ASSERT_EQUAL(ptr_dst[i++], 0x22); 27665324fb0dSmrg } 27675324fb0dSmrg 27685324fb0dSmrg r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size); 27695324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 27705324fb0dSmrg 27715324fb0dSmrg r = 
amdgpu_bo_unmap_and_free(bo_shader, va_shader, mc_address_shader, bo_shader_size); 27725324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 27735324fb0dSmrg 27745324fb0dSmrg r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size); 27755324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 27765324fb0dSmrg 27775324fb0dSmrg r = amdgpu_cs_ctx_free(context_handle); 27785324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 27795324fb0dSmrg} 27805324fb0dSmrg 27815324fb0dSmrgstatic void amdgpu_memcpy_dispatch_test(amdgpu_device_handle device_handle, 27825324fb0dSmrg uint32_t ip_type, 27839bd392adSmrg uint32_t ring, 27840ed5401bSmrg uint32_t version, 27859bd392adSmrg int hang) 27865324fb0dSmrg{ 27875324fb0dSmrg amdgpu_context_handle context_handle; 27885324fb0dSmrg amdgpu_bo_handle bo_src, bo_dst, bo_shader, bo_cmd, resources[4]; 27895324fb0dSmrg volatile unsigned char *ptr_dst; 27905324fb0dSmrg void *ptr_shader; 27915324fb0dSmrg unsigned char *ptr_src; 27925324fb0dSmrg uint32_t *ptr_cmd; 27935324fb0dSmrg uint64_t mc_address_src, mc_address_dst, mc_address_shader, mc_address_cmd; 27945324fb0dSmrg amdgpu_va_handle va_src, va_dst, va_shader, va_cmd; 27955324fb0dSmrg int i, r; 27965324fb0dSmrg int bo_dst_size = 16384; 27975324fb0dSmrg int bo_shader_size = 4096; 27985324fb0dSmrg int bo_cmd_size = 4096; 27995324fb0dSmrg struct amdgpu_cs_request ibs_request = {0}; 28005324fb0dSmrg struct amdgpu_cs_ib_info ib_info= {0}; 28019bd392adSmrg uint32_t expired, hang_state, hangs; 28029bd392adSmrg enum cs_type cs_type; 28035324fb0dSmrg amdgpu_bo_list_handle bo_list; 28045324fb0dSmrg struct amdgpu_cs_fence fence_status = {0}; 28055324fb0dSmrg 28065324fb0dSmrg r = amdgpu_cs_ctx_create(device_handle, &context_handle); 28075324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 28085324fb0dSmrg 28095324fb0dSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096, 28105324fb0dSmrg AMDGPU_GEM_DOMAIN_GTT, 0, 28115324fb0dSmrg &bo_cmd, (void **)&ptr_cmd, 28125324fb0dSmrg &mc_address_cmd, &va_cmd); 28135324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 
28145324fb0dSmrg memset(ptr_cmd, 0, bo_cmd_size); 28155324fb0dSmrg 28165324fb0dSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096, 28175324fb0dSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, 28185324fb0dSmrg &bo_shader, &ptr_shader, 28195324fb0dSmrg &mc_address_shader, &va_shader); 28205324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 282188f8a8d2Smrg memset(ptr_shader, 0, bo_shader_size); 28225324fb0dSmrg 28239bd392adSmrg cs_type = hang ? CS_HANG : CS_BUFFERCOPY; 28240ed5401bSmrg r = amdgpu_dispatch_load_cs_shader(ptr_shader, cs_type, version); 28255324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 28265324fb0dSmrg 28275324fb0dSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096, 28285324fb0dSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, 28295324fb0dSmrg &bo_src, (void **)&ptr_src, 28305324fb0dSmrg &mc_address_src, &va_src); 28315324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 28325324fb0dSmrg 28335324fb0dSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096, 28345324fb0dSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, 28355324fb0dSmrg &bo_dst, (void **)&ptr_dst, 28365324fb0dSmrg &mc_address_dst, &va_dst); 28375324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 28385324fb0dSmrg 28395324fb0dSmrg memset(ptr_src, 0x55, bo_dst_size); 28405324fb0dSmrg 28415324fb0dSmrg i = 0; 28420ed5401bSmrg i += amdgpu_dispatch_init(ptr_cmd + i, ip_type, version); 28435324fb0dSmrg 28445324fb0dSmrg /* Issue commands to set cu mask used in current dispatch */ 28450ed5401bSmrg i += amdgpu_dispatch_write_cumask(ptr_cmd + i, version); 28465324fb0dSmrg 28475324fb0dSmrg /* Writes shader state to HW */ 28480ed5401bSmrg i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader, version); 28495324fb0dSmrg 28505324fb0dSmrg /* Write constant data */ 28515324fb0dSmrg /* Writes the texture resource constants data to the SGPRs */ 28525324fb0dSmrg ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4); 28535324fb0dSmrg ptr_cmd[i++] = 0x240; 28545324fb0dSmrg ptr_cmd[i++] = mc_address_src; 28555324fb0dSmrg ptr_cmd[i++] = (mc_address_src >> 32) | 0x100000; 
28565324fb0dSmrg ptr_cmd[i++] = 0x400; 28570ed5401bSmrg if (version == 9) 28580ed5401bSmrg ptr_cmd[i++] = 0x74fac; 28590ed5401bSmrg else if (version == 10) 28600ed5401bSmrg ptr_cmd[i++] = 0x1104bfac; 28615324fb0dSmrg 28625324fb0dSmrg /* Writes the UAV constant data to the SGPRs. */ 28635324fb0dSmrg ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4); 28645324fb0dSmrg ptr_cmd[i++] = 0x244; 28655324fb0dSmrg ptr_cmd[i++] = mc_address_dst; 28665324fb0dSmrg ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000; 28675324fb0dSmrg ptr_cmd[i++] = 0x400; 28680ed5401bSmrg if (version == 9) 28690ed5401bSmrg ptr_cmd[i++] = 0x74fac; 28700ed5401bSmrg else if (version == 10) 28710ed5401bSmrg ptr_cmd[i++] = 0x1104bfac; 28725324fb0dSmrg 287388f8a8d2Smrg /* clear mmCOMPUTE_RESOURCE_LIMITS */ 287488f8a8d2Smrg ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1); 287588f8a8d2Smrg ptr_cmd[i++] = 0x215; 287688f8a8d2Smrg ptr_cmd[i++] = 0; 287788f8a8d2Smrg 28785324fb0dSmrg /* dispatch direct command */ 28795324fb0dSmrg ptr_cmd[i++] = PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3); 28805324fb0dSmrg ptr_cmd[i++] = 0x10; 28815324fb0dSmrg ptr_cmd[i++] = 1; 28825324fb0dSmrg ptr_cmd[i++] = 1; 28835324fb0dSmrg ptr_cmd[i++] = 1; 28845324fb0dSmrg 28855324fb0dSmrg while (i & 7) 28865324fb0dSmrg ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */ 28875324fb0dSmrg 28885324fb0dSmrg resources[0] = bo_shader; 28895324fb0dSmrg resources[1] = bo_src; 28905324fb0dSmrg resources[2] = bo_dst; 28915324fb0dSmrg resources[3] = bo_cmd; 28925324fb0dSmrg r = amdgpu_bo_list_create(device_handle, 4, resources, NULL, &bo_list); 28935324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 28945324fb0dSmrg 28955324fb0dSmrg ib_info.ib_mc_address = mc_address_cmd; 28965324fb0dSmrg ib_info.size = i; 28975324fb0dSmrg ibs_request.ip_type = ip_type; 28985324fb0dSmrg ibs_request.ring = ring; 28995324fb0dSmrg ibs_request.resources = bo_list; 29005324fb0dSmrg ibs_request.number_of_ibs = 1; 29015324fb0dSmrg ibs_request.ibs = &ib_info; 29025324fb0dSmrg 
ibs_request.fence_info.handle = NULL; 29035324fb0dSmrg r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1); 29045324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 29055324fb0dSmrg 29065324fb0dSmrg fence_status.ip_type = ip_type; 29075324fb0dSmrg fence_status.ip_instance = 0; 29085324fb0dSmrg fence_status.ring = ring; 29095324fb0dSmrg fence_status.context = context_handle; 29105324fb0dSmrg fence_status.fence = ibs_request.seq_no; 29115324fb0dSmrg 29125324fb0dSmrg /* wait for IB accomplished */ 29135324fb0dSmrg r = amdgpu_cs_query_fence_status(&fence_status, 29145324fb0dSmrg AMDGPU_TIMEOUT_INFINITE, 29155324fb0dSmrg 0, &expired); 29165324fb0dSmrg 29179bd392adSmrg if (!hang) { 29189bd392adSmrg CU_ASSERT_EQUAL(r, 0); 29199bd392adSmrg CU_ASSERT_EQUAL(expired, true); 29209bd392adSmrg 29219bd392adSmrg /* verify if memcpy test result meets with expected */ 29229bd392adSmrg i = 0; 29239bd392adSmrg while(i < bo_dst_size) { 29249bd392adSmrg CU_ASSERT_EQUAL(ptr_dst[i], ptr_src[i]); 29259bd392adSmrg i++; 29269bd392adSmrg } 29279bd392adSmrg } else { 29289bd392adSmrg r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs); 29299bd392adSmrg CU_ASSERT_EQUAL(r, 0); 29309bd392adSmrg CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET); 29315324fb0dSmrg } 29325324fb0dSmrg 29335324fb0dSmrg r = amdgpu_bo_list_destroy(bo_list); 29345324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 29355324fb0dSmrg 29365324fb0dSmrg r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_dst_size); 29375324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 29385324fb0dSmrg r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size); 29395324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 29405324fb0dSmrg 29415324fb0dSmrg r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size); 29425324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 29435324fb0dSmrg 29445324fb0dSmrg r = amdgpu_bo_unmap_and_free(bo_shader, va_shader, mc_address_shader, bo_shader_size); 29455324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 29465324fb0dSmrg 29475324fb0dSmrg r = 
amdgpu_cs_ctx_free(context_handle); 29485324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 29495324fb0dSmrg} 295088f8a8d2Smrg 295188f8a8d2Smrgstatic void amdgpu_compute_dispatch_test(void) 29525324fb0dSmrg{ 29535324fb0dSmrg int r; 29545324fb0dSmrg struct drm_amdgpu_info_hw_ip info; 29550ed5401bSmrg uint32_t ring_id, version; 29565324fb0dSmrg 29575324fb0dSmrg r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_COMPUTE, 0, &info); 29585324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 295988f8a8d2Smrg if (!info.available_rings) 296088f8a8d2Smrg printf("SKIP ... as there's no compute ring\n"); 29615324fb0dSmrg 29620ed5401bSmrg version = info.hw_ip_version_major; 29630ed5401bSmrg if (version != 9 && version != 10) { 29640ed5401bSmrg printf("SKIP ... unsupported gfx version %d\n", version); 29650ed5401bSmrg return; 29660ed5401bSmrg } 29670ed5401bSmrg 29685324fb0dSmrg for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) { 29690ed5401bSmrg amdgpu_memset_dispatch_test(device_handle, AMDGPU_HW_IP_COMPUTE, ring_id, version); 29700ed5401bSmrg amdgpu_memcpy_dispatch_test(device_handle, AMDGPU_HW_IP_COMPUTE, ring_id, version, 0); 29715324fb0dSmrg } 297288f8a8d2Smrg} 297388f8a8d2Smrg 297488f8a8d2Smrgstatic void amdgpu_gfx_dispatch_test(void) 297588f8a8d2Smrg{ 297688f8a8d2Smrg int r; 297788f8a8d2Smrg struct drm_amdgpu_info_hw_ip info; 29780ed5401bSmrg uint32_t ring_id, version; 29795324fb0dSmrg 29805324fb0dSmrg r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info); 29815324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 298288f8a8d2Smrg if (!info.available_rings) 298388f8a8d2Smrg printf("SKIP ... as there's no graphics ring\n"); 29845324fb0dSmrg 29850ed5401bSmrg version = info.hw_ip_version_major; 29860ed5401bSmrg if (version != 9 && version != 10) { 29870ed5401bSmrg printf("SKIP ... 
unsupported gfx version %d\n", version); 29880ed5401bSmrg return; 29890ed5401bSmrg } 29900ed5401bSmrg 29915324fb0dSmrg for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) { 29920ed5401bSmrg amdgpu_memset_dispatch_test(device_handle, AMDGPU_HW_IP_GFX, ring_id, version); 29930ed5401bSmrg amdgpu_memcpy_dispatch_test(device_handle, AMDGPU_HW_IP_GFX, ring_id, version, 0); 29949bd392adSmrg } 29959bd392adSmrg} 29969bd392adSmrg 29979bd392adSmrgvoid amdgpu_dispatch_hang_helper(amdgpu_device_handle device_handle, uint32_t ip_type) 29989bd392adSmrg{ 29999bd392adSmrg int r; 30009bd392adSmrg struct drm_amdgpu_info_hw_ip info; 30010ed5401bSmrg uint32_t ring_id, version; 30029bd392adSmrg 30039bd392adSmrg r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &info); 30049bd392adSmrg CU_ASSERT_EQUAL(r, 0); 30059bd392adSmrg if (!info.available_rings) 30069bd392adSmrg printf("SKIP ... as there's no ring for ip %d\n", ip_type); 30079bd392adSmrg 30080ed5401bSmrg version = info.hw_ip_version_major; 30090ed5401bSmrg if (version != 9 && version != 10) { 30100ed5401bSmrg printf("SKIP ... 
unsupported gfx version %d\n", version); 30110ed5401bSmrg return; 30120ed5401bSmrg } 30130ed5401bSmrg 30149bd392adSmrg for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) { 30150ed5401bSmrg amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, version, 0); 30160ed5401bSmrg amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, version, 1); 30170ed5401bSmrg amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, version, 0); 30189bd392adSmrg } 30199bd392adSmrg} 30209bd392adSmrg 30219bd392adSmrgstatic void amdgpu_memcpy_dispatch_hang_slow_test(amdgpu_device_handle device_handle, 30220ed5401bSmrg uint32_t ip_type, uint32_t ring, int version) 30239bd392adSmrg{ 30249bd392adSmrg amdgpu_context_handle context_handle; 30259bd392adSmrg amdgpu_bo_handle bo_src, bo_dst, bo_shader, bo_cmd, resources[4]; 30269bd392adSmrg volatile unsigned char *ptr_dst; 30279bd392adSmrg void *ptr_shader; 30289bd392adSmrg unsigned char *ptr_src; 30299bd392adSmrg uint32_t *ptr_cmd; 30309bd392adSmrg uint64_t mc_address_src, mc_address_dst, mc_address_shader, mc_address_cmd; 30319bd392adSmrg amdgpu_va_handle va_src, va_dst, va_shader, va_cmd; 30329bd392adSmrg int i, r; 30339bd392adSmrg int bo_dst_size = 0x4000000; 30349bd392adSmrg int bo_shader_size = 0x400000; 30359bd392adSmrg int bo_cmd_size = 4096; 30369bd392adSmrg struct amdgpu_cs_request ibs_request = {0}; 30379bd392adSmrg struct amdgpu_cs_ib_info ib_info= {0}; 30389bd392adSmrg uint32_t hang_state, hangs, expired; 30399bd392adSmrg struct amdgpu_gpu_info gpu_info = {0}; 30409bd392adSmrg amdgpu_bo_list_handle bo_list; 30419bd392adSmrg struct amdgpu_cs_fence fence_status = {0}; 30429bd392adSmrg 30439bd392adSmrg r = amdgpu_query_gpu_info(device_handle, &gpu_info); 30449bd392adSmrg CU_ASSERT_EQUAL(r, 0); 30459bd392adSmrg 30469bd392adSmrg r = amdgpu_cs_ctx_create(device_handle, &context_handle); 30479bd392adSmrg CU_ASSERT_EQUAL(r, 0); 30489bd392adSmrg 30499bd392adSmrg r = amdgpu_bo_alloc_and_map(device_handle, 
bo_cmd_size, 4096, 30509bd392adSmrg AMDGPU_GEM_DOMAIN_GTT, 0, 30519bd392adSmrg &bo_cmd, (void **)&ptr_cmd, 30529bd392adSmrg &mc_address_cmd, &va_cmd); 30539bd392adSmrg CU_ASSERT_EQUAL(r, 0); 30549bd392adSmrg memset(ptr_cmd, 0, bo_cmd_size); 30559bd392adSmrg 30569bd392adSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096, 30579bd392adSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, 30589bd392adSmrg &bo_shader, &ptr_shader, 30599bd392adSmrg &mc_address_shader, &va_shader); 30609bd392adSmrg CU_ASSERT_EQUAL(r, 0); 30619bd392adSmrg memset(ptr_shader, 0, bo_shader_size); 30629bd392adSmrg 30639bd392adSmrg r = amdgpu_dispatch_load_cs_shader_hang_slow(ptr_shader, gpu_info.family_id); 30649bd392adSmrg CU_ASSERT_EQUAL(r, 0); 30659bd392adSmrg 30669bd392adSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096, 30679bd392adSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, 30689bd392adSmrg &bo_src, (void **)&ptr_src, 30699bd392adSmrg &mc_address_src, &va_src); 30709bd392adSmrg CU_ASSERT_EQUAL(r, 0); 30719bd392adSmrg 30729bd392adSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096, 30739bd392adSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, 30749bd392adSmrg &bo_dst, (void **)&ptr_dst, 30759bd392adSmrg &mc_address_dst, &va_dst); 30769bd392adSmrg CU_ASSERT_EQUAL(r, 0); 30779bd392adSmrg 30789bd392adSmrg memset(ptr_src, 0x55, bo_dst_size); 30799bd392adSmrg 30809bd392adSmrg i = 0; 30810ed5401bSmrg i += amdgpu_dispatch_init(ptr_cmd + i, ip_type, version); 30829bd392adSmrg 30839bd392adSmrg /* Issue commands to set cu mask used in current dispatch */ 30840ed5401bSmrg i += amdgpu_dispatch_write_cumask(ptr_cmd + i, version); 30859bd392adSmrg 30869bd392adSmrg /* Writes shader state to HW */ 30870ed5401bSmrg i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader, version); 30889bd392adSmrg 30899bd392adSmrg /* Write constant data */ 30909bd392adSmrg /* Writes the texture resource constants data to the SGPRs */ 30919bd392adSmrg ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4); 30929bd392adSmrg 
ptr_cmd[i++] = 0x240; 30939bd392adSmrg ptr_cmd[i++] = mc_address_src; 30949bd392adSmrg ptr_cmd[i++] = (mc_address_src >> 32) | 0x100000; 30959bd392adSmrg ptr_cmd[i++] = 0x400000; 30960ed5401bSmrg if (version == 9) 30970ed5401bSmrg ptr_cmd[i++] = 0x74fac; 30980ed5401bSmrg else if (version == 10) 30990ed5401bSmrg ptr_cmd[i++] = 0x1104bfac; 31009bd392adSmrg 31019bd392adSmrg /* Writes the UAV constant data to the SGPRs. */ 31029bd392adSmrg ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4); 31039bd392adSmrg ptr_cmd[i++] = 0x244; 31049bd392adSmrg ptr_cmd[i++] = mc_address_dst; 31059bd392adSmrg ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000; 31069bd392adSmrg ptr_cmd[i++] = 0x400000; 31070ed5401bSmrg if (version == 9) 31080ed5401bSmrg ptr_cmd[i++] = 0x74fac; 31090ed5401bSmrg else if (version == 10) 31100ed5401bSmrg ptr_cmd[i++] = 0x1104bfac; 31119bd392adSmrg 31129bd392adSmrg /* clear mmCOMPUTE_RESOURCE_LIMITS */ 31139bd392adSmrg ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1); 31149bd392adSmrg ptr_cmd[i++] = 0x215; 31159bd392adSmrg ptr_cmd[i++] = 0; 31169bd392adSmrg 31179bd392adSmrg /* dispatch direct command */ 31189bd392adSmrg ptr_cmd[i++] = PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3); 31199bd392adSmrg ptr_cmd[i++] = 0x10000; 31209bd392adSmrg ptr_cmd[i++] = 1; 31219bd392adSmrg ptr_cmd[i++] = 1; 31229bd392adSmrg ptr_cmd[i++] = 1; 31239bd392adSmrg 31249bd392adSmrg while (i & 7) 31259bd392adSmrg ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */ 31269bd392adSmrg 31279bd392adSmrg resources[0] = bo_shader; 31289bd392adSmrg resources[1] = bo_src; 31299bd392adSmrg resources[2] = bo_dst; 31309bd392adSmrg resources[3] = bo_cmd; 31319bd392adSmrg r = amdgpu_bo_list_create(device_handle, 4, resources, NULL, &bo_list); 31329bd392adSmrg CU_ASSERT_EQUAL(r, 0); 31339bd392adSmrg 31349bd392adSmrg ib_info.ib_mc_address = mc_address_cmd; 31359bd392adSmrg ib_info.size = i; 31369bd392adSmrg ibs_request.ip_type = ip_type; 31379bd392adSmrg ibs_request.ring = ring; 31389bd392adSmrg 
ibs_request.resources = bo_list; 31399bd392adSmrg ibs_request.number_of_ibs = 1; 31409bd392adSmrg ibs_request.ibs = &ib_info; 31419bd392adSmrg ibs_request.fence_info.handle = NULL; 31429bd392adSmrg r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1); 31439bd392adSmrg CU_ASSERT_EQUAL(r, 0); 31449bd392adSmrg 31459bd392adSmrg fence_status.ip_type = ip_type; 31469bd392adSmrg fence_status.ip_instance = 0; 31479bd392adSmrg fence_status.ring = ring; 31489bd392adSmrg fence_status.context = context_handle; 31499bd392adSmrg fence_status.fence = ibs_request.seq_no; 31509bd392adSmrg 31519bd392adSmrg /* wait for IB accomplished */ 31529bd392adSmrg r = amdgpu_cs_query_fence_status(&fence_status, 31539bd392adSmrg AMDGPU_TIMEOUT_INFINITE, 31549bd392adSmrg 0, &expired); 31559bd392adSmrg 31569bd392adSmrg r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs); 31579bd392adSmrg CU_ASSERT_EQUAL(r, 0); 31589bd392adSmrg CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET); 31599bd392adSmrg 31609bd392adSmrg r = amdgpu_bo_list_destroy(bo_list); 31619bd392adSmrg CU_ASSERT_EQUAL(r, 0); 31629bd392adSmrg 31639bd392adSmrg r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_dst_size); 31649bd392adSmrg CU_ASSERT_EQUAL(r, 0); 31659bd392adSmrg r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size); 31669bd392adSmrg CU_ASSERT_EQUAL(r, 0); 31679bd392adSmrg 31689bd392adSmrg r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size); 31699bd392adSmrg CU_ASSERT_EQUAL(r, 0); 31709bd392adSmrg 31719bd392adSmrg r = amdgpu_bo_unmap_and_free(bo_shader, va_shader, mc_address_shader, bo_shader_size); 31729bd392adSmrg CU_ASSERT_EQUAL(r, 0); 31739bd392adSmrg 31749bd392adSmrg r = amdgpu_cs_ctx_free(context_handle); 31759bd392adSmrg CU_ASSERT_EQUAL(r, 0); 31769bd392adSmrg} 31779bd392adSmrg 31789bd392adSmrgvoid amdgpu_dispatch_hang_slow_helper(amdgpu_device_handle device_handle, uint32_t ip_type) 31799bd392adSmrg{ 31809bd392adSmrg int r; 
31819bd392adSmrg struct drm_amdgpu_info_hw_ip info; 31820ed5401bSmrg uint32_t ring_id, version; 31839bd392adSmrg 31849bd392adSmrg r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &info); 31859bd392adSmrg CU_ASSERT_EQUAL(r, 0); 31869bd392adSmrg if (!info.available_rings) 31879bd392adSmrg printf("SKIP ... as there's no ring for ip %d\n", ip_type); 31889bd392adSmrg 31890ed5401bSmrg version = info.hw_ip_version_major; 31900ed5401bSmrg if (version != 9 && version != 10) { 31910ed5401bSmrg printf("SKIP ... unsupported gfx version %d\n", version); 31920ed5401bSmrg return; 31930ed5401bSmrg } 31940ed5401bSmrg 31959bd392adSmrg for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) { 31960ed5401bSmrg amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, version, 0); 31970ed5401bSmrg amdgpu_memcpy_dispatch_hang_slow_test(device_handle, ip_type, ring_id, version); 31980ed5401bSmrg amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, version, 0); 31999bd392adSmrg } 32009bd392adSmrg} 32019bd392adSmrg 32029bd392adSmrgstatic int amdgpu_draw_load_ps_shader_hang_slow(uint32_t *ptr, int family) 32039bd392adSmrg{ 32049bd392adSmrg struct amdgpu_test_shader *shader; 32059bd392adSmrg int i, loop = 0x40000; 32069bd392adSmrg 32079bd392adSmrg switch (family) { 32089bd392adSmrg case AMDGPU_FAMILY_AI: 32099bd392adSmrg case AMDGPU_FAMILY_RV: 32109bd392adSmrg shader = &memcpy_ps_hang_slow_ai; 32119bd392adSmrg break; 32129bd392adSmrg default: 32139bd392adSmrg return -1; 32149bd392adSmrg break; 32155324fb0dSmrg } 32169bd392adSmrg 32179bd392adSmrg memcpy(ptr, shader->shader, shader->header_length * sizeof(uint32_t)); 32189bd392adSmrg 32199bd392adSmrg for (i = 0; i < loop; i++) 32209bd392adSmrg memcpy(ptr + shader->header_length + shader->body_length * i, 32219bd392adSmrg shader->shader + shader->header_length, 32229bd392adSmrg shader->body_length * sizeof(uint32_t)); 32239bd392adSmrg 32249bd392adSmrg memcpy(ptr + shader->header_length + shader->body_length * loop, 
32259bd392adSmrg shader->shader + shader->header_length + shader->body_length, 32269bd392adSmrg shader->foot_length * sizeof(uint32_t)); 32279bd392adSmrg 32289bd392adSmrg return 0; 32295324fb0dSmrg} 32305324fb0dSmrg 32310ed5401bSmrgstatic int amdgpu_draw_load_ps_shader(uint8_t *ptr, int ps_type, uint32_t version) 32325324fb0dSmrg{ 32335324fb0dSmrg int i; 32345324fb0dSmrg uint32_t shader_offset= 256; 32355324fb0dSmrg uint32_t mem_offset, patch_code_offset; 32365324fb0dSmrg uint32_t shader_size, patchinfo_code_size; 32375324fb0dSmrg const uint32_t *shader; 32385324fb0dSmrg const uint32_t *patchinfo_code; 32395324fb0dSmrg const uint32_t *patchcode_offset; 32405324fb0dSmrg 32415324fb0dSmrg switch (ps_type) { 32425324fb0dSmrg case PS_CONST: 32430ed5401bSmrg if (version == 9) { 32440ed5401bSmrg shader = ps_const_shader_gfx9; 32450ed5401bSmrg shader_size = sizeof(ps_const_shader_gfx9); 32460ed5401bSmrg patchinfo_code = (const uint32_t *)ps_const_shader_patchinfo_code_gfx9; 32470ed5401bSmrg patchinfo_code_size = ps_const_shader_patchinfo_code_size_gfx9; 32480ed5401bSmrg patchcode_offset = ps_const_shader_patchinfo_offset_gfx9; 32490ed5401bSmrg } else if (version == 10){ 32500ed5401bSmrg shader = ps_const_shader_gfx10; 32510ed5401bSmrg shader_size = sizeof(ps_const_shader_gfx10); 32520ed5401bSmrg patchinfo_code = (const uint32_t *)ps_const_shader_patchinfo_code_gfx10; 32530ed5401bSmrg patchinfo_code_size = ps_const_shader_patchinfo_code_size_gfx10; 32540ed5401bSmrg patchcode_offset = ps_const_shader_patchinfo_offset_gfx10; 32550ed5401bSmrg } 32565324fb0dSmrg break; 32575324fb0dSmrg case PS_TEX: 32580ed5401bSmrg if (version == 9) { 32590ed5401bSmrg shader = ps_tex_shader_gfx9; 32600ed5401bSmrg shader_size = sizeof(ps_tex_shader_gfx9); 32610ed5401bSmrg patchinfo_code = (const uint32_t *)ps_tex_shader_patchinfo_code_gfx9; 32620ed5401bSmrg patchinfo_code_size = ps_tex_shader_patchinfo_code_size_gfx9; 32630ed5401bSmrg patchcode_offset = ps_tex_shader_patchinfo_offset_gfx9; 
32640ed5401bSmrg } else if (version == 10) { 32650ed5401bSmrg shader = ps_tex_shader_gfx10; 32660ed5401bSmrg shader_size = sizeof(ps_tex_shader_gfx10); 32670ed5401bSmrg patchinfo_code = (const uint32_t *)ps_tex_shader_patchinfo_code_gfx10; 32680ed5401bSmrg patchinfo_code_size = ps_tex_shader_patchinfo_code_size_gfx10; 32690ed5401bSmrg patchcode_offset = ps_tex_shader_patchinfo_offset_gfx10; 32700ed5401bSmrg } 32715324fb0dSmrg break; 32729bd392adSmrg case PS_HANG: 32739bd392adSmrg shader = memcpy_ps_hang; 32749bd392adSmrg shader_size = sizeof(memcpy_ps_hang); 32759bd392adSmrg 32769bd392adSmrg memcpy(ptr, shader, shader_size); 32779bd392adSmrg return 0; 32785324fb0dSmrg default: 32795324fb0dSmrg return -1; 32805324fb0dSmrg break; 32815324fb0dSmrg } 32825324fb0dSmrg 32835324fb0dSmrg /* write main shader program */ 32845324fb0dSmrg for (i = 0 ; i < 10; i++) { 32855324fb0dSmrg mem_offset = i * shader_offset; 32865324fb0dSmrg memcpy(ptr + mem_offset, shader, shader_size); 32875324fb0dSmrg } 32885324fb0dSmrg 32895324fb0dSmrg /* overwrite patch codes */ 32905324fb0dSmrg for (i = 0 ; i < 10; i++) { 32915324fb0dSmrg mem_offset = i * shader_offset + patchcode_offset[0] * sizeof(uint32_t); 32925324fb0dSmrg patch_code_offset = i * patchinfo_code_size; 32935324fb0dSmrg memcpy(ptr + mem_offset, 32945324fb0dSmrg patchinfo_code + patch_code_offset, 32955324fb0dSmrg patchinfo_code_size * sizeof(uint32_t)); 32965324fb0dSmrg } 32975324fb0dSmrg 32985324fb0dSmrg return 0; 32995324fb0dSmrg} 33005324fb0dSmrg 33015324fb0dSmrg/* load RectPosTexFast_VS */ 33020ed5401bSmrgstatic int amdgpu_draw_load_vs_shader(uint8_t *ptr, uint32_t version) 33035324fb0dSmrg{ 33045324fb0dSmrg const uint32_t *shader; 33055324fb0dSmrg uint32_t shader_size; 33065324fb0dSmrg 33070ed5401bSmrg if (version == 9) { 33080ed5401bSmrg shader = vs_RectPosTexFast_shader_gfx9; 33090ed5401bSmrg shader_size = sizeof(vs_RectPosTexFast_shader_gfx9); 33100ed5401bSmrg } else if (version == 10) { 33110ed5401bSmrg shader = 
vs_RectPosTexFast_shader_gfx10; 33120ed5401bSmrg shader_size = sizeof(vs_RectPosTexFast_shader_gfx10); 33130ed5401bSmrg } 33145324fb0dSmrg 33155324fb0dSmrg memcpy(ptr, shader, shader_size); 33165324fb0dSmrg 33175324fb0dSmrg return 0; 33185324fb0dSmrg} 33195324fb0dSmrg 33200ed5401bSmrgstatic int amdgpu_draw_init(uint32_t *ptr, uint32_t version) 33215324fb0dSmrg{ 33225324fb0dSmrg int i = 0; 33235324fb0dSmrg const uint32_t *preamblecache_ptr; 33245324fb0dSmrg uint32_t preamblecache_size; 33255324fb0dSmrg 33265324fb0dSmrg /* Write context control and load shadowing register if necessary */ 33275324fb0dSmrg ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1); 33285324fb0dSmrg ptr[i++] = 0x80000000; 33295324fb0dSmrg ptr[i++] = 0x80000000; 33305324fb0dSmrg 33310ed5401bSmrg if (version == 9) { 33320ed5401bSmrg preamblecache_ptr = preamblecache_gfx9; 33330ed5401bSmrg preamblecache_size = sizeof(preamblecache_gfx9); 33340ed5401bSmrg } else if (version == 10) { 33350ed5401bSmrg preamblecache_ptr = preamblecache_gfx10; 33360ed5401bSmrg preamblecache_size = sizeof(preamblecache_gfx10); 33370ed5401bSmrg } 33385324fb0dSmrg 33395324fb0dSmrg memcpy(ptr + i, preamblecache_ptr, preamblecache_size); 33405324fb0dSmrg return i + preamblecache_size/sizeof(uint32_t); 33415324fb0dSmrg} 33425324fb0dSmrg 33435324fb0dSmrgstatic int amdgpu_draw_setup_and_write_drawblt_surf_info(uint32_t *ptr, 33449bd392adSmrg uint64_t dst_addr, 33450ed5401bSmrg uint32_t version, 33469bd392adSmrg int hang_slow) 33475324fb0dSmrg{ 33485324fb0dSmrg int i = 0; 33495324fb0dSmrg 33505324fb0dSmrg /* setup color buffer */ 33510ed5401bSmrg if (version == 9) { 33520ed5401bSmrg /* offset reg 33530ed5401bSmrg 0xA318 CB_COLOR0_BASE 33540ed5401bSmrg 0xA319 CB_COLOR0_BASE_EXT 33550ed5401bSmrg 0xA31A CB_COLOR0_ATTRIB2 33560ed5401bSmrg 0xA31B CB_COLOR0_VIEW 33570ed5401bSmrg 0xA31C CB_COLOR0_INFO 33580ed5401bSmrg 0xA31D CB_COLOR0_ATTRIB 33590ed5401bSmrg 0xA31E CB_COLOR0_DCC_CONTROL 33600ed5401bSmrg 0xA31F CB_COLOR0_CMASK 
33610ed5401bSmrg 0xA320 CB_COLOR0_CMASK_BASE_EXT 33620ed5401bSmrg 0xA321 CB_COLOR0_FMASK 33630ed5401bSmrg 0xA322 CB_COLOR0_FMASK_BASE_EXT 33640ed5401bSmrg 0xA323 CB_COLOR0_CLEAR_WORD0 33650ed5401bSmrg 0xA324 CB_COLOR0_CLEAR_WORD1 33660ed5401bSmrg 0xA325 CB_COLOR0_DCC_BASE 33670ed5401bSmrg 0xA326 CB_COLOR0_DCC_BASE_EXT */ 33680ed5401bSmrg ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 15); 33690ed5401bSmrg ptr[i++] = 0x318; 33700ed5401bSmrg ptr[i++] = dst_addr >> 8; 33710ed5401bSmrg ptr[i++] = dst_addr >> 40; 33720ed5401bSmrg ptr[i++] = hang_slow ? 0x3ffc7ff : 0x7c01f; 33730ed5401bSmrg ptr[i++] = 0; 33740ed5401bSmrg ptr[i++] = 0x50438; 33750ed5401bSmrg ptr[i++] = 0x10140000; 33760ed5401bSmrg i += 9; 33770ed5401bSmrg 33780ed5401bSmrg /* mmCB_MRT0_EPITCH */ 33790ed5401bSmrg ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 33800ed5401bSmrg ptr[i++] = 0x1e8; 33810ed5401bSmrg ptr[i++] = hang_slow ? 0xfff : 0x1f; 33820ed5401bSmrg } else if (version == 10) { 33830ed5401bSmrg /* 0xA318 CB_COLOR0_BASE 33840ed5401bSmrg 0xA319 CB_COLOR0_PITCH 33850ed5401bSmrg 0xA31A CB_COLOR0_SLICE 33860ed5401bSmrg 0xA31B CB_COLOR0_VIEW 33870ed5401bSmrg 0xA31C CB_COLOR0_INFO 33880ed5401bSmrg 0xA31D CB_COLOR0_ATTRIB 33890ed5401bSmrg 0xA31E CB_COLOR0_DCC_CONTROL 33900ed5401bSmrg 0xA31F CB_COLOR0_CMASK 33910ed5401bSmrg 0xA320 CB_COLOR0_CMASK_SLICE 33920ed5401bSmrg 0xA321 CB_COLOR0_FMASK 33930ed5401bSmrg 0xA322 CB_COLOR0_FMASK_SLICE 33940ed5401bSmrg 0xA323 CB_COLOR0_CLEAR_WORD0 33950ed5401bSmrg 0xA324 CB_COLOR0_CLEAR_WORD1 33960ed5401bSmrg 0xA325 CB_COLOR0_DCC_BASE */ 33970ed5401bSmrg ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 14); 33980ed5401bSmrg ptr[i++] = 0x318; 33990ed5401bSmrg ptr[i++] = dst_addr >> 8; 34000ed5401bSmrg i += 3; 34010ed5401bSmrg ptr[i++] = 0x50438; 34020ed5401bSmrg i += 9; 34030ed5401bSmrg 34040ed5401bSmrg /* 0xA390 CB_COLOR0_BASE_EXT */ 34050ed5401bSmrg ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 34060ed5401bSmrg ptr[i++] = 0x390; 34070ed5401bSmrg ptr[i++] = dst_addr >> 40; 
34080ed5401bSmrg 34090ed5401bSmrg /* 0xA398 CB_COLOR0_CMASK_BASE_EXT */ 34100ed5401bSmrg ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 34110ed5401bSmrg ptr[i++] = 0x398; 34120ed5401bSmrg ptr[i++] = 0; 34135324fb0dSmrg 34140ed5401bSmrg /* 0xA3A0 CB_COLOR0_FMASK_BASE_EXT */ 34150ed5401bSmrg ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 34160ed5401bSmrg ptr[i++] = 0x3a0; 34170ed5401bSmrg ptr[i++] = 0; 34180ed5401bSmrg 34190ed5401bSmrg /* 0xA3A8 CB_COLOR0_DCC_BASE_EXT */ 34200ed5401bSmrg ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 34210ed5401bSmrg ptr[i++] = 0x3a8; 34220ed5401bSmrg ptr[i++] = 0; 34230ed5401bSmrg 34240ed5401bSmrg /* 0xA3B0 CB_COLOR0_ATTRIB2 */ 34250ed5401bSmrg ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 34260ed5401bSmrg ptr[i++] = 0x3b0; 34270ed5401bSmrg ptr[i++] = hang_slow ? 0x3ffc7ff : 0x7c01f; 34280ed5401bSmrg 34290ed5401bSmrg /* 0xA3B8 CB_COLOR0_ATTRIB3 */ 34300ed5401bSmrg ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 34310ed5401bSmrg ptr[i++] = 0x3b8; 34320ed5401bSmrg ptr[i++] = 0x9014000; 34330ed5401bSmrg } 34345324fb0dSmrg 34355324fb0dSmrg /* 0xA32B CB_COLOR1_BASE */ 34365324fb0dSmrg ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 34375324fb0dSmrg ptr[i++] = 0x32b; 34385324fb0dSmrg ptr[i++] = 0; 34395324fb0dSmrg 34405324fb0dSmrg /* 0xA33A CB_COLOR1_BASE */ 34415324fb0dSmrg ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 34425324fb0dSmrg ptr[i++] = 0x33a; 34435324fb0dSmrg ptr[i++] = 0; 34445324fb0dSmrg 34455324fb0dSmrg /* SPI_SHADER_COL_FORMAT */ 34465324fb0dSmrg ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 34475324fb0dSmrg ptr[i++] = 0x1c5; 34485324fb0dSmrg ptr[i++] = 9; 34495324fb0dSmrg 34505324fb0dSmrg /* Setup depth buffer */ 34510ed5401bSmrg if (version == 9) { 34520ed5401bSmrg /* mmDB_Z_INFO */ 34530ed5401bSmrg ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2); 34540ed5401bSmrg ptr[i++] = 0xe; 34550ed5401bSmrg i += 2; 34560ed5401bSmrg } else if (version == 10) { 34570ed5401bSmrg /* mmDB_Z_INFO */ 34580ed5401bSmrg ptr[i++] = 
PACKET3(PACKET3_SET_CONTEXT_REG, 2); 34590ed5401bSmrg ptr[i++] = 0x10; 34600ed5401bSmrg i += 2; 34610ed5401bSmrg } 34625324fb0dSmrg 34635324fb0dSmrg return i; 34645324fb0dSmrg} 34655324fb0dSmrg 34660ed5401bSmrgstatic int amdgpu_draw_setup_and_write_drawblt_state(uint32_t *ptr, 34670ed5401bSmrg uint32_t version, 34680ed5401bSmrg int hang_slow) 34695324fb0dSmrg{ 34705324fb0dSmrg int i = 0; 34715324fb0dSmrg const uint32_t *cached_cmd_ptr; 34725324fb0dSmrg uint32_t cached_cmd_size; 34735324fb0dSmrg 34745324fb0dSmrg /* mmPA_SC_TILE_STEERING_OVERRIDE */ 34755324fb0dSmrg ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 34765324fb0dSmrg ptr[i++] = 0xd7; 34775324fb0dSmrg ptr[i++] = 0; 34785324fb0dSmrg 34795324fb0dSmrg ptr[i++] = 0xffff1000; 34805324fb0dSmrg ptr[i++] = 0xc0021000; 34815324fb0dSmrg 34825324fb0dSmrg ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 34835324fb0dSmrg ptr[i++] = 0xd7; 34840ed5401bSmrg if (version == 9) 34850ed5401bSmrg ptr[i++] = 1; 34860ed5401bSmrg else if (version == 10) 34870ed5401bSmrg ptr[i++] = 0; 34885324fb0dSmrg 34895324fb0dSmrg /* mmPA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0 */ 34905324fb0dSmrg ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 16); 34915324fb0dSmrg ptr[i++] = 0x2fe; 34925324fb0dSmrg i += 16; 34935324fb0dSmrg 34945324fb0dSmrg /* mmPA_SC_CENTROID_PRIORITY_0 */ 34955324fb0dSmrg ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2); 34965324fb0dSmrg ptr[i++] = 0x2f5; 34975324fb0dSmrg i += 2; 34985324fb0dSmrg 34990ed5401bSmrg if (version == 9) { 35000ed5401bSmrg cached_cmd_ptr = cached_cmd_gfx9; 35010ed5401bSmrg cached_cmd_size = sizeof(cached_cmd_gfx9); 35020ed5401bSmrg } else if (version == 10) { 35030ed5401bSmrg cached_cmd_ptr = cached_cmd_gfx10; 35040ed5401bSmrg cached_cmd_size = sizeof(cached_cmd_gfx10); 35050ed5401bSmrg } 35065324fb0dSmrg 35075324fb0dSmrg memcpy(ptr + i, cached_cmd_ptr, cached_cmd_size); 35089bd392adSmrg if (hang_slow) 35099bd392adSmrg *(ptr + i + 12) = 0x8000800; 35105324fb0dSmrg i += cached_cmd_size/sizeof(uint32_t); 
35115324fb0dSmrg 35120ed5401bSmrg if (version == 10) { 35130ed5401bSmrg /* mmCB_RMI_GL2_CACHE_CONTROL */ 35140ed5401bSmrg ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 35150ed5401bSmrg ptr[i++] = 0x104; 35160ed5401bSmrg ptr[i++] = 0x40aa0055; 35170ed5401bSmrg /* mmDB_RMI_L2_CACHE_CONTROL */ 35180ed5401bSmrg ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 35190ed5401bSmrg ptr[i++] = 0x1f; 35200ed5401bSmrg ptr[i++] = 0x2a0055; 35210ed5401bSmrg } 35220ed5401bSmrg 35235324fb0dSmrg return i; 35245324fb0dSmrg} 35255324fb0dSmrg 35265324fb0dSmrgstatic int amdgpu_draw_vs_RectPosTexFast_write2hw(uint32_t *ptr, 35275324fb0dSmrg int ps_type, 35289bd392adSmrg uint64_t shader_addr, 35290ed5401bSmrg uint32_t version, 35309bd392adSmrg int hang_slow) 35315324fb0dSmrg{ 35325324fb0dSmrg int i = 0; 35335324fb0dSmrg 35345324fb0dSmrg /* mmPA_CL_VS_OUT_CNTL */ 35355324fb0dSmrg ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 35365324fb0dSmrg ptr[i++] = 0x207; 35375324fb0dSmrg ptr[i++] = 0; 35385324fb0dSmrg 35390ed5401bSmrg if (version == 9) { 35400ed5401bSmrg /* mmSPI_SHADER_PGM_RSRC3_VS */ 35410ed5401bSmrg ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1); 35420ed5401bSmrg ptr[i++] = 0x46; 35430ed5401bSmrg ptr[i++] = 0xffff; 35440ed5401bSmrg } else if (version == 10) { 35450ed5401bSmrg /* mmSPI_SHADER_PGM_RSRC3_VS */ 35460ed5401bSmrg ptr[i++] = PACKET3(PKT3_SET_SH_REG_INDEX, 1); 35470ed5401bSmrg ptr[i++] = 0x30000046; 35480ed5401bSmrg ptr[i++] = 0xffff; 35490ed5401bSmrg /* mmSPI_SHADER_PGM_RSRC4_VS */ 35500ed5401bSmrg ptr[i++] = PACKET3(PKT3_SET_SH_REG_INDEX, 1); 35510ed5401bSmrg ptr[i++] = 0x30000041; 35520ed5401bSmrg ptr[i++] = 0xffff; 35530ed5401bSmrg } 35545324fb0dSmrg 35555324fb0dSmrg /* mmSPI_SHADER_PGM_LO_VS...mmSPI_SHADER_PGM_HI_VS */ 35565324fb0dSmrg ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2); 35575324fb0dSmrg ptr[i++] = 0x48; 35585324fb0dSmrg ptr[i++] = shader_addr >> 8; 35595324fb0dSmrg ptr[i++] = shader_addr >> 40; 35605324fb0dSmrg 35615324fb0dSmrg /* mmSPI_SHADER_PGM_RSRC1_VS */ 
35625324fb0dSmrg ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1); 35635324fb0dSmrg ptr[i++] = 0x4a; 35640ed5401bSmrg if (version == 9) 35650ed5401bSmrg ptr[i++] = 0xc0081; 35660ed5401bSmrg else if (version == 10) 35670ed5401bSmrg ptr[i++] = 0xc0041; 35685324fb0dSmrg /* mmSPI_SHADER_PGM_RSRC2_VS */ 35695324fb0dSmrg ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1); 35705324fb0dSmrg ptr[i++] = 0x4b; 35715324fb0dSmrg ptr[i++] = 0x18; 35725324fb0dSmrg 35735324fb0dSmrg /* mmSPI_VS_OUT_CONFIG */ 35745324fb0dSmrg ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 35755324fb0dSmrg ptr[i++] = 0x1b1; 35765324fb0dSmrg ptr[i++] = 2; 35775324fb0dSmrg 35785324fb0dSmrg /* mmSPI_SHADER_POS_FORMAT */ 35795324fb0dSmrg ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 35805324fb0dSmrg ptr[i++] = 0x1c3; 35815324fb0dSmrg ptr[i++] = 4; 35825324fb0dSmrg 35835324fb0dSmrg ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4); 35845324fb0dSmrg ptr[i++] = 0x4c; 35855324fb0dSmrg i += 2; 35869bd392adSmrg ptr[i++] = hang_slow ? 0x45000000 : 0x42000000; 35879bd392adSmrg ptr[i++] = hang_slow ? 
0x45000000 : 0x42000000; 35885324fb0dSmrg 35895324fb0dSmrg ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4); 35905324fb0dSmrg ptr[i++] = 0x50; 35915324fb0dSmrg i += 2; 35925324fb0dSmrg if (ps_type == PS_CONST) { 35935324fb0dSmrg i += 2; 35945324fb0dSmrg } else if (ps_type == PS_TEX) { 35955324fb0dSmrg ptr[i++] = 0x3f800000; 35965324fb0dSmrg ptr[i++] = 0x3f800000; 35975324fb0dSmrg } 35985324fb0dSmrg 35995324fb0dSmrg ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4); 36005324fb0dSmrg ptr[i++] = 0x54; 36015324fb0dSmrg i += 4; 36025324fb0dSmrg 36035324fb0dSmrg return i; 36045324fb0dSmrg} 36055324fb0dSmrg 36065324fb0dSmrgstatic int amdgpu_draw_ps_write2hw(uint32_t *ptr, 36075324fb0dSmrg int ps_type, 36080ed5401bSmrg uint64_t shader_addr, 36090ed5401bSmrg uint32_t version) 36105324fb0dSmrg{ 36115324fb0dSmrg int i, j; 36125324fb0dSmrg const uint32_t *sh_registers; 36135324fb0dSmrg const uint32_t *context_registers; 36145324fb0dSmrg uint32_t num_sh_reg, num_context_reg; 36155324fb0dSmrg 36165324fb0dSmrg if (ps_type == PS_CONST) { 36170ed5401bSmrg if (version == 9) { 36180ed5401bSmrg sh_registers = (const uint32_t *)ps_const_sh_registers_gfx9; 36190ed5401bSmrg num_sh_reg = ps_num_sh_registers_gfx9; 36200ed5401bSmrg } else if (version == 10) { 36210ed5401bSmrg sh_registers = (const uint32_t *)ps_const_sh_registers_gfx10; 36220ed5401bSmrg num_sh_reg = ps_num_sh_registers_gfx10; 36230ed5401bSmrg } 36245324fb0dSmrg context_registers = (const uint32_t *)ps_const_context_reg_gfx9; 36255324fb0dSmrg num_context_reg = ps_num_context_registers_gfx9; 36265324fb0dSmrg } else if (ps_type == PS_TEX) { 36275324fb0dSmrg sh_registers = (const uint32_t *)ps_tex_sh_registers_gfx9; 36285324fb0dSmrg context_registers = (const uint32_t *)ps_tex_context_reg_gfx9; 36295324fb0dSmrg num_sh_reg = ps_num_sh_registers_gfx9; 36305324fb0dSmrg num_context_reg = ps_num_context_registers_gfx9; 36315324fb0dSmrg } 36325324fb0dSmrg 36335324fb0dSmrg i = 0; 36345324fb0dSmrg 36350ed5401bSmrg if (version == 9) { 36360ed5401bSmrg /* 
0x2c07 SPI_SHADER_PGM_RSRC3_PS 36370ed5401bSmrg 0x2c08 SPI_SHADER_PGM_LO_PS 36380ed5401bSmrg 0x2c09 SPI_SHADER_PGM_HI_PS */ 36390ed5401bSmrg /* multiplicator 9 is from SPI_SHADER_COL_FORMAT */ 36400ed5401bSmrg shader_addr += 256 * 9; 36410ed5401bSmrg ptr[i++] = PACKET3(PKT3_SET_SH_REG, 3); 36420ed5401bSmrg ptr[i++] = 0x7; 36430ed5401bSmrg ptr[i++] = 0xffff; 36440ed5401bSmrg ptr[i++] = shader_addr >> 8; 36450ed5401bSmrg ptr[i++] = shader_addr >> 40; 36460ed5401bSmrg } else if (version == 10) { 36470ed5401bSmrg shader_addr += 256 * 9; 36480ed5401bSmrg /* 0x2c08 SPI_SHADER_PGM_LO_PS 36490ed5401bSmrg 0x2c09 SPI_SHADER_PGM_HI_PS */ 36500ed5401bSmrg ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2); 36510ed5401bSmrg ptr[i++] = 0x8; 36520ed5401bSmrg ptr[i++] = shader_addr >> 8; 36530ed5401bSmrg ptr[i++] = shader_addr >> 40; 36540ed5401bSmrg 36550ed5401bSmrg /* mmSPI_SHADER_PGM_RSRC3_PS */ 36560ed5401bSmrg ptr[i++] = PACKET3(PKT3_SET_SH_REG_INDEX, 1); 36570ed5401bSmrg ptr[i++] = 0x30000007; 36580ed5401bSmrg ptr[i++] = 0xffff; 36590ed5401bSmrg /* mmSPI_SHADER_PGM_RSRC4_PS */ 36600ed5401bSmrg ptr[i++] = PACKET3(PKT3_SET_SH_REG_INDEX, 1); 36610ed5401bSmrg ptr[i++] = 0x30000001; 36620ed5401bSmrg ptr[i++] = 0xffff; 36630ed5401bSmrg } 36645324fb0dSmrg 36655324fb0dSmrg for (j = 0; j < num_sh_reg; j++) { 36665324fb0dSmrg ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1); 36675324fb0dSmrg ptr[i++] = sh_registers[j * 2] - 0x2c00; 36685324fb0dSmrg ptr[i++] = sh_registers[j * 2 + 1]; 36695324fb0dSmrg } 36705324fb0dSmrg 36715324fb0dSmrg for (j = 0; j < num_context_reg; j++) { 36725324fb0dSmrg if (context_registers[j * 2] != 0xA1C5) { 36735324fb0dSmrg ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 36745324fb0dSmrg ptr[i++] = context_registers[j * 2] - 0xa000; 36755324fb0dSmrg ptr[i++] = context_registers[j * 2 + 1]; 36765324fb0dSmrg } 36775324fb0dSmrg 36785324fb0dSmrg if (context_registers[j * 2] == 0xA1B4) { 36795324fb0dSmrg ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 36805324fb0dSmrg ptr[i++] = 0x1b3; 
36815324fb0dSmrg ptr[i++] = 2; 36825324fb0dSmrg } 36835324fb0dSmrg } 36845324fb0dSmrg 36855324fb0dSmrg return i; 36865324fb0dSmrg} 36875324fb0dSmrg 36880ed5401bSmrgstatic int amdgpu_draw_draw(uint32_t *ptr, uint32_t version) 36895324fb0dSmrg{ 36905324fb0dSmrg int i = 0; 36915324fb0dSmrg 36920ed5401bSmrg if (version == 9) { 36930ed5401bSmrg /* mmIA_MULTI_VGT_PARAM */ 36940ed5401bSmrg ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1); 36950ed5401bSmrg ptr[i++] = 0x40000258; 36960ed5401bSmrg ptr[i++] = 0xd00ff; 36970ed5401bSmrg /* mmVGT_PRIMITIVE_TYPE */ 36980ed5401bSmrg ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1); 36990ed5401bSmrg ptr[i++] = 0x10000242; 37000ed5401bSmrg ptr[i++] = 0x11; 37010ed5401bSmrg } else if (version == 10) { 37020ed5401bSmrg /* mmGE_CNTL */ 37030ed5401bSmrg ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1); 37040ed5401bSmrg ptr[i++] = 0x25b; 37050ed5401bSmrg ptr[i++] = 0xff; 37060ed5401bSmrg /* mmVGT_PRIMITIVE_TYPE */ 37070ed5401bSmrg ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1); 37080ed5401bSmrg ptr[i++] = 0x242; 37090ed5401bSmrg ptr[i++] = 0x11; 37100ed5401bSmrg } 37115324fb0dSmrg 37125324fb0dSmrg ptr[i++] = PACKET3(PACKET3_DRAW_INDEX_AUTO, 1); 37135324fb0dSmrg ptr[i++] = 3; 37145324fb0dSmrg ptr[i++] = 2; 37155324fb0dSmrg 37165324fb0dSmrg return i; 37175324fb0dSmrg} 37185324fb0dSmrg 37195324fb0dSmrgvoid amdgpu_memset_draw(amdgpu_device_handle device_handle, 37205324fb0dSmrg amdgpu_bo_handle bo_shader_ps, 37215324fb0dSmrg amdgpu_bo_handle bo_shader_vs, 37225324fb0dSmrg uint64_t mc_address_shader_ps, 37235324fb0dSmrg uint64_t mc_address_shader_vs, 37240ed5401bSmrg uint32_t ring_id, uint32_t version) 37255324fb0dSmrg{ 37265324fb0dSmrg amdgpu_context_handle context_handle; 37275324fb0dSmrg amdgpu_bo_handle bo_dst, bo_cmd, resources[4]; 37285324fb0dSmrg volatile unsigned char *ptr_dst; 37295324fb0dSmrg uint32_t *ptr_cmd; 37305324fb0dSmrg uint64_t mc_address_dst, mc_address_cmd; 37315324fb0dSmrg amdgpu_va_handle va_dst, va_cmd; 37325324fb0dSmrg int 
i, r; 37335324fb0dSmrg int bo_dst_size = 16384; 37345324fb0dSmrg int bo_cmd_size = 4096; 37355324fb0dSmrg struct amdgpu_cs_request ibs_request = {0}; 37365324fb0dSmrg struct amdgpu_cs_ib_info ib_info = {0}; 37375324fb0dSmrg struct amdgpu_cs_fence fence_status = {0}; 37385324fb0dSmrg uint32_t expired; 37395324fb0dSmrg amdgpu_bo_list_handle bo_list; 37405324fb0dSmrg 37415324fb0dSmrg r = amdgpu_cs_ctx_create(device_handle, &context_handle); 37425324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 37435324fb0dSmrg 37445324fb0dSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096, 37455324fb0dSmrg AMDGPU_GEM_DOMAIN_GTT, 0, 37465324fb0dSmrg &bo_cmd, (void **)&ptr_cmd, 37475324fb0dSmrg &mc_address_cmd, &va_cmd); 37485324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 37495324fb0dSmrg memset(ptr_cmd, 0, bo_cmd_size); 37505324fb0dSmrg 37515324fb0dSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096, 37525324fb0dSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, 37535324fb0dSmrg &bo_dst, (void **)&ptr_dst, 37545324fb0dSmrg &mc_address_dst, &va_dst); 37555324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 37565324fb0dSmrg 37575324fb0dSmrg i = 0; 37580ed5401bSmrg i += amdgpu_draw_init(ptr_cmd + i, version); 37595324fb0dSmrg 37600ed5401bSmrg i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst, version, 0); 37615324fb0dSmrg 37620ed5401bSmrg i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i, version, 0); 37635324fb0dSmrg 37640ed5401bSmrg i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_CONST, mc_address_shader_vs, 37650ed5401bSmrg version, 0); 37665324fb0dSmrg 37670ed5401bSmrg i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_CONST, mc_address_shader_ps, version); 37685324fb0dSmrg 37695324fb0dSmrg ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4); 37705324fb0dSmrg ptr_cmd[i++] = 0xc; 37715324fb0dSmrg ptr_cmd[i++] = 0x33333333; 37725324fb0dSmrg ptr_cmd[i++] = 0x33333333; 37735324fb0dSmrg ptr_cmd[i++] = 0x33333333; 37745324fb0dSmrg ptr_cmd[i++] = 0x33333333; 37755324fb0dSmrg 
37760ed5401bSmrg i += amdgpu_draw_draw(ptr_cmd + i, version); 37775324fb0dSmrg 37785324fb0dSmrg while (i & 7) 37795324fb0dSmrg ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */ 37805324fb0dSmrg 37815324fb0dSmrg resources[0] = bo_dst; 37825324fb0dSmrg resources[1] = bo_shader_ps; 37835324fb0dSmrg resources[2] = bo_shader_vs; 37845324fb0dSmrg resources[3] = bo_cmd; 37859bd392adSmrg r = amdgpu_bo_list_create(device_handle, 4, resources, NULL, &bo_list); 37865324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 37875324fb0dSmrg 37885324fb0dSmrg ib_info.ib_mc_address = mc_address_cmd; 37895324fb0dSmrg ib_info.size = i; 37905324fb0dSmrg ibs_request.ip_type = AMDGPU_HW_IP_GFX; 37915324fb0dSmrg ibs_request.ring = ring_id; 37925324fb0dSmrg ibs_request.resources = bo_list; 37935324fb0dSmrg ibs_request.number_of_ibs = 1; 37945324fb0dSmrg ibs_request.ibs = &ib_info; 37955324fb0dSmrg ibs_request.fence_info.handle = NULL; 37965324fb0dSmrg 37975324fb0dSmrg /* submit CS */ 37985324fb0dSmrg r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1); 37995324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 38005324fb0dSmrg 38015324fb0dSmrg r = amdgpu_bo_list_destroy(bo_list); 38025324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 38035324fb0dSmrg 38045324fb0dSmrg fence_status.ip_type = AMDGPU_HW_IP_GFX; 38055324fb0dSmrg fence_status.ip_instance = 0; 38065324fb0dSmrg fence_status.ring = ring_id; 38075324fb0dSmrg fence_status.context = context_handle; 38085324fb0dSmrg fence_status.fence = ibs_request.seq_no; 38095324fb0dSmrg 38105324fb0dSmrg /* wait for IB accomplished */ 38115324fb0dSmrg r = amdgpu_cs_query_fence_status(&fence_status, 38125324fb0dSmrg AMDGPU_TIMEOUT_INFINITE, 38135324fb0dSmrg 0, &expired); 38145324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 38155324fb0dSmrg CU_ASSERT_EQUAL(expired, true); 38165324fb0dSmrg 38175324fb0dSmrg /* verify if memset test result meets with expected */ 38185324fb0dSmrg i = 0; 38195324fb0dSmrg while(i < bo_dst_size) { 38205324fb0dSmrg CU_ASSERT_EQUAL(ptr_dst[i++], 0x33); 38215324fb0dSmrg } 38225324fb0dSmrg 
38235324fb0dSmrg r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size); 38245324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 38255324fb0dSmrg 38265324fb0dSmrg r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size); 38275324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 38285324fb0dSmrg 38295324fb0dSmrg r = amdgpu_cs_ctx_free(context_handle); 38305324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 38315324fb0dSmrg} 38325324fb0dSmrg 38335324fb0dSmrgstatic void amdgpu_memset_draw_test(amdgpu_device_handle device_handle, 38340ed5401bSmrg uint32_t ring, int version) 38355324fb0dSmrg{ 38365324fb0dSmrg amdgpu_bo_handle bo_shader_ps, bo_shader_vs; 38375324fb0dSmrg void *ptr_shader_ps; 38385324fb0dSmrg void *ptr_shader_vs; 38395324fb0dSmrg uint64_t mc_address_shader_ps, mc_address_shader_vs; 38405324fb0dSmrg amdgpu_va_handle va_shader_ps, va_shader_vs; 38415324fb0dSmrg int r; 38425324fb0dSmrg int bo_shader_size = 4096; 38435324fb0dSmrg 38445324fb0dSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096, 38455324fb0dSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, 38465324fb0dSmrg &bo_shader_ps, &ptr_shader_ps, 38475324fb0dSmrg &mc_address_shader_ps, &va_shader_ps); 38485324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 384988f8a8d2Smrg memset(ptr_shader_ps, 0, bo_shader_size); 38505324fb0dSmrg 38515324fb0dSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096, 38525324fb0dSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, 38535324fb0dSmrg &bo_shader_vs, &ptr_shader_vs, 38545324fb0dSmrg &mc_address_shader_vs, &va_shader_vs); 38555324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 385688f8a8d2Smrg memset(ptr_shader_vs, 0, bo_shader_size); 38575324fb0dSmrg 38580ed5401bSmrg r = amdgpu_draw_load_ps_shader(ptr_shader_ps, PS_CONST, version); 38595324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 38605324fb0dSmrg 38610ed5401bSmrg r = amdgpu_draw_load_vs_shader(ptr_shader_vs, version); 38625324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 38635324fb0dSmrg 38645324fb0dSmrg amdgpu_memset_draw(device_handle, bo_shader_ps, bo_shader_vs, 38650ed5401bSmrg 
mc_address_shader_ps, mc_address_shader_vs, 38660ed5401bSmrg ring, version); 38675324fb0dSmrg 38685324fb0dSmrg r = amdgpu_bo_unmap_and_free(bo_shader_ps, va_shader_ps, mc_address_shader_ps, bo_shader_size); 38695324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 38705324fb0dSmrg 38715324fb0dSmrg r = amdgpu_bo_unmap_and_free(bo_shader_vs, va_shader_vs, mc_address_shader_vs, bo_shader_size); 38725324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 38735324fb0dSmrg} 38745324fb0dSmrg 38755324fb0dSmrgstatic void amdgpu_memcpy_draw(amdgpu_device_handle device_handle, 38765324fb0dSmrg amdgpu_bo_handle bo_shader_ps, 38775324fb0dSmrg amdgpu_bo_handle bo_shader_vs, 38785324fb0dSmrg uint64_t mc_address_shader_ps, 38795324fb0dSmrg uint64_t mc_address_shader_vs, 38800ed5401bSmrg uint32_t ring, int version, int hang) 38815324fb0dSmrg{ 38825324fb0dSmrg amdgpu_context_handle context_handle; 38835324fb0dSmrg amdgpu_bo_handle bo_dst, bo_src, bo_cmd, resources[5]; 38845324fb0dSmrg volatile unsigned char *ptr_dst; 38855324fb0dSmrg unsigned char *ptr_src; 38865324fb0dSmrg uint32_t *ptr_cmd; 38875324fb0dSmrg uint64_t mc_address_dst, mc_address_src, mc_address_cmd; 38885324fb0dSmrg amdgpu_va_handle va_dst, va_src, va_cmd; 38895324fb0dSmrg int i, r; 38905324fb0dSmrg int bo_size = 16384; 38915324fb0dSmrg int bo_cmd_size = 4096; 38925324fb0dSmrg struct amdgpu_cs_request ibs_request = {0}; 38935324fb0dSmrg struct amdgpu_cs_ib_info ib_info= {0}; 38949bd392adSmrg uint32_t hang_state, hangs; 38959bd392adSmrg uint32_t expired; 38965324fb0dSmrg amdgpu_bo_list_handle bo_list; 38975324fb0dSmrg struct amdgpu_cs_fence fence_status = {0}; 38985324fb0dSmrg 38995324fb0dSmrg r = amdgpu_cs_ctx_create(device_handle, &context_handle); 39005324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 39015324fb0dSmrg 39025324fb0dSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096, 39035324fb0dSmrg AMDGPU_GEM_DOMAIN_GTT, 0, 39045324fb0dSmrg &bo_cmd, (void **)&ptr_cmd, 39055324fb0dSmrg &mc_address_cmd, &va_cmd); 39065324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 
39075324fb0dSmrg memset(ptr_cmd, 0, bo_cmd_size); 39085324fb0dSmrg 39095324fb0dSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096, 39105324fb0dSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, 39115324fb0dSmrg &bo_src, (void **)&ptr_src, 39125324fb0dSmrg &mc_address_src, &va_src); 39135324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 39145324fb0dSmrg 39155324fb0dSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096, 39165324fb0dSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, 39175324fb0dSmrg &bo_dst, (void **)&ptr_dst, 39185324fb0dSmrg &mc_address_dst, &va_dst); 39195324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 39205324fb0dSmrg 39215324fb0dSmrg memset(ptr_src, 0x55, bo_size); 39225324fb0dSmrg 39235324fb0dSmrg i = 0; 39240ed5401bSmrg i += amdgpu_draw_init(ptr_cmd + i, version); 39255324fb0dSmrg 39260ed5401bSmrg i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst, version, 0); 39275324fb0dSmrg 39280ed5401bSmrg i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i, version, 0); 39295324fb0dSmrg 39300ed5401bSmrg i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_vs, 39310ed5401bSmrg version, 0); 39325324fb0dSmrg 39330ed5401bSmrg i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_ps, version); 39345324fb0dSmrg 39355324fb0dSmrg ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 8); 39360ed5401bSmrg if (version == 9) { 39370ed5401bSmrg ptr_cmd[i++] = 0xc; 39380ed5401bSmrg ptr_cmd[i++] = mc_address_src >> 8; 39390ed5401bSmrg ptr_cmd[i++] = mc_address_src >> 40 | 0x10e00000; 39400ed5401bSmrg ptr_cmd[i++] = 0x7c01f; 39410ed5401bSmrg ptr_cmd[i++] = 0x90500fac; 39420ed5401bSmrg ptr_cmd[i++] = 0x3e000; 39430ed5401bSmrg i += 3; 39440ed5401bSmrg } else if (version == 10) { 39450ed5401bSmrg ptr_cmd[i++] = 0xc; 39460ed5401bSmrg ptr_cmd[i++] = mc_address_src >> 8; 39470ed5401bSmrg ptr_cmd[i++] = mc_address_src >> 40 | 0xc4b00000; 39480ed5401bSmrg ptr_cmd[i++] = 0x8007c007; 39490ed5401bSmrg ptr_cmd[i++] = 0x90500fac; 39500ed5401bSmrg i += 2; 
39510ed5401bSmrg ptr_cmd[i++] = 0x400; 39520ed5401bSmrg i++; 39530ed5401bSmrg } 39545324fb0dSmrg 39555324fb0dSmrg ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4); 39565324fb0dSmrg ptr_cmd[i++] = 0x14; 39575324fb0dSmrg ptr_cmd[i++] = 0x92; 39585324fb0dSmrg i += 3; 39595324fb0dSmrg 396088f8a8d2Smrg ptr_cmd[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 39615324fb0dSmrg ptr_cmd[i++] = 0x191; 39625324fb0dSmrg ptr_cmd[i++] = 0; 39635324fb0dSmrg 39640ed5401bSmrg i += amdgpu_draw_draw(ptr_cmd + i, version); 39655324fb0dSmrg 39665324fb0dSmrg while (i & 7) 39675324fb0dSmrg ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */ 39685324fb0dSmrg 39695324fb0dSmrg resources[0] = bo_dst; 39705324fb0dSmrg resources[1] = bo_src; 39715324fb0dSmrg resources[2] = bo_shader_ps; 39725324fb0dSmrg resources[3] = bo_shader_vs; 39735324fb0dSmrg resources[4] = bo_cmd; 39745324fb0dSmrg r = amdgpu_bo_list_create(device_handle, 5, resources, NULL, &bo_list); 39755324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 39765324fb0dSmrg 39775324fb0dSmrg ib_info.ib_mc_address = mc_address_cmd; 39785324fb0dSmrg ib_info.size = i; 39795324fb0dSmrg ibs_request.ip_type = AMDGPU_HW_IP_GFX; 39805324fb0dSmrg ibs_request.ring = ring; 39815324fb0dSmrg ibs_request.resources = bo_list; 39825324fb0dSmrg ibs_request.number_of_ibs = 1; 39835324fb0dSmrg ibs_request.ibs = &ib_info; 39845324fb0dSmrg ibs_request.fence_info.handle = NULL; 39855324fb0dSmrg r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1); 39865324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 39875324fb0dSmrg 39885324fb0dSmrg fence_status.ip_type = AMDGPU_HW_IP_GFX; 39895324fb0dSmrg fence_status.ip_instance = 0; 39905324fb0dSmrg fence_status.ring = ring; 39915324fb0dSmrg fence_status.context = context_handle; 39925324fb0dSmrg fence_status.fence = ibs_request.seq_no; 39935324fb0dSmrg 39945324fb0dSmrg /* wait for IB accomplished */ 39955324fb0dSmrg r = amdgpu_cs_query_fence_status(&fence_status, 39965324fb0dSmrg AMDGPU_TIMEOUT_INFINITE, 39975324fb0dSmrg 0, &expired); 39989bd392adSmrg if 
(!hang) { 39999bd392adSmrg CU_ASSERT_EQUAL(r, 0); 40009bd392adSmrg CU_ASSERT_EQUAL(expired, true); 40015324fb0dSmrg 40029bd392adSmrg /* verify if memcpy test result meets with expected */ 40039bd392adSmrg i = 0; 40049bd392adSmrg while(i < bo_size) { 40059bd392adSmrg CU_ASSERT_EQUAL(ptr_dst[i], ptr_src[i]); 40069bd392adSmrg i++; 40079bd392adSmrg } 40089bd392adSmrg } else { 40099bd392adSmrg r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs); 40109bd392adSmrg CU_ASSERT_EQUAL(r, 0); 40119bd392adSmrg CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET); 40125324fb0dSmrg } 40135324fb0dSmrg 40145324fb0dSmrg r = amdgpu_bo_list_destroy(bo_list); 40155324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 40165324fb0dSmrg 40175324fb0dSmrg r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_size); 40185324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 40195324fb0dSmrg r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_size); 40205324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 40215324fb0dSmrg 40225324fb0dSmrg r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size); 40235324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 40245324fb0dSmrg 40255324fb0dSmrg r = amdgpu_cs_ctx_free(context_handle); 40265324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 40275324fb0dSmrg} 40285324fb0dSmrg 40299bd392adSmrgvoid amdgpu_memcpy_draw_test(amdgpu_device_handle device_handle, uint32_t ring, 40300ed5401bSmrg int version, int hang) 40315324fb0dSmrg{ 40325324fb0dSmrg amdgpu_bo_handle bo_shader_ps, bo_shader_vs; 40335324fb0dSmrg void *ptr_shader_ps; 40345324fb0dSmrg void *ptr_shader_vs; 40355324fb0dSmrg uint64_t mc_address_shader_ps, mc_address_shader_vs; 40365324fb0dSmrg amdgpu_va_handle va_shader_ps, va_shader_vs; 40375324fb0dSmrg int bo_shader_size = 4096; 40389bd392adSmrg enum ps_type ps_type = hang ? 
PS_HANG : PS_TEX; 40395324fb0dSmrg int r; 40405324fb0dSmrg 40415324fb0dSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096, 40425324fb0dSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, 40435324fb0dSmrg &bo_shader_ps, &ptr_shader_ps, 40445324fb0dSmrg &mc_address_shader_ps, &va_shader_ps); 40455324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 404688f8a8d2Smrg memset(ptr_shader_ps, 0, bo_shader_size); 40475324fb0dSmrg 40485324fb0dSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096, 40495324fb0dSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, 40505324fb0dSmrg &bo_shader_vs, &ptr_shader_vs, 40515324fb0dSmrg &mc_address_shader_vs, &va_shader_vs); 40525324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 405388f8a8d2Smrg memset(ptr_shader_vs, 0, bo_shader_size); 40545324fb0dSmrg 40550ed5401bSmrg r = amdgpu_draw_load_ps_shader(ptr_shader_ps, ps_type, version); 40565324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 40575324fb0dSmrg 40580ed5401bSmrg r = amdgpu_draw_load_vs_shader(ptr_shader_vs, version); 40595324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 40605324fb0dSmrg 40615324fb0dSmrg amdgpu_memcpy_draw(device_handle, bo_shader_ps, bo_shader_vs, 40620ed5401bSmrg mc_address_shader_ps, mc_address_shader_vs, 40630ed5401bSmrg ring, version, hang); 40645324fb0dSmrg 40655324fb0dSmrg r = amdgpu_bo_unmap_and_free(bo_shader_ps, va_shader_ps, mc_address_shader_ps, bo_shader_size); 40665324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 40675324fb0dSmrg 40685324fb0dSmrg r = amdgpu_bo_unmap_and_free(bo_shader_vs, va_shader_vs, mc_address_shader_vs, bo_shader_size); 40695324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 40705324fb0dSmrg} 40715324fb0dSmrg 40725324fb0dSmrgstatic void amdgpu_draw_test(void) 40735324fb0dSmrg{ 40745324fb0dSmrg int r; 40755324fb0dSmrg struct drm_amdgpu_info_hw_ip info; 40760ed5401bSmrg uint32_t ring_id, version; 40775324fb0dSmrg 40785324fb0dSmrg r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info); 40795324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 408088f8a8d2Smrg if (!info.available_rings) 408188f8a8d2Smrg printf("SKIP ... 
as there's no graphics ring\n"); 40825324fb0dSmrg 40830ed5401bSmrg version = info.hw_ip_version_major; 40840ed5401bSmrg if (version != 9 && version != 10) { 40850ed5401bSmrg printf("SKIP ... unsupported gfx version %d\n", version); 40860ed5401bSmrg return; 40870ed5401bSmrg } 40880ed5401bSmrg 40895324fb0dSmrg for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) { 40900ed5401bSmrg amdgpu_memset_draw_test(device_handle, ring_id, version); 40910ed5401bSmrg amdgpu_memcpy_draw_test(device_handle, ring_id, version, 0); 40925324fb0dSmrg } 40935324fb0dSmrg} 409488f8a8d2Smrg 40950ed5401bSmrgvoid amdgpu_memcpy_draw_hang_slow_test(amdgpu_device_handle device_handle, uint32_t ring, int version) 40969bd392adSmrg{ 40979bd392adSmrg amdgpu_context_handle context_handle; 40989bd392adSmrg amdgpu_bo_handle bo_shader_ps, bo_shader_vs; 40999bd392adSmrg amdgpu_bo_handle bo_dst, bo_src, bo_cmd, resources[5]; 41009bd392adSmrg void *ptr_shader_ps; 41019bd392adSmrg void *ptr_shader_vs; 41029bd392adSmrg volatile unsigned char *ptr_dst; 41039bd392adSmrg unsigned char *ptr_src; 41049bd392adSmrg uint32_t *ptr_cmd; 41059bd392adSmrg uint64_t mc_address_dst, mc_address_src, mc_address_cmd; 41069bd392adSmrg uint64_t mc_address_shader_ps, mc_address_shader_vs; 41079bd392adSmrg amdgpu_va_handle va_shader_ps, va_shader_vs; 41089bd392adSmrg amdgpu_va_handle va_dst, va_src, va_cmd; 41099bd392adSmrg struct amdgpu_gpu_info gpu_info = {0}; 41109bd392adSmrg int i, r; 41119bd392adSmrg int bo_size = 0x4000000; 41129bd392adSmrg int bo_shader_ps_size = 0x400000; 41139bd392adSmrg int bo_shader_vs_size = 4096; 41149bd392adSmrg int bo_cmd_size = 4096; 41159bd392adSmrg struct amdgpu_cs_request ibs_request = {0}; 41169bd392adSmrg struct amdgpu_cs_ib_info ib_info= {0}; 41179bd392adSmrg uint32_t hang_state, hangs, expired; 41189bd392adSmrg amdgpu_bo_list_handle bo_list; 41199bd392adSmrg struct amdgpu_cs_fence fence_status = {0}; 41209bd392adSmrg 41219bd392adSmrg r = amdgpu_query_gpu_info(device_handle, 
&gpu_info); 41229bd392adSmrg CU_ASSERT_EQUAL(r, 0); 41239bd392adSmrg 41249bd392adSmrg r = amdgpu_cs_ctx_create(device_handle, &context_handle); 41259bd392adSmrg CU_ASSERT_EQUAL(r, 0); 41269bd392adSmrg 41279bd392adSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096, 41289bd392adSmrg AMDGPU_GEM_DOMAIN_GTT, 0, 41299bd392adSmrg &bo_cmd, (void **)&ptr_cmd, 41309bd392adSmrg &mc_address_cmd, &va_cmd); 41319bd392adSmrg CU_ASSERT_EQUAL(r, 0); 41329bd392adSmrg memset(ptr_cmd, 0, bo_cmd_size); 41339bd392adSmrg 41349bd392adSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_ps_size, 4096, 41359bd392adSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, 41369bd392adSmrg &bo_shader_ps, &ptr_shader_ps, 41379bd392adSmrg &mc_address_shader_ps, &va_shader_ps); 41389bd392adSmrg CU_ASSERT_EQUAL(r, 0); 41399bd392adSmrg memset(ptr_shader_ps, 0, bo_shader_ps_size); 41409bd392adSmrg 41419bd392adSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_vs_size, 4096, 41429bd392adSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, 41439bd392adSmrg &bo_shader_vs, &ptr_shader_vs, 41449bd392adSmrg &mc_address_shader_vs, &va_shader_vs); 41459bd392adSmrg CU_ASSERT_EQUAL(r, 0); 41469bd392adSmrg memset(ptr_shader_vs, 0, bo_shader_vs_size); 41479bd392adSmrg 41489bd392adSmrg r = amdgpu_draw_load_ps_shader_hang_slow(ptr_shader_ps, gpu_info.family_id); 41499bd392adSmrg CU_ASSERT_EQUAL(r, 0); 41509bd392adSmrg 41510ed5401bSmrg r = amdgpu_draw_load_vs_shader(ptr_shader_vs, version); 41529bd392adSmrg CU_ASSERT_EQUAL(r, 0); 41539bd392adSmrg 41549bd392adSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096, 41559bd392adSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, 41569bd392adSmrg &bo_src, (void **)&ptr_src, 41579bd392adSmrg &mc_address_src, &va_src); 41589bd392adSmrg CU_ASSERT_EQUAL(r, 0); 41599bd392adSmrg 41609bd392adSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096, 41619bd392adSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, 41629bd392adSmrg &bo_dst, (void **)&ptr_dst, 41639bd392adSmrg &mc_address_dst, &va_dst); 41649bd392adSmrg 
CU_ASSERT_EQUAL(r, 0); 41659bd392adSmrg 41669bd392adSmrg memset(ptr_src, 0x55, bo_size); 41679bd392adSmrg 41689bd392adSmrg i = 0; 41690ed5401bSmrg i += amdgpu_draw_init(ptr_cmd + i, version); 41709bd392adSmrg 41710ed5401bSmrg i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst, version, 1); 41729bd392adSmrg 41730ed5401bSmrg i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i, version, 1); 41749bd392adSmrg 41759bd392adSmrg i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_TEX, 41760ed5401bSmrg mc_address_shader_vs, version, 1); 41779bd392adSmrg 41780ed5401bSmrg i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_ps, version); 41799bd392adSmrg 41809bd392adSmrg ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 8); 41810ed5401bSmrg 41820ed5401bSmrg if (version == 9) { 41830ed5401bSmrg ptr_cmd[i++] = 0xc; 41840ed5401bSmrg ptr_cmd[i++] = mc_address_src >> 8; 41850ed5401bSmrg ptr_cmd[i++] = mc_address_src >> 40 | 0x10e00000; 41860ed5401bSmrg ptr_cmd[i++] = 0x1ffcfff; 41870ed5401bSmrg ptr_cmd[i++] = 0x90500fac; 41880ed5401bSmrg ptr_cmd[i++] = 0x1ffe000; 41890ed5401bSmrg i += 3; 41900ed5401bSmrg } else if (version == 10) { 41910ed5401bSmrg ptr_cmd[i++] = 0xc; 41920ed5401bSmrg ptr_cmd[i++] = mc_address_src >> 8; 41930ed5401bSmrg ptr_cmd[i++] = mc_address_src >> 40 | 0xc4b00000; 41940ed5401bSmrg ptr_cmd[i++] = 0x81ffc1ff; 41950ed5401bSmrg ptr_cmd[i++] = 0x90500fac; 41960ed5401bSmrg i += 4; 41970ed5401bSmrg } 41989bd392adSmrg 41999bd392adSmrg ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4); 42009bd392adSmrg ptr_cmd[i++] = 0x14; 42019bd392adSmrg ptr_cmd[i++] = 0x92; 42029bd392adSmrg i += 3; 42039bd392adSmrg 42049bd392adSmrg ptr_cmd[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 42059bd392adSmrg ptr_cmd[i++] = 0x191; 42069bd392adSmrg ptr_cmd[i++] = 0; 42079bd392adSmrg 42080ed5401bSmrg i += amdgpu_draw_draw(ptr_cmd + i, version); 42099bd392adSmrg 42109bd392adSmrg while (i & 7) 42119bd392adSmrg ptr_cmd[i++] = 0xffff1000; /* type3 nop 
packet */ 42129bd392adSmrg 42139bd392adSmrg resources[0] = bo_dst; 42149bd392adSmrg resources[1] = bo_src; 42159bd392adSmrg resources[2] = bo_shader_ps; 42169bd392adSmrg resources[3] = bo_shader_vs; 42179bd392adSmrg resources[4] = bo_cmd; 42189bd392adSmrg r = amdgpu_bo_list_create(device_handle, 5, resources, NULL, &bo_list); 42199bd392adSmrg CU_ASSERT_EQUAL(r, 0); 42209bd392adSmrg 42219bd392adSmrg ib_info.ib_mc_address = mc_address_cmd; 42229bd392adSmrg ib_info.size = i; 42239bd392adSmrg ibs_request.ip_type = AMDGPU_HW_IP_GFX; 42249bd392adSmrg ibs_request.ring = ring; 42259bd392adSmrg ibs_request.resources = bo_list; 42269bd392adSmrg ibs_request.number_of_ibs = 1; 42279bd392adSmrg ibs_request.ibs = &ib_info; 42289bd392adSmrg ibs_request.fence_info.handle = NULL; 42299bd392adSmrg r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1); 42309bd392adSmrg CU_ASSERT_EQUAL(r, 0); 42319bd392adSmrg 42329bd392adSmrg fence_status.ip_type = AMDGPU_HW_IP_GFX; 42339bd392adSmrg fence_status.ip_instance = 0; 42349bd392adSmrg fence_status.ring = ring; 42359bd392adSmrg fence_status.context = context_handle; 42369bd392adSmrg fence_status.fence = ibs_request.seq_no; 42379bd392adSmrg 42389bd392adSmrg /* wait for IB accomplished */ 42399bd392adSmrg r = amdgpu_cs_query_fence_status(&fence_status, 42409bd392adSmrg AMDGPU_TIMEOUT_INFINITE, 42419bd392adSmrg 0, &expired); 42429bd392adSmrg 42439bd392adSmrg r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs); 42449bd392adSmrg CU_ASSERT_EQUAL(r, 0); 42459bd392adSmrg CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET); 42469bd392adSmrg 42479bd392adSmrg r = amdgpu_bo_list_destroy(bo_list); 42489bd392adSmrg CU_ASSERT_EQUAL(r, 0); 42499bd392adSmrg 42509bd392adSmrg r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_size); 42519bd392adSmrg CU_ASSERT_EQUAL(r, 0); 42529bd392adSmrg r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_size); 42539bd392adSmrg CU_ASSERT_EQUAL(r, 0); 42549bd392adSmrg 
42559bd392adSmrg r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size); 42569bd392adSmrg CU_ASSERT_EQUAL(r, 0); 42579bd392adSmrg 42589bd392adSmrg r = amdgpu_bo_unmap_and_free(bo_shader_ps, va_shader_ps, mc_address_shader_ps, bo_shader_ps_size); 42599bd392adSmrg CU_ASSERT_EQUAL(r, 0); 42609bd392adSmrg r = amdgpu_bo_unmap_and_free(bo_shader_vs, va_shader_vs, mc_address_shader_vs, bo_shader_vs_size); 42619bd392adSmrg CU_ASSERT_EQUAL(r, 0); 42629bd392adSmrg 42639bd392adSmrg r = amdgpu_cs_ctx_free(context_handle); 42649bd392adSmrg CU_ASSERT_EQUAL(r, 0); 42659bd392adSmrg} 42669bd392adSmrg 426788f8a8d2Smrgstatic void amdgpu_gpu_reset_test(void) 426888f8a8d2Smrg{ 426988f8a8d2Smrg int r; 427088f8a8d2Smrg char debugfs_path[256], tmp[10]; 427188f8a8d2Smrg int fd; 427288f8a8d2Smrg struct stat sbuf; 427388f8a8d2Smrg amdgpu_context_handle context_handle; 427488f8a8d2Smrg uint32_t hang_state, hangs; 427588f8a8d2Smrg 427688f8a8d2Smrg r = amdgpu_cs_ctx_create(device_handle, &context_handle); 427788f8a8d2Smrg CU_ASSERT_EQUAL(r, 0); 427888f8a8d2Smrg 427988f8a8d2Smrg r = fstat(drm_amdgpu[0], &sbuf); 428088f8a8d2Smrg CU_ASSERT_EQUAL(r, 0); 428188f8a8d2Smrg 428288f8a8d2Smrg sprintf(debugfs_path, "/sys/kernel/debug/dri/%d/amdgpu_gpu_recover", minor(sbuf.st_rdev)); 428388f8a8d2Smrg fd = open(debugfs_path, O_RDONLY); 428488f8a8d2Smrg CU_ASSERT(fd >= 0); 428588f8a8d2Smrg 428688f8a8d2Smrg r = read(fd, tmp, sizeof(tmp)/sizeof(char)); 428788f8a8d2Smrg CU_ASSERT(r > 0); 428888f8a8d2Smrg 428988f8a8d2Smrg r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs); 429088f8a8d2Smrg CU_ASSERT_EQUAL(r, 0); 429188f8a8d2Smrg CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET); 429288f8a8d2Smrg 429388f8a8d2Smrg close(fd); 429488f8a8d2Smrg r = amdgpu_cs_ctx_free(context_handle); 429588f8a8d2Smrg CU_ASSERT_EQUAL(r, 0); 429688f8a8d2Smrg 429788f8a8d2Smrg amdgpu_compute_dispatch_test(); 429888f8a8d2Smrg amdgpu_gfx_dispatch_test(); 429988f8a8d2Smrg} 43000ed5401bSmrg 
43010ed5401bSmrgstatic void amdgpu_stable_pstate_test(void) 43020ed5401bSmrg{ 43030ed5401bSmrg int r; 43040ed5401bSmrg amdgpu_context_handle context_handle; 43050ed5401bSmrg uint32_t current_pstate = 0, new_pstate = 0; 43060ed5401bSmrg 43070ed5401bSmrg r = amdgpu_cs_ctx_create(device_handle, &context_handle); 43080ed5401bSmrg CU_ASSERT_EQUAL(r, 0); 43090ed5401bSmrg 43100ed5401bSmrg r = amdgpu_cs_ctx_stable_pstate(context_handle, 43110ed5401bSmrg AMDGPU_CTX_OP_GET_STABLE_PSTATE, 43120ed5401bSmrg 0, ¤t_pstate); 43130ed5401bSmrg CU_ASSERT_EQUAL(r, 0); 43140ed5401bSmrg CU_ASSERT_EQUAL(new_pstate, AMDGPU_CTX_STABLE_PSTATE_NONE); 43150ed5401bSmrg 43160ed5401bSmrg r = amdgpu_cs_ctx_stable_pstate(context_handle, 43170ed5401bSmrg AMDGPU_CTX_OP_SET_STABLE_PSTATE, 43180ed5401bSmrg AMDGPU_CTX_STABLE_PSTATE_PEAK, NULL); 43190ed5401bSmrg CU_ASSERT_EQUAL(r, 0); 43200ed5401bSmrg 43210ed5401bSmrg r = amdgpu_cs_ctx_stable_pstate(context_handle, 43220ed5401bSmrg AMDGPU_CTX_OP_GET_STABLE_PSTATE, 43230ed5401bSmrg 0, &new_pstate); 43240ed5401bSmrg CU_ASSERT_EQUAL(r, 0); 43250ed5401bSmrg CU_ASSERT_EQUAL(new_pstate, AMDGPU_CTX_STABLE_PSTATE_PEAK); 43260ed5401bSmrg 43270ed5401bSmrg r = amdgpu_cs_ctx_free(context_handle); 43280ed5401bSmrg CU_ASSERT_EQUAL(r, 0); 43290ed5401bSmrg} 4330